added functionality to save/move tokenizer file

This commit is contained in:
PaulKoudelka 2026-04-10 18:31:20 +02:00
parent 09258c7548
commit 0854debc00
23 changed files with 839 additions and 264 deletions

View File

@ -1915,6 +1915,9 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3403290862"] = "The selec
-- Select a provider first
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3654197869"] = "Select a provider first"
-- Estimated amount of tokens:
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T377990776"] = "Estimated amount of tokens:"
-- Start new chat in workspace '{0}'
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3928697643"] = "Start new chat in workspace '{0}'"
@ -3838,6 +3841,9 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2331453405"] = "(O
-- Add
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2646845972"] = "Add"
-- Selected file path for the custom tokenizer
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T278585345"] = "Selected file path for the custom tokenizer"
-- No models loaded or available.
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2810182573"] = "No models loaded or available."
@ -3847,6 +3853,12 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2842060373"] = "In
-- Currently, we cannot query the embedding models for the selected provider and/or host. Therefore, please enter the model name manually.
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T290547799"] = "Currently, we cannot query the embedding models for the selected provider and/or host. Therefore, please enter the model name manually."
-- Choose a custom tokenizer here
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T3787466119"] = "Choose a custom tokenizer here"
-- For better embeddings and less storage usage, it's recommended to use a custom tokenizer to enable a more accurate token count.
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T4126312157"] = "For better embeddings and less storage usage, it's recommended to use a custom tokenizer to enable a more accurate token count."
-- Model selection
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T416738168"] = "Model selection"
@ -5689,6 +5701,9 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1019424746"] = "Startup log file
-- Browse AI Studio's source code on GitHub — we welcome your contributions.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1107156991"] = "Browse AI Studio's source code on GitHub — we welcome your contributions."
-- The Tokenizer library serves as the base framework for integrating the DeepSeek tokenizer.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1132433749"] = "The Tokenizer library serves as the base framework for integrating the DeepSeek tokenizer."
-- ID mismatch: the plugin ID differs from the enterprise configuration ID.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1137744461"] = "ID mismatch: the plugin ID differs from the enterprise configuration ID."
@ -5929,6 +5944,9 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T566998575"] = "This is a library
-- Used .NET SDK
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T585329785"] = "Used .NET SDK"
-- We use the DeepSeek Tokenizer to estimate the number of tokens an input will generate.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T591393704"] = "We use the DeepSeek Tokenizer to estimate the number of tokens an input will generate."
-- This library is used to manage sidecar processes and to ensure that stale or zombie sidecars are detected and terminated.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T633932150"] = "This library is used to manage sidecar processes and to ensure that stale or zombie sidecars are detected and terminated."

View File

@ -34,7 +34,7 @@
</ChildContent>
<FooterContent>
<MudElement Style="flex: 0 0 auto;">
<MudTextField
<UserPromptComponent
T="string"
@ref="@this.inputField"
@bind-Text="@this.userInput"
@ -50,8 +50,11 @@
Disabled="@this.IsInputForbidden()"
Immediate="@true"
OnKeyUp="@this.InputKeyEvent"
WhenTextChangedAsync="@(_ =>this.CalculateTokenCount())"
UserAttributes="@USER_INPUT_ATTRIBUTES"
Class="@this.UserInputClass"
DebounceTime="TimeSpan.FromSeconds(1)"
HelperText="@this.TokenCountMessage"
Style="@this.UserInputStyle"/>
</MudElement>
<MudToolBar WrapContent="true" Gutters="@false" Class="border border-solid rounded" Style="border-color: lightgrey; gap: 2px;">

View File

@ -3,6 +3,7 @@ using AIStudio.Dialogs;
using AIStudio.Provider;
using AIStudio.Settings;
using AIStudio.Settings.DataModel;
using AIStudio.Tools.Services;
using Microsoft.AspNetCore.Components;
using Microsoft.AspNetCore.Components.Web;
@ -44,6 +45,8 @@ public partial class ChatComponent : MSGComponentBase, IAsyncDisposable
[Inject]
private IDialogService DialogService { get; init; } = null!;
[Inject]
private RustService RustService { get; init; } = null!;
[Inject]
private IJSRuntime JsRuntime { get; init; } = null!;
@ -69,10 +72,12 @@ public partial class ChatComponent : MSGComponentBase, IAsyncDisposable
private Guid currentChatThreadId = Guid.Empty;
private CancellationTokenSource? cancellationTokenSource;
private HashSet<FileAttachment> chatDocumentPaths = [];
private string tokenCount = "0";
private string TokenCountMessage => $"{this.T("Estimated amount of tokens:")} {this.tokenCount}";
// Unfortunately, we need the input field reference to blur the focus away. Without
// this, we cannot clear the input field.
private MudTextField<string> inputField = null!;
private UserPromptComponent<string> inputField = null!;
#region Overrides of ComponentBase
@ -460,6 +465,9 @@ public partial class ChatComponent : MSGComponentBase, IAsyncDisposable
// Was a modifier key pressed as well?
var isModifier = keyEvent.AltKey || keyEvent.CtrlKey || keyEvent.MetaKey || keyEvent.ShiftKey;
if (isEnter)
await this.CalculateTokenCount();
// Depending on the user's settings, might react to shortcuts:
switch (this.SettingsManager.ConfigurationData.Chat.ShortcutSendBehavior)
{
@ -596,6 +604,7 @@ public partial class ChatComponent : MSGComponentBase, IAsyncDisposable
this.chatDocumentPaths.Clear();
await this.inputField.BlurAsync();
this.tokenCount = "0";
// Enable the stream state for the chat component:
this.isStreaming = true;
@ -978,6 +987,25 @@ public partial class ChatComponent : MSGComponentBase, IAsyncDisposable
return Task.CompletedTask;
}
/// <summary>
/// Asks the Rust backend to estimate the token count for the current user input
/// and updates the helper text shown below the input field.
/// </summary>
private async Task CalculateTokenCount()
{
    // No input yet — reset the counter without calling the Rust backend:
    if (this.inputField.Value is null)
    {
        this.tokenCount = "0";

        // Re-render so the helper text reflects the reset as well:
        this.StateHasChanged();
        return;
    }

    var response = await this.RustService.GetTokenCount(this.inputField.Value);
    if (response is null)
        return;

    if (!response.Value.Success)
    {
        // Structured logging instead of string interpolation (cf. CA2254):
        this.Logger.LogWarning("Failed to calculate token count: {Message}", response.Value.Message);
        return;
    }

    this.tokenCount = response.Value.TokenCount.ToString();
    this.StateHasChanged();
}
#region Overrides of MSGComponentBase
protected override async Task ProcessIncomingMessage<T>(ComponentBase? sendingComponent, Event triggeredEvent, T? data) where T : default

View File

@ -5,12 +5,16 @@
T="string"
Text="@this.File"
Label="@this.Label"
ReadOnly="@true"
ReadOnly="@(!this.IsClearable)"
Validation="@this.Validation"
Adornment="Adornment.Start"
AdornmentIcon="@Icons.Material.Filled.AttachFile"
UserAttributes="@SPELLCHECK_ATTRIBUTES"
Variant="Variant.Outlined"
Clearable="this.IsClearable"
Error="@this.Error"
ErrorText="@this.ErrorText"
OnClearButtonClick="@this.OnClear"
/>
<MudButton StartIcon="@Icons.Material.Filled.FolderOpen" Variant="Variant.Outlined" Color="Color.Primary" Disabled="this.Disabled" OnClick="@this.OpenFileDialog">

View File

@ -2,6 +2,7 @@ using AIStudio.Tools.Rust;
using AIStudio.Tools.Services;
using Microsoft.AspNetCore.Components;
using Microsoft.AspNetCore.Components.Web;
namespace AIStudio.Components;
@ -27,7 +28,19 @@ public partial class SelectFile : MSGComponentBase
[Parameter]
public Func<string, string?> Validation { get; set; } = _ => null;
[Parameter]
public bool IsClearable { get; set; } = false;
[Parameter]
public bool Error { get; set; } = false;
[Parameter]
public string ErrorText { get; set; } = string.Empty;
[Parameter]
public Func<MouseEventArgs, Task> OnClear { get; set; } = _ => Task.CompletedTask;
[Inject]
public RustService RustService { get; set; } = null!;
@ -52,7 +65,7 @@ public partial class SelectFile : MSGComponentBase
this.File = file;
this.FileChanged.InvokeAsync(file);
}
private async Task OpenFileDialog()
{
var response = await this.RustService.SelectFile(this.FileDialogTitle, this.Filter, string.IsNullOrWhiteSpace(this.File) ? null : this.File);

View File

@ -0,0 +1,68 @@
using Microsoft.AspNetCore.Components;
using Timer = System.Timers.Timer;
namespace AIStudio.Components;
/// <summary>
/// Debounced multi-line text input built on <see cref="MudTextField{T}"/>.
/// Keeps the base API while adding a debounce timer.
/// Callers can override any property as usual.
/// </summary>
public class UserPromptComponent<T> : MudTextField<T>
{
// Quiet period that must elapse after the last update before the
// TextChanged / WhenTextChangedAsync callbacks are raised:
[Parameter]
public TimeSpan DebounceTime { get; set; } = TimeSpan.FromMilliseconds(800);
// Additional async callback invoked (debounced) with the current text:
[Parameter]
public Func<string, Task> WhenTextChangedAsync { get; set; } = _ => Task.CompletedTask;
// NOTE(review): this timer is never stopped/disposed when the component is torn
// down — confirm whether the MudTextField base class provides a disposal hook
// that should release it.
private readonly Timer debounceTimer = new();
private string text = string.Empty;              // last text observed by this component
private string lastParameterText = string.Empty; // last Text parameter value we synced from
private string lastNotifiedText = string.Empty;  // last text the callbacks were raised for
private bool isInitialized;                      // guards OnParametersSetAsync until setup ran
protected override async Task OnInitializedAsync()
{
// Seed all trackers from the initial Text parameter:
this.text = this.Text ?? string.Empty;
this.lastParameterText = this.text;
this.lastNotifiedText = this.text;
// One-shot timer; it is restarted on every parameter update below.
this.debounceTimer.AutoReset = false;
this.debounceTimer.Interval = this.DebounceTime.TotalMilliseconds;
this.debounceTimer.Elapsed += (_, _) =>
{
this.debounceTimer.Stop();
// Suppress duplicate notifications when the text did not change:
if (this.text == this.lastNotifiedText)
return;
this.lastNotifiedText = this.text;
// Marshal both callbacks back onto Blazor's synchronization context:
this.InvokeAsync(async () => await this.TextChanged.InvokeAsync(this.text));
this.InvokeAsync(async () => await this.WhenTextChangedAsync(this.text));
};
this.isInitialized = true;
await base.OnInitializedAsync();
}
protected override async Task OnParametersSetAsync()
{
// Ensure the timer uses the latest debouncing interval:
if (!this.isInitialized)
return;
// Tolerance of 1 ms avoids needless reassignment on float comparison:
if(Math.Abs(this.debounceTimer.Interval - this.DebounceTime.TotalMilliseconds) > 1)
this.debounceTimer.Interval = this.DebounceTime.TotalMilliseconds;
// Only sync when the parent's parameter actually changed since the last change:
if (this.Text != this.lastParameterText)
{
this.text = this.Text ?? string.Empty;
this.lastParameterText = this.text;
}
// Restart the debounce window on every parameter update — the Elapsed handler
// fires only after updates have stopped for DebounceTime:
this.debounceTimer.Stop();
this.debounceTimer.Start();
await base.OnParametersSetAsync();
}
}

View File

@ -1,5 +1,6 @@
@using AIStudio.Provider
@using AIStudio.Provider.SelfHosted
@using AIStudio.Tools.Rust
@inherits MSGComponentBase
<MudDialog>
@ -7,7 +8,7 @@
<MudForm @ref="@this.form" @bind-IsValid="@this.dataIsValid" @bind-Errors="@this.dataIssues">
<MudStack Row="@true" AlignItems="AlignItems.Center">
@* ReSharper disable once CSharpWarnings::CS8974 *@
<MudSelect @bind-Value="@this.DataLLMProvider" Label="@T("Provider")" Class="mb-3" OpenIcon="@Icons.Material.Filled.AccountBalance" AdornmentColor="Color.Info" Adornment="Adornment.Start" Validation="@this.providerValidation.ValidatingProvider">
<MudSelect @bind-Value="@this.DataLLMProvider" Label="@T("Provider")" Class="mb-3" OpenIcon="@Icons.Material.Filled.AccountBalance" AdornmentColor="Color.Info" Adornment="Adornment.Start" Validation="@this.providerValidation.ValidatingProvider">
@foreach (LLMProviders provider in Enum.GetValues(typeof(LLMProviders)))
{
if (provider.ProvideEmbeddingAPI() || provider is LLMProviders.NONE)
@ -22,7 +23,7 @@
@T("Create account")
</MudButton>
</MudStack>
@if (this.DataLLMProvider.IsAPIKeyNeeded(this.DataHost))
{
<SecretInputField Secret="@this.dataAPIKey" SecretChanged="@this.OnAPIKeyChanged" Label="@this.APIKeyText" Validation="@this.providerValidation.ValidatingAPIKey"/>
@ -71,15 +72,14 @@
AdornmentColor="Color.Info"
Validation="@this.ValidateManuallyModel"
UserAttributes="@SPELLCHECK_ATTRIBUTES"
HelperText="@T("Currently, we cannot query the embedding models for the selected provider and/or host. Therefore, please enter the model name manually.")"
/>
HelperText="@T("Currently, we cannot query the embedding models for the selected provider and/or host. Therefore, please enter the model name manually.")"/>
}
else
{
<MudButton Disabled="@(!this.DataLLMProvider.CanLoadModels(this.DataHost, this.dataAPIKey))" Variant="Variant.Filled" Size="Size.Small" StartIcon="@Icons.Material.Filled.Refresh" OnClick="@this.ReloadModels">
@T("Load")
</MudButton>
@if(this.availableModels.Count is 0)
@if (this.availableModels.Count is 0)
{
<MudText Typo="Typo.body1">
@T("No models loaded or available.")
@ -122,18 +122,36 @@
AdornmentIcon="@Icons.Material.Filled.Lightbulb"
AdornmentColor="Color.Info"
Validation="@this.providerValidation.ValidatingInstanceName"
UserAttributes="@SPELLCHECK_ATTRIBUTES"
/>
UserAttributes="@SPELLCHECK_ATTRIBUTES"/>
@if (this.DataModel != default){
<MudJustifiedText Typo="Typo.body1" Class="mb-3">
@T("For better embeddings and less storage usage, it's recommended to use a custom tokenizer to enable a more accurate token count.")
</MudJustifiedText>
<SelectFile
File="@this.dataFilePath"
FileChanged="@this.OnDataFilePathChanged"
Label="@T("Selected file path for the custom tokenizer")"
FileDialogTitle="@T("Choose a custom tokenizer here")"
Filter="[FileTypes.JSON]"
IsClearable="@true"
Error="@(!string.IsNullOrWhiteSpace(this.dataCustomTokenizerValidationIssue))"
ErrorText="@(this.dataCustomTokenizerValidationIssue)"
Validation="@this.providerValidation.ValidatingCustomTokenizer"
OnClear = "@this.ClearPathTokenizer"
/>
}
</MudForm>
<Issues IssuesData="@this.dataIssues"/>
@if (this.dataStoreWasAttempted)
{
<Issues IssuesData="@this.dataIssues"/>
}
</DialogContent>
<DialogActions>
<MudButton OnClick="@this.Cancel" Variant="Variant.Filled">
@T("Cancel")
</MudButton>
<MudButton OnClick="@this.Store" Variant="Variant.Filled" Color="Color.Primary">
@if(this.IsEditing)
@if (this.IsEditing)
{
@T("Update")
}
@ -143,4 +161,4 @@
}
</MudButton>
</DialogActions>
</MudDialog>
</MudDialog>

View File

@ -1,3 +1,4 @@
using AIStudio.Chat;
using AIStudio.Components;
using AIStudio.Provider;
using AIStudio.Settings;
@ -5,7 +6,7 @@ using AIStudio.Tools.Services;
using AIStudio.Tools.Validation;
using Microsoft.AspNetCore.Components;
using Microsoft.AspNetCore.Components.Web;
using Host = AIStudio.Provider.SelfHosted.Host;
namespace AIStudio.Dialogs;
@ -89,6 +90,11 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId
private string dataAPIKeyStorageIssue = string.Empty;
private string dataEditingPreviousInstanceName = string.Empty;
private string dataLoadingModelsIssue = string.Empty;
private string dataFilePath = string.Empty;
private string dataCustomTokenizerValidationIssue = string.Empty;
private Task dataTokenizerValidationTask = Task.CompletedTask;
private bool dataStoreWasAttempted;
private int dataTokenizerValidationRevision;
// We get the form reference from Blazor code to validate it manually:
private MudForm form = null!;
@ -96,7 +102,7 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId
private readonly List<Model> availableModels = new();
private readonly Encryption encryption = Program.ENCRYPTION;
private readonly ProviderValidation providerValidation;
public EmbeddingProviderDialog()
{
this.providerValidation = new()
@ -107,6 +113,7 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId
GetUsedInstanceNames = () => this.UsedInstanceNames,
GetHost = () => this.DataHost,
IsModelProvidedManually = () => this.DataLLMProvider is LLMProviders.SELF_HOSTED && this.DataHost is Host.OLLAMA,
GetCustomTokenizerValidationIssue = () => this.dataCustomTokenizerValidationIssue,
};
}
@ -152,10 +159,12 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId
// Load the used instance names:
this.UsedInstanceNames = this.SettingsManager.ConfigurationData.EmbeddingProviders.Select(x => x.Name.ToLowerInvariant()).ToList();
Console.WriteLine($"Previous instance names: {this.dataEditingPreviousInstanceName}");
// When editing, we need to load the data:
if(this.IsEditing)
{
this.dataEditingPreviousInstanceName = this.DataName.ToLowerInvariant();
Console.WriteLine($"Previous instance name is '{this.dataEditingPreviousInstanceName}'");
// When using self-hosted embedding, we must copy the model name:
if (this.DataLLMProvider is LLMProviders.SELF_HOSTED)
@ -211,6 +220,8 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId
private async Task Store()
{
this.dataStoreWasAttempted = true;
await this.dataTokenizerValidationTask;
await this.form.Validate();
this.dataAPIKeyStorageIssue = string.Empty;
@ -227,6 +238,11 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId
if (!this.dataIsValid)
return;
var response = await this.RustService.StoreTokenizer(this.DataName, this.dataEditingPreviousInstanceName, this.dataFilePath);
Console.WriteLine($"Response from Rust: {response.Message}");
if (!response.Success)
return;
// Use the data model to store the provider.
// We just return this data to the parent component:
var addedProviderSettings = this.CreateEmbeddingProviderSettings();
@ -265,6 +281,58 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId
}
}
/// <summary>
/// Clear-button handler for the tokenizer file selector: resets the chosen path.
/// </summary>
private Task ClearPathTokenizer(MouseEventArgs _) => this.OnDataFilePathChanged(string.Empty);
// Called whenever the user picks (or clears) a custom tokenizer file.
// Kicks off an async validation of the file and refreshes the form state.
private async Task OnDataFilePathChanged(string filePath)
{
this.dataFilePath = filePath;
// Bump the revision so any in-flight validation of an older path is ignored:
var validationRevision = ++this.dataTokenizerValidationRevision;
// Keep the task around so Store() can await a pending validation:
this.dataTokenizerValidationTask = this.ValidateCustomTokenizer(filePath, validationRevision);
await this.dataTokenizerValidationTask;
// A newer change superseded this one while we awaited — do not touch the form:
if (validationRevision != this.dataTokenizerValidationRevision)
return;
// Only surface validation errors after the user tried to store once;
// before that, keep the form pristine:
if (this.dataStoreWasAttempted)
await this.form.Validate();
else
this.form.ResetValidation();
}
// Validates the selected tokenizer file via the Rust backend and stores the
// resulting issue text (empty = valid). The revision parameter guards against
// stale results: the issue is only written when this call is still the latest.
private async Task ValidateCustomTokenizer(string filePath, int validationRevision)
{
// An empty path means "no custom tokenizer" — that is always valid:
if (string.IsNullOrWhiteSpace(filePath))
{
if (validationRevision == this.dataTokenizerValidationRevision)
this.dataCustomTokenizerValidationIssue = string.Empty;
return;
}
try
{
var response = await this.RustService.ValidateTokenizer(filePath);
// Superseded by a newer selection while awaiting — drop this result:
if (validationRevision != this.dataTokenizerValidationRevision)
return;
if (response.Success)
this.dataCustomTokenizerValidationIssue = string.Empty;
else
this.dataCustomTokenizerValidationIssue = T("Invalid tokenizer: ") + response.Message;
}
catch (Exception e)
{
if (validationRevision != this.dataTokenizerValidationRevision)
return;
this.Logger.LogError(e, "Failed to validate custom tokenizer.");
this.dataCustomTokenizerValidationIssue = T("Failed to validate the selected tokenizer. Please try again.");
}
}
private void OnHostChanged(Host selectedHost)
{
// When the host changes, reset the model selection state:
@ -307,4 +375,4 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId
};
private bool IsNoneProvider => this.DataLLMProvider is LLMProviders.NONE;
}
}

View File

@ -290,6 +290,8 @@
<ThirdPartyComponent Name="sysinfo" Developer="Guillaume Gomez & Open Source Community" LicenseName="MIT" LicenseUrl="https://github.com/GuillaumeGomez/sysinfo/blob/main/LICENSE" RepositoryUrl="https://github.com/GuillaumeGomez/sysinfo" UseCase="@T("This library is used to manage sidecar processes and to ensure that stale or zombie sidecars are detected and terminated.")"/>
<ThirdPartyComponent Name="tempfile" Developer="Steven Allen, Ashley Mannix & Open Source Community" LicenseName="MIT" LicenseUrl="https://github.com/Stebalien/tempfile/blob/master/LICENSE-MIT" RepositoryUrl="https://github.com/Stebalien/tempfile" UseCase="@T("This library is used to create temporary folders for saving the certificate and private key for communication with Qdrant.")"/>
<ThirdPartyComponent Name="Lua-CSharp" Developer="Yusuke Nakada & Open Source Community" LicenseName="MIT" LicenseUrl="https://github.com/nuskey8/Lua-CSharp/blob/main/LICENSE" RepositoryUrl="https://github.com/nuskey8/Lua-CSharp" UseCase="@T("We use Lua as the language for plugins. Lua-CSharp lets Lua scripts communicate with AI Studio and vice versa. Thank you, Yusuke Nakada, for this great library.")" />
<ThirdPartyComponent Name="DeepSeek-V3.2 Tokenizer" Developer="DeepSeek-AI" LicenseName="MIT" LicenseUrl="https://huggingface.co/datasets/choosealicense/licenses/blob/main/markdown/mit.md" RepositoryUrl="https://huggingface.co/deepseek-ai/DeepSeek-V3.2/tree/main" UseCase="@T("We use the DeepSeek Tokenizer to estimate the number of tokens an input will generate.")" />
<ThirdPartyComponent Name="Tokenizer" Developer="Anthony Moi, Nicolas Patry, Pierric Cistac, Arthur Zucker & Open Source Community" LicenseName="Apache-2.0" LicenseUrl="https://github.com/huggingface/tokenizers/blob/main/LICENSE" RepositoryUrl="https://github.com/huggingface/tokenizers" UseCase="@T("The Tokenizer library serves as the base framework for integrating the DeepSeek tokenizer.")" />
<ThirdPartyComponent Name="HtmlAgilityPack" Developer="ZZZ Projects & Open Source Community" LicenseName="MIT" LicenseUrl="https://github.com/zzzprojects/html-agility-pack/blob/master/LICENSE" RepositoryUrl="https://github.com/zzzprojects/html-agility-pack" UseCase="@T("We use the HtmlAgilityPack to extract content from the web. This is necessary, e.g., when you provide a URL as input for an assistant.")"/>
<ThirdPartyComponent Name="ReverseMarkdown" Developer="Babu Annamalai & Open Source Community" LicenseName="MIT" LicenseUrl="https://github.com/mysticmind/reversemarkdown-net/blob/master/LICENSE" RepositoryUrl="https://github.com/mysticmind/reversemarkdown-net" UseCase="@T("This library is used to convert HTML to Markdown. This is necessary, e.g., when you provide a URL as input for an assistant.")"/>
<ThirdPartyComponent Name="wikEd diff" Developer="Cacycle & Open Source Community" LicenseName="None (public domain)" LicenseUrl="https://en.wikipedia.org/wiki/User:Cacycle/diff#License" RepositoryUrl="https://en.wikipedia.org/wiki/User:Cacycle/diff" UseCase="@T("This library is used to display the differences between two texts. This is necessary, e.g., for the grammar and spelling assistant.")"/>

View File

@ -1917,6 +1917,9 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3403290862"] = "Der ausge
-- Select a provider first
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3654197869"] = "Wähle zuerst einen Anbieter aus"
-- Estimated amount of tokens:
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T377990776"] = "Geschätzte Anzahl an Tokens:"
-- Start new chat in workspace "{0}"
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3928697643"] = "Neuen Chat im Arbeitsbereich \"{0}\" starten"
@ -5691,6 +5694,9 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1019424746"] = "Startprotokollda
-- Browse AI Studio's source code on GitHub — we welcome your contributions.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1107156991"] = "Sehen Sie sich den Quellcode von AI Studio auf GitHub an — wir freuen uns über Ihre Beiträge."
-- The Tokenizer library serves as the base framework for integrating the DeepSeek tokenizer.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1132433749"] = "Die Tokenizer-Bibliothek dient als Basis-Framework für die Integration des DeepSeek-Tokenizers."
-- ID mismatch: the plugin ID differs from the enterprise configuration ID.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1137744461"] = "ID-Konflikt: Die Plugin-ID stimmt nicht mit der ID der Unternehmenskonfiguration überein."
@ -5931,6 +5937,9 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T566998575"] = "Dies ist eine Bib
-- Used .NET SDK
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T585329785"] = "Verwendetes .NET SDK"
-- We use the DeepSeek Tokenizer to estimate the number of tokens an input will generate.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T591393704"] = "Wir verwenden den DeepSeek-Tokenizer, um die Token-Anzahl einer Eingabe zu schätzen."
-- This library is used to manage sidecar processes and to ensure that stale or zombie sidecars are detected and terminated.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T633932150"] = "Diese Bibliothek wird verwendet, um Sidecar-Prozesse zu verwalten und sicherzustellen, dass veraltete oder Zombie-Sidecars erkannt und beendet werden."

View File

@ -1917,6 +1917,9 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3403290862"] = "The selec
-- Select a provider first
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3654197869"] = "Select a provider first"
-- Estimated amount of tokens:
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T377990776"] = "Estimated amount of tokens:"
-- Start new chat in workspace "{0}"
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::CHATCOMPONENT::T3928697643"] = "Start new chat in workspace \"{0}\""
@ -5691,6 +5694,9 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1019424746"] = "Startup log file
-- Browse AI Studio's source code on GitHub — we welcome your contributions.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1107156991"] = "Browse AI Studio's source code on GitHub — we welcome your contributions."
-- The Tokenizer library serves as the base framework for integrating the DeepSeek tokenizer.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1132433749"] = "The Tokenizer library serves as the base framework for integrating the DeepSeek tokenizer."
-- ID mismatch: the plugin ID differs from the enterprise configuration ID.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T1137744461"] = "ID mismatch: the plugin ID differs from the enterprise configuration ID."
@ -5931,6 +5937,9 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T566998575"] = "This is a library
-- Used .NET SDK
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T585329785"] = "Used .NET SDK"
-- We use the DeepSeek Tokenizer to estimate the number of tokens an input will generate.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T591393704"] = "We use the DeepSeek Tokenizer to estimate the number of tokens an input will generate."
-- This library is used to manage sidecar processes and to ensure that stale or zombie sidecars are detected and terminated.
UI_TEXT_CONTENT["AISTUDIO::PAGES::INFORMATION::T633932150"] = "This library is used to manage sidecar processes and to ensure that stale or zombie sidecars are detected and terminated."

View File

@ -0,0 +1,41 @@
namespace AIStudio.Tools.Rust;
/// <summary>
/// Describes a selectable file type which may nest further file types.
/// Build readable trees through the factories <see cref="Leaf"/>, <see cref="Parent"/> and <see cref="Composite"/>.
/// </summary>
/// <param name="FilterName">Display name of the type (e.g., "Document").</param>
/// <param name="FilterExtensions">File extensions belonging to this type (without dot).</param>
/// <param name="Children">Nested file types that are included when this type is selected.</param>
public sealed record FileType(string FilterName, string[] FilterExtensions, IReadOnlyList<FileType> Children)
{
    /// <summary>
    /// Creates a node without any children.
    /// Example: <c>FileType.Leaf(".NET", "cs", "razor")</c>
    /// </summary>
    public static FileType Leaf(string name, params string[] extensions)
    {
        return new FileType(name, extensions, []);
    }

    /// <summary>
    /// Creates a node that carries no extensions of its own, only children.
    /// Example: <c>FileType.Parent("Source Code", dotnet, java)</c>
    /// </summary>
    public static FileType Parent(string name, params FileType[]? children)
    {
        return new FileType(name, [], children ?? []);
    }

    /// <summary>
    /// Creates a node that has its own extensions in addition to children.
    /// </summary>
    public static FileType Composite(string name, string[] extensions, params FileType[] children)
    {
        return new FileType(name, extensions, children);
    }

    /// <summary>
    /// Collects all extensions of this node and, recursively, of its children.
    /// Duplicates are removed case-insensitively.
    /// </summary>
    public IEnumerable<string> FlattenExtensions()
    {
        var childExtensions = this.Children.SelectMany(static child => child.FlattenExtensions());
        return this.FilterExtensions.Concat(childExtensions).Distinct(StringComparer.OrdinalIgnoreCase);
    }
}

View File

@ -127,4 +127,4 @@ public static class FileTypes
return false;
}
}
}

View File

@ -0,0 +1,3 @@
namespace AIStudio.Tools.Rust;
// Response payload for tokenizer save/move operations handled by the Rust backend.
// NOTE(review): Success is declared as int here, while the sibling TokenizerResponse
// uses bool Success, and the payload field is named Response instead of Message.
// Confirm against the JSON the Rust endpoint actually emits; if it sends a boolean,
// this should be `bool Success` for consistency.
public readonly record struct TokenizerHandlingResponse(int Success, string Response);

View File

@ -0,0 +1,3 @@
namespace AIStudio.Tools.Rust;
/// <summary>
/// Result of a tokenizer call to the Rust backend.
/// </summary>
/// <param name="Success">True when the backend processed the request successfully.</param>
/// <param name="TokenCount">The estimated number of tokens; 0 when not applicable.</param>
/// <param name="Message">An error or status message from the backend.</param>
public readonly record struct TokenizerResponse(bool Success, int TokenCount, string Message);

View File

@ -0,0 +1,69 @@
using AIStudio.Tools.Rust;
namespace AIStudio.Tools.Services;
public sealed partial class RustService
{
    /// <summary>
    /// Asks the Rust backend to validate the tokenizer file at the given path.
    /// </summary>
    /// <param name="filePath">Path to the tokenizer file to validate.</param>
    /// <returns>The validation result; Success is false when the HTTP request itself failed.</returns>
    public async Task<TokenizerResponse> ValidateTokenizer(string filePath)
    {
        var result = await this.http.PostAsJsonAsync("/tokenizer/validate", new {
            file_path = filePath,
        }, this.jsonRustSerializerOptions);

        if (!result.IsSuccessStatusCode)
        {
            // Structured logging instead of interpolation (cf. CA2254):
            this.logger!.LogError("Failed to validate the tokenizer '{StatusCode}'", result.StatusCode);
            return new TokenizerResponse
            {
                Success = false,
                Message = $"An error occurred while sending the path to the Rust framework for validation: {result.StatusCode}",
                TokenCount = 0
            };
        }

        return await result.Content.ReadFromJsonAsync<TokenizerResponse>(this.jsonRustSerializerOptions);
    }

    /// <summary>
    /// Asks the Rust backend to store (save/move) the tokenizer file for the given model.
    /// </summary>
    /// <param name="modelId">The current model id the tokenizer belongs to.</param>
    /// <param name="previousmodelId">The previous model id, used when renaming an existing provider.</param>
    /// <param name="filePath">Path to the tokenizer file to store.</param>
    /// <returns>The store result; Success is false when the HTTP request itself failed.</returns>
    public async Task<TokenizerResponse> StoreTokenizer(string modelId, string previousmodelId, string filePath)
    {
        var result = await this.http.PostAsJsonAsync("/tokenizer/store", new {
            model_id = modelId,
            previous_model_id = previousmodelId,
            file_path = filePath,
        }, this.jsonRustSerializerOptions);

        if (!result.IsSuccessStatusCode)
        {
            this.logger!.LogError("Failed to store the tokenizer '{StatusCode}'", result.StatusCode);
            return new TokenizerResponse
            {
                Success = false,
                Message = $"An error occurred while sending the path to the Rust framework for storing: {result.StatusCode}",
                TokenCount = 0
            };
        }

        return await result.Content.ReadFromJsonAsync<TokenizerResponse>(this.jsonRustSerializerOptions);
    }

    /// <summary>
    /// Asks the Rust backend to estimate the token count for the given text.
    /// The call is bounded by a 5-second timeout.
    /// </summary>
    /// <param name="text">The text to count tokens for.</param>
    /// <returns>The token count response, or null when the request failed or timed out.</returns>
    public async Task<TokenizerResponse?> GetTokenCount(string text)
    {
        try
        {
            // Dispose the CTS deterministically; the previous version leaked its timer:
            using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(5));
            var payload = new { text };
            var response = await this.http.PostAsJsonAsync("/tokenizer/count", payload, this.jsonRustSerializerOptions, cts.Token);
            response.EnsureSuccessStatusCode();
            return await response.Content.ReadFromJsonAsync<TokenizerResponse>(this.jsonRustSerializerOptions, cancellationToken: cts.Token);
        }
        catch (Exception e)
        {
            // The logger might not be available during early startup — fall back to the console:
            if(this.logger is not null)
                this.logger.LogError(e, "Error while getting token count from Rust service.");
            else
                Console.WriteLine($"Error while getting token count from Rust service: '{e}'.");

            return null;
        }
    }
}

View File

@ -22,6 +22,8 @@ public sealed class ProviderValidation
public Func<bool> IsModelProvidedManually { get; init; } = () => false;
public Func<string> GetCustomTokenizerValidationIssue { get; init; } = () => string.Empty;
public string? ValidatingHostname(string hostname)
{
if(this.GetProvider() != LLMProviders.SELF_HOSTED)
@ -120,4 +122,13 @@ public sealed class ProviderValidation
return null;
}
}
/// <summary>
/// Surfaces any pending custom tokenizer validation issue to the form.
/// Returns null when no issue is present, i.e., the value is valid.
/// </summary>
public string? ValidatingCustomTokenizer(string _)
{
    var currentIssue = this.GetCustomTokenizerValidationIssue();
    return string.IsNullOrWhiteSpace(currentIssue) ? null : currentIssue;
}
}

View File

@ -11,7 +11,6 @@ use serde::Deserialize;
use strum_macros::Display;
use tauri::updater::UpdateResponse;
use tauri::{FileDropEvent, GlobalShortcutManager, UpdaterEvent, RunEvent, Manager, PathResolver, Window, WindowEvent, generate_context};
use tauri::api::dialog::blocking::FileDialogBuilder;
use tokio::sync::broadcast;
use tokio::time;
use crate::api_token::APIToken;
@ -474,241 +473,6 @@ pub async fn install_update(_token: APIToken) {
}
}
/// Let the user select a directory.
///
/// When a previously selected directory is provided, the dialog opens there.
/// Returns whether the user cancelled and, if not, the selected path.
#[post("/select/directory?<title>", data = "<previous_directory>")]
pub fn select_directory(_token: APIToken, title: &str, previous_directory: Option<Json<PreviousDirectory>>) -> Json<DirectorySelectionResponse> {
    // Build the dialog, restoring the previously used directory when available:
    let folder_path = match previous_directory {
        Some(previous) => FileDialogBuilder::new()
            .set_title(title)
            .set_directory(previous.path.as_str())
            .pick_folder(),

        None => FileDialogBuilder::new()
            .set_title(title)
            .pick_folder(),
    };

    match folder_path {
        Some(path) => {
            info!("User selected directory: {path:?}");
            Json(DirectorySelectionResponse {
                user_cancelled: false,
                // Lossy conversion instead of to_str().unwrap(): the previous
                // version panicked on paths that are not valid UTF-8.
                selected_directory: path.to_string_lossy().to_string(),
            })
        },

        None => {
            info!("User cancelled directory selection.");
            Json(DirectorySelectionResponse {
                user_cancelled: true,
                selected_directory: String::new(),
            })
        },
    }
}
/// Payload: the directory a previous dialog run ended in.
#[derive(Clone, Deserialize)]
pub struct PreviousDirectory {
    path: String,
}

/// Payload: a file-type filter (display name plus allowed extensions).
#[derive(Clone, Deserialize)]
pub struct FileTypeFilter {
    filter_name: String,
    filter_extensions: Vec<String>,
}

/// Payload for the file-selection endpoints.
#[derive(Clone, Deserialize)]
pub struct SelectFileOptions {
    title: String,
    previous_file: Option<PreviousFile>,
    filter: Option<FileTypeFilter>,
}

/// Payload for the save-file endpoint.
#[derive(Clone, Deserialize)]
pub struct SaveFileOptions {
    title: String,
    // Used to seed the dialog's start location — see `save_file`.
    name_file: Option<PreviousFile>,
    filter: Option<FileTypeFilter>,
}

/// Response for the directory-selection endpoint.
#[derive(Serialize)]
pub struct DirectorySelectionResponse {
    user_cancelled: bool,
    selected_directory: String,
}
/// Let the user select a file.
///
/// Optionally restricts the selectable files via a type filter and starts in
/// the directory of a previously chosen file. Returns an empty path with
/// `user_cancelled = true` when the dialog was dismissed.
#[post("/select/file", data = "<payload>")]
pub fn select_file(_token: APIToken, payload: Json<SelectFileOptions>) -> Json<FileSelectionResponse> {
    // Create a new file dialog builder:
    let file_dialog = FileDialogBuilder::new();

    // Set the title of the file dialog:
    let file_dialog = file_dialog.set_title(&payload.title);

    // Set the file type filter if provided:
    let file_dialog = apply_filter(file_dialog, &payload.filter);

    // Set the previous file path if provided:
    let file_dialog = match &payload.previous_file {
        Some(previous) => {
            let previous_path = previous.file_path.as_str();
            file_dialog.set_directory(previous_path)
        },
        None => file_dialog,
    };

    // Show the file dialog and get the selected file path:
    let file_path = file_dialog.pick_file();
    match file_path {
        Some(path) => {
            info!("User selected file: {path:?}");
            Json(FileSelectionResponse {
                user_cancelled: false,
                // NOTE(review): `to_str().unwrap()` panics on non-UTF-8 paths.
                selected_file_path: path.to_str().unwrap().to_string(),
            })
        },
        None => {
            info!("User cancelled file selection.");
            Json(FileSelectionResponse {
                user_cancelled: true,
                selected_file_path: String::from(""),
            })
        },
    }
}
/// Let the user select some files.
///
/// Multi-file variant of `select_file`: same filter and start-directory
/// handling, but returns every chosen path. An empty list together with
/// `user_cancelled = true` signals a dismissed dialog.
#[post("/select/files", data = "<payload>")]
pub fn select_files(_token: APIToken, payload: Json<SelectFileOptions>) -> Json<FilesSelectionResponse> {
    // Create a new file dialog builder:
    let file_dialog = FileDialogBuilder::new();

    // Set the title of the file dialog:
    let file_dialog = file_dialog.set_title(&payload.title);

    // Set the file type filter if provided:
    let file_dialog = apply_filter(file_dialog, &payload.filter);

    // Set the previous file path if provided:
    let file_dialog = match &payload.previous_file {
        Some(previous) => {
            let previous_path = previous.file_path.as_str();
            file_dialog.set_directory(previous_path)
        },
        None => file_dialog,
    };

    // Show the file dialog and get the selected file path:
    let file_paths = file_dialog.pick_files();
    match file_paths {
        Some(paths) => {
            info!("User selected {} files.", paths.len());
            Json(FilesSelectionResponse {
                user_cancelled: false,
                // NOTE(review): `to_str().unwrap()` panics on non-UTF-8 paths.
                selected_file_paths: paths.iter().map(|p| p.to_str().unwrap().to_string()).collect(),
            })
        }
        None => {
            info!("User cancelled file selection.");
            Json(FilesSelectionResponse {
                user_cancelled: true,
                selected_file_paths: Vec::new(),
            })
        },
    }
}
/// Let the user pick a target path for a file-write operation.
#[post("/save/file", data = "<payload>")]
pub fn save_file(_token: APIToken, payload: Json<SaveFileOptions>) -> Json<FileSaveResponse> {
    // Create a new file dialog builder:
    let file_dialog = FileDialogBuilder::new();

    // Set the title of the file dialog:
    let file_dialog = file_dialog.set_title(&payload.title);

    // Set the file type filter if provided:
    let file_dialog = apply_filter(file_dialog, &payload.filter);

    // Set the previous file path if provided:
    // NOTE(review): `name_file` carries a file path but is fed into
    // `set_directory` — presumably to seed the start location. Confirm whether
    // `set_file_name` was intended for the file-name portion.
    let file_dialog = match &payload.name_file {
        Some(previous) => {
            let previous_path = previous.file_path.as_str();
            file_dialog.set_directory(previous_path)
        },
        None => file_dialog,
    };

    // Displays the file dialogue box and select the file:
    let file_path = file_dialog.save_file();
    match file_path {
        Some(path) => {
            info!("User selected file for writing operation: {path:?}");
            Json(FileSaveResponse {
                user_cancelled: false,
                // NOTE(review): `to_str().unwrap()` panics on non-UTF-8 paths.
                save_file_path: path.to_str().unwrap().to_string(),
            })
        },
        None => {
            info!("User cancelled file selection.");
            Json(FileSaveResponse {
                user_cancelled: true,
                save_file_path: String::from(""),
            })
        },
    }
}
/// Payload: the file a previous dialog run selected (used as start location).
#[derive(Clone, Deserialize)]
pub struct PreviousFile {
    file_path: String,
}

/// Applies an optional file type filter to a FileDialogBuilder.
fn apply_filter(file_dialog: FileDialogBuilder, filter: &Option<FileTypeFilter>) -> FileDialogBuilder {
    match filter {
        // Convert the owned Vec<String> to the &[&str] slice the builder expects:
        Some(f) => file_dialog.add_filter(
            &f.filter_name,
            &f.filter_extensions.iter().map(|s| s.as_str()).collect::<Vec<&str>>(),
        ),
        None => file_dialog,
    }
}

/// Response for the single-file selection endpoint.
#[derive(Serialize)]
pub struct FileSelectionResponse {
    user_cancelled: bool,
    selected_file_path: String,
}

/// Response for the multi-file selection endpoint.
#[derive(Serialize)]
pub struct FilesSelectionResponse {
    user_cancelled: bool,
    selected_file_paths: Vec<String>,
}

/// Response for the save-file endpoint.
#[derive(Serialize)]
pub struct FileSaveResponse {
    user_cancelled: bool,
    save_file_path: String,
}
/// Request payload for registering a global shortcut.
#[derive(Clone, Deserialize)]
pub struct RegisterShortcutRequest {

241
runtime/src/file_actions.rs Normal file
View File

@ -0,0 +1,241 @@
use log::info;
use rocket::post;
use rocket::serde::{Deserialize, Serialize};
use rocket::serde::json::Json;
use tauri::api::dialog::blocking::FileDialogBuilder;
use crate::api_token::APIToken;
/// Payload: the directory a previous dialog run ended in.
#[derive(Clone, Deserialize)]
pub struct PreviousDirectory {
    path: String,
}

/// Payload: a file-type filter (display name plus allowed extensions).
#[derive(Clone, Deserialize)]
pub struct FileTypeFilter {
    filter_name: String,
    filter_extensions: Vec<String>,
}

/// Payload for the file-selection endpoints.
#[derive(Clone, Deserialize)]
pub struct SelectFileOptions {
    title: String,
    previous_file: Option<PreviousFile>,
    filter: Option<FileTypeFilter>,
}

/// Payload for the save-file endpoint.
#[derive(Clone, Deserialize)]
pub struct SaveFileOptions {
    title: String,
    // Used to seed the dialog's start location — see `save_file`.
    name_file: Option<PreviousFile>,
    filter: Option<FileTypeFilter>,
}

/// Response for the directory-selection endpoint.
#[derive(Serialize)]
pub struct DirectorySelectionResponse {
    user_cancelled: bool,
    selected_directory: String,
}

/// Response for the single-file selection endpoint.
#[derive(Serialize)]
pub struct FileSelectionResponse {
    user_cancelled: bool,
    selected_file_path: String,
}

/// Response for the multi-file selection endpoint.
#[derive(Serialize)]
pub struct FilesSelectionResponse {
    user_cancelled: bool,
    selected_file_paths: Vec<String>,
}

/// Response for the save-file endpoint.
#[derive(Serialize)]
pub struct FileSaveResponse {
    user_cancelled: bool,
    save_file_path: String,
}

/// Payload: the file a previous dialog run selected (used as start location).
#[derive(Clone, Deserialize)]
pub struct PreviousFile {
    file_path: String,
}
/// Let the user select a directory.
///
/// When the caller provides a previously used directory, the dialog starts
/// there; otherwise the OS default location is used. Returns the chosen path,
/// or an empty string with `user_cancelled = true` when the dialog was closed.
#[post("/select/directory?<title>", data = "<previous_directory>")]
pub fn select_directory(_token: APIToken, title: &str, previous_directory: Option<Json<PreviousDirectory>>) -> Json<DirectorySelectionResponse> {
    // Open a blocking folder picker, seeded with the previous path when given:
    let folder_path = match previous_directory {
        Some(previous) => {
            let previous_path = previous.path.as_str();
            FileDialogBuilder::new()
                .set_title(title)
                .set_directory(previous_path)
                .pick_folder()
        },
        None => {
            FileDialogBuilder::new()
                .set_title(title)
                .pick_folder()
        },
    };

    match folder_path {
        Some(path) => {
            info!("User selected directory: {path:?}");
            Json(DirectorySelectionResponse {
                user_cancelled: false,
                // Use a lossy conversion instead of `to_str().unwrap()` so that
                // non-UTF-8 paths cannot crash the runtime API:
                selected_directory: path.to_string_lossy().into_owned(),
            })
        },
        None => {
            info!("User cancelled directory selection.");
            Json(DirectorySelectionResponse {
                user_cancelled: true,
                selected_directory: String::from(""),
            })
        },
    }
}
/// Let the user select a file.
///
/// Optionally restricts the selectable files via a type filter and starts in
/// the directory of a previously chosen file. Returns an empty path with
/// `user_cancelled = true` when the dialog was dismissed.
#[post("/select/file", data = "<payload>")]
pub fn select_file(_token: APIToken, payload: Json<SelectFileOptions>) -> Json<FileSelectionResponse> {
    // Build the dialog: title, optional type filter, optional start directory:
    let file_dialog = FileDialogBuilder::new();
    let file_dialog = file_dialog.set_title(&payload.title);
    let file_dialog = apply_filter(file_dialog, &payload.filter);
    let file_dialog = match &payload.previous_file {
        Some(previous) => file_dialog.set_directory(previous.file_path.as_str()),
        None => file_dialog,
    };

    // Show the (blocking) file picker and map the result onto the wire format:
    match file_dialog.pick_file() {
        Some(path) => {
            info!("User selected file: {path:?}");
            Json(FileSelectionResponse {
                user_cancelled: false,
                // Use a lossy conversion instead of `to_str().unwrap()` so that
                // non-UTF-8 paths cannot crash the runtime API:
                selected_file_path: path.to_string_lossy().into_owned(),
            })
        },
        None => {
            info!("User cancelled file selection.");
            Json(FileSelectionResponse {
                user_cancelled: true,
                selected_file_path: String::from(""),
            })
        },
    }
}
/// Let the user select some files.
///
/// Multi-file variant of `select_file`: same filter and start-directory
/// handling, but returns every chosen path. An empty list together with
/// `user_cancelled = true` signals a dismissed dialog.
#[post("/select/files", data = "<payload>")]
pub fn select_files(_token: APIToken, payload: Json<SelectFileOptions>) -> Json<FilesSelectionResponse> {
    // Build the dialog: title, optional type filter, optional start directory:
    let file_dialog = FileDialogBuilder::new();
    let file_dialog = file_dialog.set_title(&payload.title);
    let file_dialog = apply_filter(file_dialog, &payload.filter);
    let file_dialog = match &payload.previous_file {
        Some(previous) => file_dialog.set_directory(previous.file_path.as_str()),
        None => file_dialog,
    };

    // Show the (blocking) multi-file picker:
    match file_dialog.pick_files() {
        Some(paths) => {
            info!("User selected {} files.", paths.len());
            Json(FilesSelectionResponse {
                user_cancelled: false,
                // Use a lossy conversion instead of `to_str().unwrap()` so that
                // non-UTF-8 paths cannot crash the runtime API:
                selected_file_paths: paths.iter().map(|p| p.to_string_lossy().into_owned()).collect(),
            })
        }
        None => {
            info!("User cancelled file selection.");
            Json(FilesSelectionResponse {
                user_cancelled: true,
                selected_file_paths: Vec::new(),
            })
        },
    }
}
/// Let the user pick a target path for a file-write operation.
///
/// Returns an empty path with `user_cancelled = true` when the dialog
/// was dismissed.
#[post("/save/file", data = "<payload>")]
pub fn save_file(_token: APIToken, payload: Json<SaveFileOptions>) -> Json<FileSaveResponse> {
    // Build the dialog: title, optional type filter, optional start location:
    let file_dialog = FileDialogBuilder::new();
    let file_dialog = file_dialog.set_title(&payload.title);
    let file_dialog = apply_filter(file_dialog, &payload.filter);

    // NOTE(review): `name_file` carries a file path but is fed into
    // `set_directory` — presumably to seed the start location. Confirm whether
    // `set_file_name` was intended for the file-name portion.
    let file_dialog = match &payload.name_file {
        Some(previous) => file_dialog.set_directory(previous.file_path.as_str()),
        None => file_dialog,
    };

    // Show the (blocking) save dialog:
    match file_dialog.save_file() {
        Some(path) => {
            info!("User selected file for writing operation: {path:?}");
            Json(FileSaveResponse {
                user_cancelled: false,
                // Use a lossy conversion instead of `to_str().unwrap()` so that
                // non-UTF-8 paths cannot crash the runtime API:
                save_file_path: path.to_string_lossy().into_owned(),
            })
        },
        None => {
            info!("User cancelled file selection.");
            Json(FileSaveResponse {
                user_cancelled: true,
                save_file_path: String::from(""),
            })
        },
    }
}
/// Applies an optional file type filter to a FileDialogBuilder.
///
/// When no filter is given, the builder passes through unchanged.
fn apply_filter(file_dialog: FileDialogBuilder, filter: &Option<FileTypeFilter>) -> FileDialogBuilder {
    if let Some(f) = filter {
        // The builder wants a &[&str]; borrow each owned extension string:
        let extensions: Vec<&str> = f.filter_extensions.iter().map(String::as_str).collect();
        file_dialog.add_filter(&f.filter_name, &extensions)
    } else {
        file_dialog
    }
}

View File

@ -17,4 +17,6 @@ pub mod qdrant;
pub mod certificate_factory;
pub mod runtime_api_token;
pub mod stale_process_cleanup;
mod sidecar_types;
mod sidecar_types;
pub mod tokenizer;
pub mod file_actions;

View File

@ -11,7 +11,7 @@ use mindwork_ai_studio::environment::is_dev;
use mindwork_ai_studio::log::init_logging;
use mindwork_ai_studio::metadata::MetaData;
use mindwork_ai_studio::runtime_api::start_runtime_api;
use mindwork_ai_studio::tokenizer::{init_tokenizer};
#[tokio::main]
async fn main() {
@ -43,8 +43,12 @@ async fn main() {
info!("Running in production mode.");
}
if let Err(e) = init_tokenizer() {
warn!(Source = "Tokenizer"; "Error during the initialisation of the tokenizer: {}", e);
}
generate_runtime_certificate();
start_runtime_api();
start_tauri();
}
}

View File

@ -72,10 +72,10 @@ pub fn start_runtime_api() {
crate::app_window::get_event_stream,
crate::app_window::check_for_update,
crate::app_window::install_update,
crate::app_window::select_directory,
crate::app_window::select_file,
crate::app_window::select_files,
crate::app_window::save_file,
crate::file_actions::select_directory,
crate::file_actions::select_file,
crate::file_actions::select_files,
crate::file_actions::save_file,
crate::secret::get_secret,
crate::secret::store_secret,
crate::secret::delete_secret,
@ -89,6 +89,9 @@ pub fn start_runtime_api() {
crate::file_data::extract_data,
crate::log::get_log_paths,
crate::log::log_event,
crate::tokenizer::token_count,
crate::tokenizer::validate_tokenizer,
crate::tokenizer::store_tokenizer,
crate::app_window::register_shortcut,
crate::app_window::validate_shortcut,
crate::app_window::suspend_shortcuts,

194
runtime/src/tokenizer.rs Normal file
View File

@ -0,0 +1,194 @@
use rocket::yansi::Paint;
use std::fs;
use std::path::{PathBuf};
use std::sync::OnceLock;
use rocket::{post};
use rocket::serde::json::Json;
use rocket::serde::Serialize;
use serde::Deserialize;
use tokenizers::Error;
use tokenizers::tokenizer::{Tokenizer, Error as TokenizerError};
use crate::api_token::APIToken;
use crate::environment::{DATA_DIRECTORY};
// Process-wide tokenizer instance, populated once by `init_tokenizer`.
static TOKENIZER: OnceLock<Tokenizer> = OnceLock::new();

/// Request payload: the text whose tokens should be counted.
#[derive(Deserialize)]
pub struct SetTokenText {
    pub text: String,
}

/// Request payload for storing, moving, or deleting a custom tokenizer file.
#[derive(Clone, Deserialize)]
pub struct TokenizerStorage {
    // Model the tokenizer belongs to now.
    model_id: String,
    // Model the tokenizer previously belonged to; empty when there was none.
    previous_model_id: String,
    // Source path of the tokenizer file; empty requests deletion of the previous one.
    file_path: String,
}

/// Request payload for validating a tokenizer file on disk.
#[derive(Clone, Deserialize)]
pub struct TokenizerValidation {
    file_path: String,
}

/// Common response shape for all tokenizer endpoints.
#[derive(Serialize)]
pub struct TokenizerResponse {
    success: bool,
    // Only meaningful for count/validate requests; 0 otherwise.
    token_count: usize,
    message: String,
}
impl From<Result<usize, TokenizerError>> for TokenizerResponse {
fn from(result: Result<usize, TokenizerError>) -> Self {
match result {
Ok(count) => TokenizerResponse {
success: true,
token_count: count,
message: "Success".to_string(),
},
Err(e) => TokenizerResponse {
success: false,
token_count: 0,
message: e.to_string(),
},
}
}
}
pub fn init_tokenizer() -> Result<(), Error>{
let mut target_dir = PathBuf::from("target");
target_dir.push("tokenizers");
fs::create_dir_all(&target_dir)?;
let mut local_tokenizer_path = target_dir.clone();
local_tokenizer_path.push("tokenizer.json");
TOKENIZER.set(Tokenizer::from_file(local_tokenizer_path)?).expect("Could not set the tokenizer.");
Ok(())
}
/// Checks that the file at `path` is a loadable tokenizer that produces sane
/// output for a short probe string. Returns the probe's token count on success.
fn validate_tokenizer_at_path(path: &PathBuf) -> Result<usize, TokenizerError> {
    // The path must point at an existing regular file:
    if !path.is_file() {
        return Err(TokenizerError::from(format!(
            "Tokenizer file was not found: {}",
            path.display()
        )));
    }

    // Try to load the tokenizer definition from disk:
    let tokenizer = match Tokenizer::from_file(path) {
        Ok(t) => t,
        Err(e) => {
            println!("Failed to load tokenizer from {}: {}", Paint::red(&path.display()), e);
            return Err(TokenizerError::from(format!(
                "Failed to load tokenizer from '{}': {}",
                path.display(),
                e
            )));
        },
    };

    println!("Loaded tokenizer from {}", Paint::green(&path.display()));

    // Encode a fixed probe string to verify the tokenizer actually works:
    let test_string = "Hello, world! This is a test string for tokenizer validation.";
    let encoding = match tokenizer.encode(test_string, true) {
        Ok(enc) => enc,
        Err(e) => {
            println!(
                "Tokenizer failed to encode validation string for {}: {}",
                Paint::red(&path.display()),
                e
            );
            return Err(TokenizerError::from(format!(
                "Tokenizer failed to encode validation string: {}",
                e
            )));
        },
    };

    // Sanity checks on the produced encoding:
    let token_count = encoding.len();
    if token_count == 0 {
        return Err(TokenizerError::from(
            "Tokenizer produced 0 tokens for test string. The tokenizer is likely invalid or misconfigured."
        ));
    }

    if encoding.get_tokens().iter().any(|t| t.is_empty()) {
        return Err(TokenizerError::from(
            "Tokenizer produced empty tokens. The tokenizer is invalid."
        ));
    }

    Ok(token_count)
}
/// Persists a custom tokenizer file below `<data-dir>/tokenizers/<model_id>/`.
///
/// Three cases are handled:
/// - `file_path` is empty: delete the previous model's tokenizer folder, if any.
/// - The source already lives in a *different* previous model's folder: move it
///   to the new model's folder and remove the now-obsolete previous folder.
/// - Otherwise: copy the source file into the model's folder.
fn handle_tokenizer_store(payload: &TokenizerStorage) -> Result<(), std::io::Error> {
    let data_dir = DATA_DIRECTORY
        .get()
        .ok_or_else(|| std::io::Error::new(std::io::ErrorKind::Other, "DATA_DIRECTORY not initialized"))?;

    let base_path = PathBuf::from(data_dir).join("tokenizers");

    // An empty file path means "remove the previously stored tokenizer":
    if payload.file_path.trim().is_empty() {
        if payload.previous_model_id.trim().is_empty() {
            return Ok(()); // Nothing to delete
        }

        let previous_path = base_path.join(&payload.previous_model_id);
        fs::remove_dir_all(previous_path)?;
        return Ok(());
    }

    // Resolve the source file and its bare file name:
    let source_path = PathBuf::from(&payload.file_path);
    let source_name = source_path.file_name()
        .and_then(|n| n.to_str())
        .ok_or_else(|| std::io::Error::new(std::io::ErrorKind::InvalidInput, "Invalid tokenizer file path"))?;

    let model_dir = base_path.join(&payload.model_id);
    fs::create_dir_all(&model_dir)?;
    let destination_path = model_dir.join(source_name);
    println!("Moving tokenizer file from {} to {}", source_path.display(), destination_path.display());

    let previous_path = base_path.join(&payload.previous_model_id);

    // Only treat this as a "rename" when the model id actually changed. The
    // previous code also hit this branch when previous_model_id == model_id,
    // in which case `remove_dir_all(previous_path)` deleted the folder the
    // tokenizer had just been moved into:
    let model_was_renamed = !payload.previous_model_id.trim().is_empty()
        && payload.previous_model_id != payload.model_id;

    if model_was_renamed && source_path.starts_with(&previous_path) {
        // The file already lives in our storage under the old model id: move it
        // and drop the now-obsolete previous folder.
        fs::rename(&source_path, &destination_path)?;
        if previous_path.exists() {
            fs::remove_dir_all(previous_path)?;
        }
    } else if source_path != destination_path {
        // Guard against source == destination: fs::copy onto itself would
        // truncate the file before reading it.
        fs::copy(&source_path, &destination_path)?;
    }

    Ok(())
}
/// Counts the tokens of `text` using the process-wide tokenizer.
///
/// Fails when the text is empty (after trimming), the tokenizer was never
/// initialized, or the encoding itself fails.
pub fn get_token_count(text: &str) -> Result<usize, TokenizerError> {
    if text.trim().is_empty() {
        return Err(TokenizerError::from("Input text is empty"));
    }

    // Borrow the shared tokenizer instead of cloning it: a Tokenizer clone is
    // a deep copy of the whole vocabulary and is not needed for encoding.
    let tokenizer = TOKENIZER.get().ok_or_else(|| TokenizerError::from("Tokenizer not initialized"))?;
    let enc = tokenizer.encode(text, true)?;
    Ok(enc.len())
}
#[post("/tokenizer/count", data = "<req>")]
pub fn token_count(_token: APIToken, req: Json<SetTokenText>) -> Json<TokenizerResponse> {
Json(get_token_count(&req.text).into())
}
#[post("/tokenizer/validate", data = "<payload>")]
pub fn validate_tokenizer(_token: APIToken, payload: Json<TokenizerValidation>) -> Json<TokenizerResponse>{
println!("Received tokenizer validation request: {}", payload.file_path);
Json(validate_tokenizer_at_path(&PathBuf::from(payload.file_path.clone())).into())
}
/// POST endpoint: stores, moves, or deletes a custom tokenizer file.
#[post("/tokenizer/store", data = "<payload>")]
pub fn store_tokenizer(_token: APIToken, payload: Json<TokenizerStorage>) -> Json<TokenizerResponse>{
    println!("Received tokenizer store request: {}, {}, {}", payload.model_id, payload.previous_model_id, payload.file_path);

    // Map the I/O outcome onto the common response shape (no token count here):
    let response = match handle_tokenizer_store(&payload) {
        Ok(()) => TokenizerResponse {
            success: true,
            token_count: 0,
            message: "Success".to_string(),
        },

        Err(e) => TokenizerResponse {
            success: false,
            token_count: 0,
            message: e.to_string(),
        },
    };

    Json(response)
}