diff --git a/app/MindWork AI Studio/Assistants/I18N/allTexts.lua b/app/MindWork AI Studio/Assistants/I18N/allTexts.lua index 361fb0e6..7369d82d 100644 --- a/app/MindWork AI Studio/Assistants/I18N/allTexts.lua +++ b/app/MindWork AI Studio/Assistants/I18N/allTexts.lua @@ -3334,6 +3334,9 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2331453405"] = "(O -- Add UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2646845972"] = "Add" +-- Selected file path for the custom tokenizer +UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T278585345"] = "Selected file path for the custom tokenizer" + -- No models loaded or available. UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2810182573"] = "No models loaded or available." @@ -3343,6 +3346,9 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2842060373"] = "In -- Currently, we cannot query the embedding models for the selected provider and/or host. Therefore, please enter the model name manually. UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T290547799"] = "Currently, we cannot query the embedding models for the selected provider and/or host. Therefore, please enter the model name manually." 
+-- Choose a custom tokenizer here +UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T3787466119"] = "Choose a custom tokenizer here" + -- Model selection UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T416738168"] = "Model selection" diff --git a/app/MindWork AI Studio/Components/SelectFile.razor b/app/MindWork AI Studio/Components/SelectFile.razor index de3971e5..561b11c0 100644 --- a/app/MindWork AI Studio/Components/SelectFile.razor +++ b/app/MindWork AI Studio/Components/SelectFile.razor @@ -11,6 +11,7 @@ AdornmentIcon="@Icons.Material.Filled.AttachFile" UserAttributes="@SPELLCHECK_ATTRIBUTES" Variant="Variant.Outlined" + Clearable="this.IsClearable" /> diff --git a/app/MindWork AI Studio/Components/SelectFile.razor.cs b/app/MindWork AI Studio/Components/SelectFile.razor.cs index c7b4dace..309204be 100644 --- a/app/MindWork AI Studio/Components/SelectFile.razor.cs +++ b/app/MindWork AI Studio/Components/SelectFile.razor.cs @@ -27,6 +27,9 @@ public partial class SelectFile : MSGComponentBase [Parameter] public Func Validation { get; set; } = _ => null; + + [Parameter] + public bool IsClearable { get; set; } = false; [Inject] public RustService RustService { get; set; } = null!; diff --git a/app/MindWork AI Studio/Dialogs/EmbeddingProviderDialog.razor b/app/MindWork AI Studio/Dialogs/EmbeddingProviderDialog.razor index 6e5a595b..421dae83 100644 --- a/app/MindWork AI Studio/Dialogs/EmbeddingProviderDialog.razor +++ b/app/MindWork AI Studio/Dialogs/EmbeddingProviderDialog.razor @@ -8,7 +8,7 @@ @* ReSharper disable once CSharpWarnings::CS8974 *@ - + @foreach (LLMProviders provider in Enum.GetValues(typeof(LLMProviders))) { if (provider.ProvideEmbeddingAPI() || provider is LLMProviders.NONE) @@ -23,7 +23,7 @@ @T("Create account") - + @if (this.DataLLMProvider.IsAPIKeyNeeded(this.DataHost)) { @@ -72,15 +72,14 @@ AdornmentColor="Color.Info" Validation="@this.ValidateManuallyModel" UserAttributes="@SPELLCHECK_ATTRIBUTES" - 
HelperText="@T("Currently, we cannot query the embedding models for the selected provider and/or host. Therefore, please enter the model name manually.")" - /> + HelperText="@T("Currently, we cannot query the embedding models for the selected provider and/or host. Therefore, please enter the model name manually.")"/> } else { @T("Load") - @if(this.availableModels.Count is 0) + @if (this.availableModels.Count is 0) { @T("No models loaded or available.") @@ -123,10 +122,13 @@ AdornmentIcon="@Icons.Material.Filled.Lightbulb" AdornmentColor="Color.Info" Validation="@this.providerValidation.ValidatingInstanceName" - UserAttributes="@SPELLCHECK_ATTRIBUTES" - /> - - + UserAttributes="@SPELLCHECK_ATTRIBUTES"/> + + @T("For better embeddings and less storage usage, it's recommended to use a custom tokenizer to enable a more accurate token count.") + + @if (this.DataModel != default){ + + } @@ -135,7 +137,7 @@ @T("Cancel") - @if(this.IsEditing) + @if (this.IsEditing) { @T("Update") } @@ -145,4 +147,4 @@ } - \ No newline at end of file + diff --git a/app/MindWork AI Studio/Dialogs/EmbeddingProviderDialog.razor.cs b/app/MindWork AI Studio/Dialogs/EmbeddingProviderDialog.razor.cs index a3b66dbe..039df90d 100644 --- a/app/MindWork AI Studio/Dialogs/EmbeddingProviderDialog.razor.cs +++ b/app/MindWork AI Studio/Dialogs/EmbeddingProviderDialog.razor.cs @@ -90,6 +90,7 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId private string dataAPIKeyStorageIssue = string.Empty; private string dataEditingPreviousInstanceName = string.Empty; private string dataLoadingModelsIssue = string.Empty; + private string dataFilePath = string.Empty; // We get the form reference from Blazor code to validate it manually: private MudForm form = null!; @@ -266,6 +267,13 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId await this.form.Validate(); } } + + private async Task OnDataFilePathChanged(string filePath) + { + await 
this.RustService.ValidateAndStoreTokenizer(this.DataModel.DisplayName, filePath); + } + + private void OnHostChanged(Host selectedHost) { @@ -309,4 +317,4 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId }; private bool IsNoneProvider => this.DataLLMProvider is LLMProviders.NONE; -} \ No newline at end of file +} diff --git a/app/MindWork AI Studio/Tools/Rust/TokenizerUploadResponse.cs b/app/MindWork AI Studio/Tools/Rust/TokenizerUploadResponse.cs new file mode 100644 index 00000000..c141ec74 --- /dev/null +++ b/app/MindWork AI Studio/Tools/Rust/TokenizerUploadResponse.cs @@ -0,0 +1,3 @@ +namespace AIStudio.Tools.Rust; + +public readonly record struct TokenizerUploadResponse(int Success, string Response); \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/Services/RustService.FileSystem.cs b/app/MindWork AI Studio/Tools/Services/RustService.FileSystem.cs index c55b6a8b..161fae95 100644 --- a/app/MindWork AI Studio/Tools/Services/RustService.FileSystem.cs +++ b/app/MindWork AI Studio/Tools/Services/RustService.FileSystem.cs @@ -81,4 +81,21 @@ public sealed partial class RustService return await result.Content.ReadFromJsonAsync(this.jsonRustSerializerOptions); } + + public async Task ValidateAndStoreTokenizer(string? modelId, string filePath) + { + var result = await this.http.PostAsJsonAsync("/tokenizer/val-and-store", new { + model_id = modelId, + file_path = filePath, + }, this.jsonRustSerializerOptions); + + if (!result.IsSuccessStatusCode) + { + this.logger!.LogError($"Failed to validate and store the tokenizer '{result.StatusCode}'"); + return new TokenizerUploadResponse(-1, "An error occurred while validating and storing the tokenizer"); + } + + return await result.Content.ReadFromJsonAsync(this.jsonRustSerializerOptions); + } + } \ No newline at end of file