diff --git a/app/MindWork AI Studio/Assistants/I18N/allTexts.lua b/app/MindWork AI Studio/Assistants/I18N/allTexts.lua
index 361fb0e6..7369d82d 100644
--- a/app/MindWork AI Studio/Assistants/I18N/allTexts.lua
+++ b/app/MindWork AI Studio/Assistants/I18N/allTexts.lua
@@ -3334,6 +3334,9 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2331453405"] = "(O
-- Add
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2646845972"] = "Add"
+-- Selected file path for the custom tokenizer
+UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T278585345"] = "Selected file path for the custom tokenizer"
+
-- No models loaded or available.
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2810182573"] = "No models loaded or available."
@@ -3343,6 +3346,9 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2842060373"] = "In
-- Currently, we cannot query the embedding models for the selected provider and/or host. Therefore, please enter the model name manually.
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T290547799"] = "Currently, we cannot query the embedding models for the selected provider and/or host. Therefore, please enter the model name manually."
+-- Choose a custom tokenizer here
+UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T3787466119"] = "Choose a custom tokenizer here"
+
-- Model selection
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T416738168"] = "Model selection"
diff --git a/app/MindWork AI Studio/Components/SelectFile.razor b/app/MindWork AI Studio/Components/SelectFile.razor
index de3971e5..561b11c0 100644
--- a/app/MindWork AI Studio/Components/SelectFile.razor
+++ b/app/MindWork AI Studio/Components/SelectFile.razor
@@ -11,6 +11,7 @@
AdornmentIcon="@Icons.Material.Filled.AttachFile"
UserAttributes="@SPELLCHECK_ATTRIBUTES"
Variant="Variant.Outlined"
+ Clearable="this.IsClearable"
/>
diff --git a/app/MindWork AI Studio/Components/SelectFile.razor.cs b/app/MindWork AI Studio/Components/SelectFile.razor.cs
index c7b4dace..309204be 100644
--- a/app/MindWork AI Studio/Components/SelectFile.razor.cs
+++ b/app/MindWork AI Studio/Components/SelectFile.razor.cs
@@ -27,6 +27,9 @@ public partial class SelectFile : MSGComponentBase
[Parameter]
public Func Validation { get; set; } = _ => null;
+
+    [Parameter]
+    public bool IsClearable { get; set; } // Off unless the caller opts in; passed on as the Clearable attribute in SelectFile.razor.
[Inject]
public RustService RustService { get; set; } = null!;
diff --git a/app/MindWork AI Studio/Dialogs/EmbeddingProviderDialog.razor b/app/MindWork AI Studio/Dialogs/EmbeddingProviderDialog.razor
index 6e5a595b..421dae83 100644
--- a/app/MindWork AI Studio/Dialogs/EmbeddingProviderDialog.razor
+++ b/app/MindWork AI Studio/Dialogs/EmbeddingProviderDialog.razor
@@ -8,7 +8,7 @@
@* ReSharper disable once CSharpWarnings::CS8974 *@
-
+
@foreach (LLMProviders provider in Enum.GetValues(typeof(LLMProviders)))
{
if (provider.ProvideEmbeddingAPI() || provider is LLMProviders.NONE)
@@ -23,7 +23,7 @@
@T("Create account")
-
+
@if (this.DataLLMProvider.IsAPIKeyNeeded(this.DataHost))
{
@@ -72,15 +72,14 @@
AdornmentColor="Color.Info"
Validation="@this.ValidateManuallyModel"
UserAttributes="@SPELLCHECK_ATTRIBUTES"
- HelperText="@T("Currently, we cannot query the embedding models for the selected provider and/or host. Therefore, please enter the model name manually.")"
- />
+ HelperText="@T("Currently, we cannot query the embedding models for the selected provider and/or host. Therefore, please enter the model name manually.")"/>
}
else
{
@T("Load")
- @if(this.availableModels.Count is 0)
+ @if (this.availableModels.Count is 0)
{
@T("No models loaded or available.")
@@ -123,10 +122,13 @@
AdornmentIcon="@Icons.Material.Filled.Lightbulb"
AdornmentColor="Color.Info"
Validation="@this.providerValidation.ValidatingInstanceName"
- UserAttributes="@SPELLCHECK_ATTRIBUTES"
- />
-
-
+ UserAttributes="@SPELLCHECK_ATTRIBUTES"/>
+
+ @T("For better embeddings and less storage usage, it's recommended to use a custom tokenizer to enable a more accurate token count.")
+
+ @if (this.DataModel != default){
+
+ }
@@ -135,7 +137,7 @@
@T("Cancel")
- @if(this.IsEditing)
+ @if (this.IsEditing)
{
@T("Update")
}
@@ -145,4 +147,4 @@
}
-
\ No newline at end of file
+
diff --git a/app/MindWork AI Studio/Dialogs/EmbeddingProviderDialog.razor.cs b/app/MindWork AI Studio/Dialogs/EmbeddingProviderDialog.razor.cs
index a3b66dbe..039df90d 100644
--- a/app/MindWork AI Studio/Dialogs/EmbeddingProviderDialog.razor.cs
+++ b/app/MindWork AI Studio/Dialogs/EmbeddingProviderDialog.razor.cs
@@ -90,6 +90,7 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId
private string dataAPIKeyStorageIssue = string.Empty;
private string dataEditingPreviousInstanceName = string.Empty;
private string dataLoadingModelsIssue = string.Empty;
+ private string dataFilePath = string.Empty;
// We get the form reference from Blazor code to validate it manually:
private MudForm form = null!;
@@ -266,6 +267,13 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId
await this.form.Validate();
}
}
+
+    // Remembers the newly selected tokenizer path, then asks the RustService to validate and store it for the current model.
+    private async Task OnDataFilePathChanged(string filePath)
+    {
+        this.dataFilePath = filePath;
+        await this.RustService.ValidateAndStoreTokenizer(this.DataModel.DisplayName, filePath);
+    }
private void OnHostChanged(Host selectedHost)
{
@@ -309,4 +317,4 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId
};
private bool IsNoneProvider => this.DataLLMProvider is LLMProviders.NONE;
-}
\ No newline at end of file
+}
diff --git a/app/MindWork AI Studio/Tools/Rust/TokenizerUploadResponse.cs b/app/MindWork AI Studio/Tools/Rust/TokenizerUploadResponse.cs
new file mode 100644
index 00000000..c141ec74
--- /dev/null
+++ b/app/MindWork AI Studio/Tools/Rust/TokenizerUploadResponse.cs
@@ -0,0 +1,8 @@
+namespace AIStudio.Tools.Rust;
+
+/// <summary>
+/// Outcome of asking the Rust service to validate and store a tokenizer file.
+/// </summary>
+/// <param name="Success">Status code of the operation. RustService uses -1 when the HTTP call itself fails; other values come from the response payload (their meaning is not visible here; confirm against the Rust side).</param>
+/// <param name="Response">Message text that accompanies the status code.</param>
+public readonly record struct TokenizerUploadResponse(int Success, string Response);
diff --git a/app/MindWork AI Studio/Tools/Services/RustService.FileSystem.cs b/app/MindWork AI Studio/Tools/Services/RustService.FileSystem.cs
index c55b6a8b..161fae95 100644
--- a/app/MindWork AI Studio/Tools/Services/RustService.FileSystem.cs
+++ b/app/MindWork AI Studio/Tools/Services/RustService.FileSystem.cs
@@ -81,4 +81,21 @@ public sealed partial class RustService
return await result.Content.ReadFromJsonAsync(this.jsonRustSerializerOptions);
}
+
+    /// <summary>
+    /// Posts the model id and tokenizer file path to the "/tokenizer/val-and-store" endpoint.
+    /// </summary>
+    /// <returns>The deserialized response, or a response with Success = -1 when the HTTP status is not a success code.</returns>
+    public async Task<TokenizerUploadResponse> ValidateAndStoreTokenizer(string? modelId, string filePath)
+    {
+        var payload = new { model_id = modelId, file_path = filePath };
+        var result = await this.http.PostAsJsonAsync("/tokenizer/val-and-store", payload, this.jsonRustSerializerOptions);
+        if (!result.IsSuccessStatusCode)
+        {
+            this.logger!.LogError("Failed to validate and store the tokenizer '{StatusCode}'", result.StatusCode);
+            return new TokenizerUploadResponse(-1, "An error occurred while validating and storing the tokenizer");
+        }
+
+        return await result.Content.ReadFromJsonAsync<TokenizerUploadResponse>(this.jsonRustSerializerOptions);
+    }
}
\ No newline at end of file