add upload for tokenizer (in progress)

This commit is contained in:
PaulKoudelka 2026-03-27 15:09:05 +01:00
parent 562520cbf4
commit 409852907c
7 changed files with 52 additions and 12 deletions

View File

@ -3334,6 +3334,9 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2331453405"] = "(O
-- Add
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2646845972"] = "Add"
-- Selected file path for the custom tokenizer
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T278585345"] = "Selected file path for the custom tokenizer"
-- No models loaded or available.
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2810182573"] = "No models loaded or available."
@ -3343,6 +3346,9 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2842060373"] = "In
-- Currently, we cannot query the embedding models for the selected provider and/or host. Therefore, please enter the model name manually.
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T290547799"] = "Currently, we cannot query the embedding models for the selected provider and/or host. Therefore, please enter the model name manually."
-- Choose a custom tokenizer here
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T3787466119"] = "Choose a custom tokenizer here"
-- Model selection
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T416738168"] = "Model selection"

View File

@ -11,6 +11,7 @@
AdornmentIcon="@Icons.Material.Filled.AttachFile"
UserAttributes="@SPELLCHECK_ATTRIBUTES"
Variant="Variant.Outlined"
Clearable="this.IsClearable"
/>
<MudButton StartIcon="@Icons.Material.Filled.FolderOpen" Variant="Variant.Outlined" Color="Color.Primary" Disabled="this.Disabled" OnClick="@this.OpenFileDialog">

View File

@ -28,6 +28,9 @@ public partial class SelectFile : MSGComponentBase
[Parameter]
public Func<string, string?> Validation { get; set; } = _ => null;
/// <summary>
/// Whether the selected file path may be cleared again by the user.
/// Defaults to <c>false</c>.
/// </summary>
/// <remarks>
/// NOTE(review): presumably forwarded to the <c>Clearable</c> attribute of the
/// path input in the component markup — confirm against the .razor file.
/// </remarks>
[Parameter]
public bool IsClearable { get; set; }
[Inject]
public RustService RustService { get; set; } = null!;

View File

@ -72,8 +72,7 @@
AdornmentColor="Color.Info"
Validation="@this.ValidateManuallyModel"
UserAttributes="@SPELLCHECK_ATTRIBUTES"
HelperText="@T("Currently, we cannot query the embedding models for the selected provider and/or host. Therefore, please enter the model name manually.")"
/>
HelperText="@T("Currently, we cannot query the embedding models for the selected provider and/or host. Therefore, please enter the model name manually.")"/>
}
else
{
@ -123,10 +122,13 @@
AdornmentIcon="@Icons.Material.Filled.Lightbulb"
AdornmentColor="Color.Info"
Validation="@this.providerValidation.ValidatingInstanceName"
UserAttributes="@SPELLCHECK_ATTRIBUTES"
/>
<AttachDocuments Name="File Attachments" Layer="@DropLayers.PAGES" @bind-DocumentPaths="@this.chatDocumentPaths" CatchAllDocuments="true" UseSmallForm="true" ValidateMediaFileTypes="true" AllowedFileTypes="[FileTypes.JSON]"/>
UserAttributes="@SPELLCHECK_ATTRIBUTES"/>
<MudJustifiedText Typo="Typo.body1" Class="mb-3">
@T("For better embeddings and less storage usage, it's recommended to use a custom tokenizer to enable a more accurate token count.")
</MudJustifiedText>
@if (this.DataModel != default){
<SelectFile File="@this.dataFilePath" FileChanged="@this.OnDataFilePathChanged" Label="@T("Selected file path for the custom tokenizer")" FileDialogTitle="@T("Choose a custom tokenizer here")" Filter="[FileTypes.JSON]" IsClearable="true"/>
}
</MudForm>
<Issues IssuesData="@this.dataIssues"/>
</DialogContent>

View File

@ -90,6 +90,7 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId
private string dataAPIKeyStorageIssue = string.Empty;
private string dataEditingPreviousInstanceName = string.Empty;
private string dataLoadingModelsIssue = string.Empty;
private string dataFilePath = string.Empty;
// We get the form reference from Blazor code to validate it manually:
private MudForm form = null!;
@ -267,6 +268,13 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId
}
}
/// <summary>
/// Handles a change of the custom tokenizer file selected in this dialog.
/// Stores the new path and, when a path is present, asks the Rust service
/// to validate and store the tokenizer for the current model.
/// </summary>
/// <param name="filePath">The newly selected file path; empty when the selection was cleared.</param>
private async Task OnDataFilePathChanged(string filePath)
{
    // Keep the backing field in sync; the file selector is bound one-way to
    // this field, so without the assignment it would never show the new path.
    this.dataFilePath = filePath;

    // A cleared selection carries no file, so there is nothing to validate.
    // NOTE(review): if the backend should also forget a previously stored
    // tokenizer when the selection is cleared, that needs a dedicated call.
    if (string.IsNullOrWhiteSpace(filePath))
    {
        return;
    }

    // NOTE(review): the returned TokenizerUploadResponse is not evaluated yet;
    // surface failures to the user once the success contract is settled.
    await this.RustService.ValidateAndStoreTokenizer(this.DataModel.DisplayName, filePath);
}
private void OnHostChanged(Host selectedHost)
{
// When the host changes, reset the model selection state:

View File

@ -0,0 +1,3 @@
namespace AIStudio.Tools.Rust;
/// <summary>
/// Result of a request to validate and store a tokenizer file.
/// </summary>
/// <param name="Success">
/// Numeric status of the operation. The C# side uses <c>-1</c> when the HTTP
/// request itself fails; other values come from the backend response.
/// </param>
/// <param name="Response">Message text that accompanies the status.</param>
public readonly record struct TokenizerUploadResponse(
    int Success,
    string Response);

View File

@ -81,4 +81,21 @@ public sealed partial class RustService
return await result.Content.ReadFromJsonAsync<FileSaveResponse>(this.jsonRustSerializerOptions);
}
/// <summary>
/// Sends a request to <c>/tokenizer/val-and-store</c> to validate a tokenizer
/// file and store it for the given model.
/// </summary>
/// <param name="modelId">The model identifier, sent as <c>model_id</c>; may be <c>null</c>.</param>
/// <param name="filePath">The path of the tokenizer file, sent as <c>file_path</c>.</param>
/// <returns>
/// The deserialized response of the endpoint, or a response with
/// <c>Success == -1</c> when the HTTP status code does not indicate success.
/// </returns>
public async Task<TokenizerUploadResponse> ValidateAndStoreTokenizer(string? modelId, string filePath)
{
    var payload = new
    {
        model_id = modelId,
        file_path = filePath,
    };

    // Dispose the response once its content has been read or the error was reported.
    using var result = await this.http.PostAsJsonAsync("/tokenizer/val-and-store", payload, this.jsonRustSerializerOptions);
    if (!result.IsSuccessStatusCode)
    {
        // Use a message template instead of string interpolation so the status
        // code is captured as a structured logging property.
        this.logger!.LogError("Failed to validate and store the tokenizer '{StatusCode}'", result.StatusCode);
        return new TokenizerUploadResponse(-1, "An error occurred while validating and storing the tokenizer");
    }

    return await result.Content.ReadFromJsonAsync<TokenizerUploadResponse>(this.jsonRustSerializerOptions);
}
}