add upload for tokenizer (in progress)

This commit is contained in:
PaulKoudelka 2026-03-27 15:09:05 +01:00
parent 562520cbf4
commit 409852907c
7 changed files with 52 additions and 12 deletions

View File

@ -3334,6 +3334,9 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2331453405"] = "(O
-- Add
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2646845972"] = "Add"
-- Selected file path for the custom tokenizer
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T278585345"] = "Selected file path for the custom tokenizer"
-- No models loaded or available.
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2810182573"] = "No models loaded or available."
@ -3343,6 +3346,9 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2842060373"] = "In
-- Currently, we cannot query the embedding models for the selected provider and/or host. Therefore, please enter the model name manually.
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T290547799"] = "Currently, we cannot query the embedding models for the selected provider and/or host. Therefore, please enter the model name manually."
-- Choose a custom tokenizer here
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T3787466119"] = "Choose a custom tokenizer here"
-- Model selection
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T416738168"] = "Model selection"

View File

@ -11,6 +11,7 @@
AdornmentIcon="@Icons.Material.Filled.AttachFile"
UserAttributes="@SPELLCHECK_ATTRIBUTES"
Variant="Variant.Outlined"
Clearable="this.IsClearable"
/>
<MudButton StartIcon="@Icons.Material.Filled.FolderOpen" Variant="Variant.Outlined" Color="Color.Primary" Disabled="this.Disabled" OnClick="@this.OpenFileDialog">

View File

@ -27,6 +27,9 @@ public partial class SelectFile : MSGComponentBase
[Parameter]
public Func<string, string?> Validation { get; set; } = _ => null;
/// <summary>
/// Component parameter that defaults to <c>false</c>.
/// NOTE(review): presumably forwarded to the <c>Clearable</c> parameter of the
/// file path text field in this component's markup — confirm against the .razor file.
/// </summary>
[Parameter]
public bool IsClearable { get; set; } = false;
[Inject]
public RustService RustService { get; set; } = null!;

View File

@ -8,7 +8,7 @@
<MudForm @ref="@this.form" @bind-IsValid="@this.dataIsValid" @bind-Errors="@this.dataIssues">
<MudStack Row="@true" AlignItems="AlignItems.Center">
@* ReSharper disable once CSharpWarnings::CS8974 *@
<MudSelect @bind-Value="@this.DataLLMProvider" Label="@T("Provider")" Class="mb-3" OpenIcon="@Icons.Material.Filled.AccountBalance" AdornmentColor="Color.Info" Adornment="Adornment.Start" Validation="@this.providerValidation.ValidatingProvider">
<MudSelect @bind-Value="@this.DataLLMProvider" Label="@T("Provider")" Class="mb-3" OpenIcon="@Icons.Material.Filled.AccountBalance" AdornmentColor="Color.Info" Adornment="Adornment.Start" Validation="@this.providerValidation.ValidatingProvider">
@foreach (LLMProviders provider in Enum.GetValues(typeof(LLMProviders)))
{
if (provider.ProvideEmbeddingAPI() || provider is LLMProviders.NONE)
@ -23,7 +23,7 @@
@T("Create account")
</MudButton>
</MudStack>
@if (this.DataLLMProvider.IsAPIKeyNeeded(this.DataHost))
{
<SecretInputField Secret="@this.dataAPIKey" SecretChanged="@this.OnAPIKeyChanged" Label="@this.APIKeyText" Validation="@this.providerValidation.ValidatingAPIKey"/>
@ -72,15 +72,14 @@
AdornmentColor="Color.Info"
Validation="@this.ValidateManuallyModel"
UserAttributes="@SPELLCHECK_ATTRIBUTES"
HelperText="@T("Currently, we cannot query the embedding models for the selected provider and/or host. Therefore, please enter the model name manually.")"
/>
HelperText="@T("Currently, we cannot query the embedding models for the selected provider and/or host. Therefore, please enter the model name manually.")"/>
}
else
{
<MudButton Disabled="@(!this.DataLLMProvider.CanLoadModels(this.DataHost, this.dataAPIKey))" Variant="Variant.Filled" Size="Size.Small" StartIcon="@Icons.Material.Filled.Refresh" OnClick="@this.ReloadModels">
@T("Load")
</MudButton>
@if(this.availableModels.Count is 0)
@if (this.availableModels.Count is 0)
{
<MudText Typo="Typo.body1">
@T("No models loaded or available.")
@ -123,10 +122,13 @@
AdornmentIcon="@Icons.Material.Filled.Lightbulb"
AdornmentColor="Color.Info"
Validation="@this.providerValidation.ValidatingInstanceName"
UserAttributes="@SPELLCHECK_ATTRIBUTES"
/>
<AttachDocuments Name="File Attachments" Layer="@DropLayers.PAGES" @bind-DocumentPaths="@this.chatDocumentPaths" CatchAllDocuments="true" UseSmallForm="true" ValidateMediaFileTypes="true" AllowedFileTypes="[FileTypes.JSON]"/>
UserAttributes="@SPELLCHECK_ATTRIBUTES"/>
<MudJustifiedText Typo="Typo.body1" Class="mb-3">
@T("For better embeddings and less storage usage, it's recommended to use a custom tokenizer to enable a more accurate token count.")
</MudJustifiedText>
@if (this.DataModel != default){
<SelectFile File="@this.dataFilePath" FileChanged="@this.OnDataFilePathChanged" Label="@T("Selected file path for the custom tokenizer")" FileDialogTitle="@T("Choose a custom tokenizer here")" Filter="[FileTypes.JSON]" IsClearable="true"/>
}
</MudForm>
<Issues IssuesData="@this.dataIssues"/>
</DialogContent>
@ -135,7 +137,7 @@
@T("Cancel")
</MudButton>
<MudButton OnClick="@this.Store" Variant="Variant.Filled" Color="Color.Primary">
@if(this.IsEditing)
@if (this.IsEditing)
{
@T("Update")
}
@ -145,4 +147,4 @@
}
</MudButton>
</DialogActions>
</MudDialog>
</MudDialog>

View File

@ -90,6 +90,7 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId
private string dataAPIKeyStorageIssue = string.Empty;
private string dataEditingPreviousInstanceName = string.Empty;
private string dataLoadingModelsIssue = string.Empty;
private string dataFilePath = string.Empty;
// We get the form reference from Blazor code to validate it manually:
private MudForm form = null!;
@ -266,6 +267,13 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId
await this.form.Validate();
}
}
/// <summary>
/// Handles a change of the tokenizer file path reported by the file selection component.
/// Stores the new path in <c>dataFilePath</c> and, when a path is present, asks the
/// Rust service to validate and store the tokenizer for the current model.
/// </summary>
/// <param name="filePath">The newly selected file path; empty when the selection was cleared.</param>
private async Task OnDataFilePathChanged(string filePath)
{
    // The markup passes dataFilePath one-way and routes changes through this handler,
    // so the field must be updated here or the selection is never reflected.
    this.dataFilePath = filePath;

    // A cleared selection has nothing to validate or store.
    if (string.IsNullOrWhiteSpace(filePath))
    {
        return;
    }

    await this.RustService.ValidateAndStoreTokenizer(this.DataModel.DisplayName, filePath);
}
private void OnHostChanged(Host selectedHost)
{
@ -309,4 +317,4 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId
};
private bool IsNoneProvider => this.DataLLMProvider is LLMProviders.NONE;
}
}

View File

@ -0,0 +1,3 @@
namespace AIStudio.Tools.Rust;
/// <summary>
/// Result of a tokenizer validate-and-store request; returned by
/// <c>RustService.ValidateAndStoreTokenizer</c>.
/// </summary>
/// <param name="Success">
/// Integer status value. <c>RustService.ValidateAndStoreTokenizer</c> uses <c>-1</c> when the
/// HTTP request fails. NOTE(review): the meaning of the other values is defined by the
/// responding service — confirm.
/// </param>
/// <param name="Response">Message text accompanying the status.</param>
public readonly record struct TokenizerUploadResponse(int Success, string Response);

View File

@ -81,4 +81,21 @@ public sealed partial class RustService
return await result.Content.ReadFromJsonAsync<FileSaveResponse>(this.jsonRustSerializerOptions);
}
/// <summary>
/// Posts the model ID and tokenizer file path to the <c>/tokenizer/val-and-store</c>
/// endpoint and returns the deserialized response.
/// </summary>
/// <param name="modelId">The model ID sent as <c>model_id</c>; may be null.</param>
/// <param name="filePath">The tokenizer file path sent as <c>file_path</c>.</param>
/// <returns>
/// The response read from the endpoint, or a response with <c>Success</c> set to <c>-1</c>
/// and an error message when the HTTP status code does not indicate success.
/// </returns>
public async Task<TokenizerUploadResponse> ValidateAndStoreTokenizer(string? modelId, string filePath)
{
    var payload = new
    {
        model_id = modelId,
        file_path = filePath,
    };

    var result = await this.http.PostAsJsonAsync("/tokenizer/val-and-store", payload, this.jsonRustSerializerOptions);
    if (!result.IsSuccessStatusCode)
    {
        // Use a constant message template so the status code is logged as a structured value.
        this.logger!.LogError("Failed to validate and store the tokenizer '{StatusCode}'", result.StatusCode);
        return new TokenizerUploadResponse(-1, "An error occurred while validating and storing the tokenizer");
    }

    return await result.Content.ReadFromJsonAsync<TokenizerUploadResponse>(this.jsonRustSerializerOptions);
}
}