add upload for tokenizer (in progress)

This commit is contained in:
PaulKoudelka 2026-03-27 15:09:05 +01:00
parent 562520cbf4
commit 409852907c
7 changed files with 52 additions and 12 deletions

View File

@ -3334,6 +3334,9 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2331453405"] = "(O
-- Add -- Add
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2646845972"] = "Add" UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2646845972"] = "Add"
-- Selected file path for the custom tokenizer
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T278585345"] = "Selected file path for the custom tokenizer"
-- No models loaded or available. -- No models loaded or available.
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2810182573"] = "No models loaded or available." UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2810182573"] = "No models loaded or available."
@ -3343,6 +3346,9 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2842060373"] = "In
-- Currently, we cannot query the embedding models for the selected provider and/or host. Therefore, please enter the model name manually. -- Currently, we cannot query the embedding models for the selected provider and/or host. Therefore, please enter the model name manually.
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T290547799"] = "Currently, we cannot query the embedding models for the selected provider and/or host. Therefore, please enter the model name manually." UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T290547799"] = "Currently, we cannot query the embedding models for the selected provider and/or host. Therefore, please enter the model name manually."
-- Choose a custom tokenizer here
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T3787466119"] = "Choose a custom tokenizer here"
-- Model selection -- Model selection
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T416738168"] = "Model selection" UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T416738168"] = "Model selection"

View File

@ -11,6 +11,7 @@
AdornmentIcon="@Icons.Material.Filled.AttachFile" AdornmentIcon="@Icons.Material.Filled.AttachFile"
UserAttributes="@SPELLCHECK_ATTRIBUTES" UserAttributes="@SPELLCHECK_ATTRIBUTES"
Variant="Variant.Outlined" Variant="Variant.Outlined"
Clearable="this.IsClearable"
/> />
<MudButton StartIcon="@Icons.Material.Filled.FolderOpen" Variant="Variant.Outlined" Color="Color.Primary" Disabled="this.Disabled" OnClick="@this.OpenFileDialog"> <MudButton StartIcon="@Icons.Material.Filled.FolderOpen" Variant="Variant.Outlined" Color="Color.Primary" Disabled="this.Disabled" OnClick="@this.OpenFileDialog">

View File

@ -27,6 +27,9 @@ public partial class SelectFile : MSGComponentBase
[Parameter] [Parameter]
public Func<string, string?> Validation { get; set; } = _ => null; public Func<string, string?> Validation { get; set; } = _ => null;
/// <summary>
/// Controls the <c>Clearable</c> attribute of the input in this component's markup.
/// Defaults to <c>false</c>, so callers have to opt in explicitly.
/// </summary>
[Parameter]
public bool IsClearable { get; set; }
[Inject] [Inject]
public RustService RustService { get; set; } = null!; public RustService RustService { get; set; } = null!;

View File

@ -8,7 +8,7 @@
<MudForm @ref="@this.form" @bind-IsValid="@this.dataIsValid" @bind-Errors="@this.dataIssues"> <MudForm @ref="@this.form" @bind-IsValid="@this.dataIsValid" @bind-Errors="@this.dataIssues">
<MudStack Row="@true" AlignItems="AlignItems.Center"> <MudStack Row="@true" AlignItems="AlignItems.Center">
@* ReSharper disable once CSharpWarnings::CS8974 *@ @* ReSharper disable once CSharpWarnings::CS8974 *@
<MudSelect @bind-Value="@this.DataLLMProvider" Label="@T("Provider")" Class="mb-3" OpenIcon="@Icons.Material.Filled.AccountBalance" AdornmentColor="Color.Info" Adornment="Adornment.Start" Validation="@this.providerValidation.ValidatingProvider"> <MudSelect @bind-Value="@this.DataLLMProvider" Label="@T("Provider")" Class="mb-3" OpenIcon="@Icons.Material.Filled.AccountBalance" AdornmentColor="Color.Info" Adornment="Adornment.Start" Validation="@this.providerValidation.ValidatingProvider">
@foreach (LLMProviders provider in Enum.GetValues(typeof(LLMProviders))) @foreach (LLMProviders provider in Enum.GetValues(typeof(LLMProviders)))
{ {
if (provider.ProvideEmbeddingAPI() || provider is LLMProviders.NONE) if (provider.ProvideEmbeddingAPI() || provider is LLMProviders.NONE)
@ -23,7 +23,7 @@
@T("Create account") @T("Create account")
</MudButton> </MudButton>
</MudStack> </MudStack>
@if (this.DataLLMProvider.IsAPIKeyNeeded(this.DataHost)) @if (this.DataLLMProvider.IsAPIKeyNeeded(this.DataHost))
{ {
<SecretInputField Secret="@this.dataAPIKey" SecretChanged="@this.OnAPIKeyChanged" Label="@this.APIKeyText" Validation="@this.providerValidation.ValidatingAPIKey"/> <SecretInputField Secret="@this.dataAPIKey" SecretChanged="@this.OnAPIKeyChanged" Label="@this.APIKeyText" Validation="@this.providerValidation.ValidatingAPIKey"/>
@ -72,15 +72,14 @@
AdornmentColor="Color.Info" AdornmentColor="Color.Info"
Validation="@this.ValidateManuallyModel" Validation="@this.ValidateManuallyModel"
UserAttributes="@SPELLCHECK_ATTRIBUTES" UserAttributes="@SPELLCHECK_ATTRIBUTES"
HelperText="@T("Currently, we cannot query the embedding models for the selected provider and/or host. Therefore, please enter the model name manually.")" HelperText="@T("Currently, we cannot query the embedding models for the selected provider and/or host. Therefore, please enter the model name manually.")"/>
/>
} }
else else
{ {
<MudButton Disabled="@(!this.DataLLMProvider.CanLoadModels(this.DataHost, this.dataAPIKey))" Variant="Variant.Filled" Size="Size.Small" StartIcon="@Icons.Material.Filled.Refresh" OnClick="@this.ReloadModels"> <MudButton Disabled="@(!this.DataLLMProvider.CanLoadModels(this.DataHost, this.dataAPIKey))" Variant="Variant.Filled" Size="Size.Small" StartIcon="@Icons.Material.Filled.Refresh" OnClick="@this.ReloadModels">
@T("Load") @T("Load")
</MudButton> </MudButton>
@if(this.availableModels.Count is 0) @if (this.availableModels.Count is 0)
{ {
<MudText Typo="Typo.body1"> <MudText Typo="Typo.body1">
@T("No models loaded or available.") @T("No models loaded or available.")
@ -123,10 +122,13 @@
AdornmentIcon="@Icons.Material.Filled.Lightbulb" AdornmentIcon="@Icons.Material.Filled.Lightbulb"
AdornmentColor="Color.Info" AdornmentColor="Color.Info"
Validation="@this.providerValidation.ValidatingInstanceName" Validation="@this.providerValidation.ValidatingInstanceName"
UserAttributes="@SPELLCHECK_ATTRIBUTES" UserAttributes="@SPELLCHECK_ATTRIBUTES"/>
/> <MudJustifiedText Typo="Typo.body1" Class="mb-3">
<AttachDocuments Name="File Attachments" Layer="@DropLayers.PAGES" @bind-DocumentPaths="@this.chatDocumentPaths" CatchAllDocuments="true" UseSmallForm="true" ValidateMediaFileTypes="true" AllowedFileTypes="[FileTypes.JSON]"/> @T("For better embeddings and less storage usage, it's recommended to use a custom tokenizer to enable a more accurate token count.")
</MudJustifiedText>
@* Show the custom tokenizer file picker only when DataModel differs from its default value. *@
@if (this.DataModel != default)
{
    <SelectFile File="@this.dataFilePath" FileChanged="@this.OnDataFilePathChanged" Label="@T("Selected file path for the custom tokenizer")" FileDialogTitle="@T("Choose a custom tokenizer here")" Filter="[FileTypes.JSON]" IsClearable="true"/>
}
</MudForm> </MudForm>
<Issues IssuesData="@this.dataIssues"/> <Issues IssuesData="@this.dataIssues"/>
</DialogContent> </DialogContent>
@ -135,7 +137,7 @@
@T("Cancel") @T("Cancel")
</MudButton> </MudButton>
<MudButton OnClick="@this.Store" Variant="Variant.Filled" Color="Color.Primary"> <MudButton OnClick="@this.Store" Variant="Variant.Filled" Color="Color.Primary">
@if(this.IsEditing) @if (this.IsEditing)
{ {
@T("Update") @T("Update")
} }
@ -145,4 +147,4 @@
} }
</MudButton> </MudButton>
</DialogActions> </DialogActions>
</MudDialog> </MudDialog>

View File

@ -90,6 +90,7 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId
private string dataAPIKeyStorageIssue = string.Empty; private string dataAPIKeyStorageIssue = string.Empty;
private string dataEditingPreviousInstanceName = string.Empty; private string dataEditingPreviousInstanceName = string.Empty;
private string dataLoadingModelsIssue = string.Empty; private string dataLoadingModelsIssue = string.Empty;
private string dataFilePath = string.Empty;
// We get the form reference from Blazor code to validate it manually: // We get the form reference from Blazor code to validate it manually:
private MudForm form = null!; private MudForm form = null!;
@ -266,6 +267,13 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId
await this.form.Validate(); await this.form.Validate();
} }
} }
/// <summary>
/// Handles a change of the custom tokenizer file selection: remembers the new path and,
/// when a path is present, asks the Rust service to validate and store the tokenizer
/// for the currently selected model.
/// </summary>
/// <param name="filePath">The path reported by the file selector; empty when the selection was cleared.</param>
private async Task OnDataFilePathChanged(string filePath)
{
    // Keep the backing field in sync: the selector's File parameter is bound to it,
    // so without this assignment the displayed path would never reflect the change.
    this.dataFilePath = filePath;

    // A cleared selection carries no file, so there is nothing to validate or store.
    if (string.IsNullOrWhiteSpace(filePath))
    {
        return;
    }

    // NOTE(review): the returned TokenizerUploadResponse is currently discarded;
    // surfacing a failed validation to the user is still open.
    await this.RustService.ValidateAndStoreTokenizer(this.DataModel.DisplayName, filePath);
}
private void OnHostChanged(Host selectedHost) private void OnHostChanged(Host selectedHost)
{ {
@ -309,4 +317,4 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId
}; };
private bool IsNoneProvider => this.DataLLMProvider is LLMProviders.NONE; private bool IsNoneProvider => this.DataLLMProvider is LLMProviders.NONE;
} }

View File

@ -0,0 +1,3 @@
namespace AIStudio.Tools.Rust;
/// <summary>
/// Result of a request to validate and store a tokenizer file.
/// </summary>
/// <param name="Success">
/// Numeric status of the operation. The C# caller uses -1 when the HTTP request fails;
/// the meaning of other values is defined by the Rust service (TODO confirm).
/// </param>
/// <param name="Response">Message accompanying the status.</param>
public readonly record struct TokenizerUploadResponse(
    int Success,
    string Response);

View File

@ -81,4 +81,21 @@ public sealed partial class RustService
return await result.Content.ReadFromJsonAsync<FileSaveResponse>(this.jsonRustSerializerOptions); return await result.Content.ReadFromJsonAsync<FileSaveResponse>(this.jsonRustSerializerOptions);
} }
/// <summary>
/// Asks the Rust service to validate a tokenizer file and store it for the given model.
/// </summary>
/// <param name="modelId">Identifier of the model the tokenizer belongs to; sent as <c>model_id</c>.</param>
/// <param name="filePath">Path of the tokenizer file; sent as <c>file_path</c>.</param>
/// <returns>
/// The deserialized response of the service, or a response with <c>Success</c> set to -1
/// when the HTTP request did not return a success status code.
/// </returns>
public async Task<TokenizerUploadResponse> ValidateAndStoreTokenizer(string? modelId, string filePath)
{
    var payload = new
    {
        model_id = modelId,
        file_path = filePath,
    };

    // Dispose the response once its content has been read.
    using var result = await this.http.PostAsJsonAsync("/tokenizer/val-and-store", payload, this.jsonRustSerializerOptions);
    if (!result.IsSuccessStatusCode)
    {
        // Structured template instead of an interpolated string, so the status code is logged as a named value.
        this.logger?.LogError("Failed to validate and store the tokenizer: '{StatusCode}'", result.StatusCode);
        return new TokenizerUploadResponse(-1, "An error occurred while validating and storing the tokenizer");
    }

    return await result.Content.ReadFromJsonAsync<TokenizerUploadResponse>(this.jsonRustSerializerOptions);
}
} }