mirror of
https://github.com/MindWorkAI/AI-Studio.git
synced 2026-05-19 21:52:14 +00:00
added functionality to add tokenizer to LLM and embedding models
This commit is contained in:
parent
0854debc00
commit
e07ca378d4
@ -3817,6 +3817,9 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T1324664716"] = "AP
|
|||||||
-- Create account
|
-- Create account
|
||||||
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T1356621346"] = "Create account"
|
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T1356621346"] = "Create account"
|
||||||
|
|
||||||
|
-- Failed to validate the selected tokenizer. Please try again.
|
||||||
|
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T1384494471"] = "Failed to validate the selected tokenizer. Please try again."
|
||||||
|
|
||||||
-- Please enter an embedding model name.
|
-- Please enter an embedding model name.
|
||||||
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T1661085403"] = "Please enter an embedding model name."
|
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T1661085403"] = "Please enter an embedding model name."
|
||||||
|
|
||||||
@ -3838,6 +3841,9 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2189814010"] = "Mo
|
|||||||
-- (Optional) API Key
|
-- (Optional) API Key
|
||||||
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2331453405"] = "(Optional) API Key"
|
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2331453405"] = "(Optional) API Key"
|
||||||
|
|
||||||
|
-- Invalid tokenizer:
|
||||||
|
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2448302543"] = "Invalid tokenizer:"
|
||||||
|
|
||||||
-- Add
|
-- Add
|
||||||
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2646845972"] = "Add"
|
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2646845972"] = "Add"
|
||||||
|
|
||||||
@ -4036,6 +4042,9 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::PROVIDERDIALOG::T1324664716"] = "API Key"
|
|||||||
-- Create account
|
-- Create account
|
||||||
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::PROVIDERDIALOG::T1356621346"] = "Create account"
|
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::PROVIDERDIALOG::T1356621346"] = "Create account"
|
||||||
|
|
||||||
|
-- Failed to validate the selected tokenizer. Please try again.
|
||||||
|
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::PROVIDERDIALOG::T1384494471"] = "Failed to validate the selected tokenizer. Please try again."
|
||||||
|
|
||||||
-- Load models
|
-- Load models
|
||||||
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::PROVIDERDIALOG::T15352225"] = "Load models"
|
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::PROVIDERDIALOG::T15352225"] = "Load models"
|
||||||
|
|
||||||
@ -4063,12 +4072,18 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::PROVIDERDIALOG::T2189814010"] = "Model"
|
|||||||
-- (Optional) API Key
|
-- (Optional) API Key
|
||||||
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::PROVIDERDIALOG::T2331453405"] = "(Optional) API Key"
|
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::PROVIDERDIALOG::T2331453405"] = "(Optional) API Key"
|
||||||
|
|
||||||
|
-- Invalid tokenizer:
|
||||||
|
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::PROVIDERDIALOG::T2448302543"] = "Invalid tokenizer:"
|
||||||
|
|
||||||
-- Add
|
-- Add
|
||||||
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::PROVIDERDIALOG::T2646845972"] = "Add"
|
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::PROVIDERDIALOG::T2646845972"] = "Add"
|
||||||
|
|
||||||
-- Additional API parameters
|
-- Additional API parameters
|
||||||
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::PROVIDERDIALOG::T2728244552"] = "Additional API parameters"
|
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::PROVIDERDIALOG::T2728244552"] = "Additional API parameters"
|
||||||
|
|
||||||
|
-- Selected file path for the custom tokenizer
|
||||||
|
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::PROVIDERDIALOG::T278585345"] = "Selected file path for the custom tokenizer"
|
||||||
|
|
||||||
-- No models loaded or available.
|
-- No models loaded or available.
|
||||||
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::PROVIDERDIALOG::T2810182573"] = "No models loaded or available."
|
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::PROVIDERDIALOG::T2810182573"] = "No models loaded or available."
|
||||||
|
|
||||||
@ -4087,6 +4102,9 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::PROVIDERDIALOG::T3763891899"] = "Show availa
|
|||||||
-- This host uses the model configured at the provider level. No model selection is available.
|
-- This host uses the model configured at the provider level. No model selection is available.
|
||||||
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::PROVIDERDIALOG::T3783329915"] = "This host uses the model configured at the provider level. No model selection is available."
|
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::PROVIDERDIALOG::T3783329915"] = "This host uses the model configured at the provider level. No model selection is available."
|
||||||
|
|
||||||
|
-- Choose a custom tokenizer here
|
||||||
|
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::PROVIDERDIALOG::T3787466119"] = "Choose a custom tokenizer here"
|
||||||
|
|
||||||
-- Duplicate key '{0}' found.
|
-- Duplicate key '{0}' found.
|
||||||
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::PROVIDERDIALOG::T3804472591"] = "Duplicate key '{0}' found."
|
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::PROVIDERDIALOG::T3804472591"] = "Duplicate key '{0}' found."
|
||||||
|
|
||||||
@ -4108,6 +4126,9 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::PROVIDERDIALOG::T900237532"] = "Provider"
|
|||||||
-- Cancel
|
-- Cancel
|
||||||
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::PROVIDERDIALOG::T900713019"] = "Cancel"
|
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::PROVIDERDIALOG::T900713019"] = "Cancel"
|
||||||
|
|
||||||
|
-- For better token estimates, you can configure a custom tokenizer for this provider.
|
||||||
|
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::PROVIDERDIALOG::T961454300"] = "For better token estimates, you can configure a custom tokenizer for this provider."
|
||||||
|
|
||||||
-- The parameter name. It must be unique within the retrieval process.
|
-- The parameter name. It must be unique within the retrieval process.
|
||||||
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::RETRIEVALPROCESSDIALOG::T100726215"] = "The parameter name. It must be unique within the retrieval process."
|
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::RETRIEVALPROCESSDIALOG::T100726215"] = "The parameter name. It must be unique within the retrieval process."
|
||||||
|
|
||||||
|
|||||||
@ -48,6 +48,9 @@ public partial class AttachDocuments : MSGComponentBase
|
|||||||
[Parameter]
|
[Parameter]
|
||||||
public bool UseSmallForm { get; set; }
|
public bool UseSmallForm { get; set; }
|
||||||
|
|
||||||
|
[Parameter]
|
||||||
|
public FileType[]? AllowedFileTypes { get; set; }
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// When true, validate media file types before attaching. Default is true. That means that
|
/// When true, validate media file types before attaching. Default is true. That means that
|
||||||
/// the user cannot attach unsupported media file types when the provider or model does not
|
/// the user cannot attach unsupported media file types when the provider or model does not
|
||||||
|
|||||||
@ -73,6 +73,7 @@ public partial class SettingsPanelEmbeddings : SettingsPanelProviderBase
|
|||||||
{ x => x.IsSelfHosted, embeddingProvider.IsSelfHosted },
|
{ x => x.IsSelfHosted, embeddingProvider.IsSelfHosted },
|
||||||
{ x => x.IsEditing, true },
|
{ x => x.IsEditing, true },
|
||||||
{ x => x.DataHost, embeddingProvider.Host },
|
{ x => x.DataHost, embeddingProvider.Host },
|
||||||
|
{ x => x.DataTokenizerPath, embeddingProvider.TokenizerPath },
|
||||||
};
|
};
|
||||||
|
|
||||||
var dialogReference = await this.DialogService.ShowAsync<EmbeddingProviderDialog>(T("Edit Embedding Provider"), dialogParameters, DialogOptions.FULLSCREEN);
|
var dialogReference = await this.DialogService.ShowAsync<EmbeddingProviderDialog>(T("Edit Embedding Provider"), dialogParameters, DialogOptions.FULLSCREEN);
|
||||||
|
|||||||
@ -73,6 +73,7 @@ public partial class SettingsPanelProviders : SettingsPanelProviderBase
|
|||||||
{ x => x.DataHost, provider.Host },
|
{ x => x.DataHost, provider.Host },
|
||||||
{ x => x.HFInferenceProviderId, provider.HFInferenceProvider },
|
{ x => x.HFInferenceProviderId, provider.HFInferenceProvider },
|
||||||
{ x => x.AdditionalJsonApiParameters, provider.AdditionalJsonApiParameters },
|
{ x => x.AdditionalJsonApiParameters, provider.AdditionalJsonApiParameters },
|
||||||
|
{ x => x.DataTokenizerPath, provider.TokenizerPath },
|
||||||
};
|
};
|
||||||
|
|
||||||
var dialogReference = await this.DialogService.ShowAsync<ProviderDialog>(T("Edit LLM Provider"), dialogParameters, DialogOptions.FULLSCREEN);
|
var dialogReference = await this.DialogService.ShowAsync<ProviderDialog>(T("Edit LLM Provider"), dialogParameters, DialogOptions.FULLSCREEN);
|
||||||
|
|||||||
@ -69,6 +69,9 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId
|
|||||||
/// </summary>
|
/// </summary>
|
||||||
[Parameter]
|
[Parameter]
|
||||||
public bool IsEditing { get; init; }
|
public bool IsEditing { get; init; }
|
||||||
|
|
||||||
|
[Parameter]
|
||||||
|
public string DataTokenizerPath { get; set; } = string.Empty;
|
||||||
|
|
||||||
[Inject]
|
[Inject]
|
||||||
private RustService RustService { get; init; } = null!;
|
private RustService RustService { get; init; } = null!;
|
||||||
@ -143,6 +146,7 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId
|
|||||||
Host = this.DataHost,
|
Host = this.DataHost,
|
||||||
IsEnterpriseConfiguration = false,
|
IsEnterpriseConfiguration = false,
|
||||||
EnterpriseConfigurationPluginId = Guid.Empty,
|
EnterpriseConfigurationPluginId = Guid.Empty,
|
||||||
|
TokenizerPath = this.dataFilePath,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -164,6 +168,7 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId
|
|||||||
if(this.IsEditing)
|
if(this.IsEditing)
|
||||||
{
|
{
|
||||||
this.dataEditingPreviousInstanceName = this.DataName.ToLowerInvariant();
|
this.dataEditingPreviousInstanceName = this.DataName.ToLowerInvariant();
|
||||||
|
this.dataFilePath = this.DataTokenizerPath;
|
||||||
Console.WriteLine($"Previous instance name is '{this.dataEditingPreviousInstanceName}'");
|
Console.WriteLine($"Previous instance name is '{this.dataEditingPreviousInstanceName}'");
|
||||||
|
|
||||||
// When using self-hosted embedding, we must copy the model name:
|
// When using self-hosted embedding, we must copy the model name:
|
||||||
@ -241,7 +246,12 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId
|
|||||||
var response = await this.RustService.StoreTokenizer(this.DataName, this.dataEditingPreviousInstanceName, this.dataFilePath);
|
var response = await this.RustService.StoreTokenizer(this.DataName, this.dataEditingPreviousInstanceName, this.dataFilePath);
|
||||||
Console.WriteLine($"Response from Rust: {response.Message}");
|
Console.WriteLine($"Response from Rust: {response.Message}");
|
||||||
if (!response.Success)
|
if (!response.Success)
|
||||||
|
{
|
||||||
|
this.dataCustomTokenizerValidationIssue = response.Message;
|
||||||
|
await this.form.Validate();
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
|
this.dataFilePath = response.Message;
|
||||||
|
|
||||||
// Use the data model to store the provider.
|
// Use the data model to store the provider.
|
||||||
// We just return this data to the parent component:
|
// We just return this data to the parent component:
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
@using AIStudio.Provider
|
@using AIStudio.Provider
|
||||||
@using AIStudio.Provider.HuggingFace
|
@using AIStudio.Provider.HuggingFace
|
||||||
@using AIStudio.Provider.SelfHosted
|
@using AIStudio.Provider.SelfHosted
|
||||||
|
@using AIStudio.Tools.Rust
|
||||||
@inherits MSGComponentBase
|
@inherits MSGComponentBase
|
||||||
<MudDialog>
|
<MudDialog>
|
||||||
<DialogContent>
|
<DialogContent>
|
||||||
@ -150,6 +151,24 @@
|
|||||||
Validation="@this.providerValidation.ValidatingInstanceName"
|
Validation="@this.providerValidation.ValidatingInstanceName"
|
||||||
UserAttributes="@SPELLCHECK_ATTRIBUTES"
|
UserAttributes="@SPELLCHECK_ATTRIBUTES"
|
||||||
/>
|
/>
|
||||||
|
|
||||||
|
@if (this.DataLLMProvider != LLMProviders.NONE)
|
||||||
|
{
|
||||||
|
<MudJustifiedText Typo="Typo.body1" Class="mb-3">
|
||||||
|
@T("For better token estimates, you can configure a custom tokenizer for this provider.")
|
||||||
|
</MudJustifiedText>
|
||||||
|
<SelectFile
|
||||||
|
File="@this.dataFilePath"
|
||||||
|
FileChanged="@this.OnDataFilePathChanged"
|
||||||
|
Label="@T("Selected file path for the custom tokenizer")"
|
||||||
|
FileDialogTitle="@T("Choose a custom tokenizer here")"
|
||||||
|
Filter="[FileTypes.JSON]"
|
||||||
|
IsClearable="@true"
|
||||||
|
Error="@(!string.IsNullOrWhiteSpace(this.dataCustomTokenizerValidationIssue))"
|
||||||
|
ErrorText="@(this.dataCustomTokenizerValidationIssue)"
|
||||||
|
Validation="@this.providerValidation.ValidatingCustomTokenizer"
|
||||||
|
OnClear="@this.ClearPathTokenizer" />
|
||||||
|
}
|
||||||
|
|
||||||
<MudStack>
|
<MudStack>
|
||||||
<MudButton OnClick="@this.ToggleExpertSettings">
|
<MudButton OnClick="@this.ToggleExpertSettings">
|
||||||
|
|||||||
@ -8,6 +8,7 @@ using AIStudio.Tools.Services;
|
|||||||
using AIStudio.Tools.Validation;
|
using AIStudio.Tools.Validation;
|
||||||
|
|
||||||
using Microsoft.AspNetCore.Components;
|
using Microsoft.AspNetCore.Components;
|
||||||
|
using Microsoft.AspNetCore.Components.Web;
|
||||||
|
|
||||||
using Host = AIStudio.Provider.SelfHosted.Host;
|
using Host = AIStudio.Provider.SelfHosted.Host;
|
||||||
|
|
||||||
@ -83,6 +84,9 @@ public partial class ProviderDialog : MSGComponentBase, ISecretId
|
|||||||
|
|
||||||
[Parameter]
|
[Parameter]
|
||||||
public string AdditionalJsonApiParameters { get; set; } = string.Empty;
|
public string AdditionalJsonApiParameters { get; set; } = string.Empty;
|
||||||
|
|
||||||
|
[Parameter]
|
||||||
|
public string DataTokenizerPath { get; set; } = string.Empty;
|
||||||
|
|
||||||
[Inject]
|
[Inject]
|
||||||
private RustService RustService { get; init; } = null!;
|
private RustService RustService { get; init; } = null!;
|
||||||
@ -104,6 +108,11 @@ public partial class ProviderDialog : MSGComponentBase, ISecretId
|
|||||||
private string dataAPIKeyStorageIssue = string.Empty;
|
private string dataAPIKeyStorageIssue = string.Empty;
|
||||||
private string dataEditingPreviousInstanceName = string.Empty;
|
private string dataEditingPreviousInstanceName = string.Empty;
|
||||||
private string dataLoadingModelsIssue = string.Empty;
|
private string dataLoadingModelsIssue = string.Empty;
|
||||||
|
private string dataFilePath = string.Empty;
|
||||||
|
private string dataCustomTokenizerValidationIssue = string.Empty;
|
||||||
|
private Task dataTokenizerValidationTask = Task.CompletedTask;
|
||||||
|
private bool dataStoreWasAttempted;
|
||||||
|
private int dataTokenizerValidationRevision;
|
||||||
private bool showExpertSettings;
|
private bool showExpertSettings;
|
||||||
|
|
||||||
// We get the form reference from Blazor code to validate it manually:
|
// We get the form reference from Blazor code to validate it manually:
|
||||||
@ -123,6 +132,7 @@ public partial class ProviderDialog : MSGComponentBase, ISecretId
|
|||||||
GetUsedInstanceNames = () => this.UsedInstanceNames,
|
GetUsedInstanceNames = () => this.UsedInstanceNames,
|
||||||
GetHost = () => this.DataHost,
|
GetHost = () => this.DataHost,
|
||||||
IsModelProvidedManually = () => this.DataLLMProvider.IsLLMModelProvidedManually(),
|
IsModelProvidedManually = () => this.DataLLMProvider.IsLLMModelProvidedManually(),
|
||||||
|
GetCustomTokenizerValidationIssue = () => this.dataCustomTokenizerValidationIssue,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -158,6 +168,7 @@ public partial class ProviderDialog : MSGComponentBase, ISecretId
|
|||||||
Host = this.DataHost,
|
Host = this.DataHost,
|
||||||
HFInferenceProvider = this.HFInferenceProviderId,
|
HFInferenceProvider = this.HFInferenceProviderId,
|
||||||
AdditionalJsonApiParameters = this.AdditionalJsonApiParameters,
|
AdditionalJsonApiParameters = this.AdditionalJsonApiParameters,
|
||||||
|
TokenizerPath = this.dataFilePath,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -182,6 +193,7 @@ public partial class ProviderDialog : MSGComponentBase, ISecretId
|
|||||||
if(this.IsEditing)
|
if(this.IsEditing)
|
||||||
{
|
{
|
||||||
this.dataEditingPreviousInstanceName = this.DataInstanceName.ToLowerInvariant();
|
this.dataEditingPreviousInstanceName = this.DataInstanceName.ToLowerInvariant();
|
||||||
|
this.dataFilePath = this.DataTokenizerPath;
|
||||||
|
|
||||||
// When using Fireworks or Hugging Face, we must copy the model name:
|
// When using Fireworks or Hugging Face, we must copy the model name:
|
||||||
if (this.DataLLMProvider.IsLLMModelProvidedManually())
|
if (this.DataLLMProvider.IsLLMModelProvidedManually())
|
||||||
@ -237,6 +249,8 @@ public partial class ProviderDialog : MSGComponentBase, ISecretId
|
|||||||
|
|
||||||
private async Task Store()
|
private async Task Store()
|
||||||
{
|
{
|
||||||
|
this.dataStoreWasAttempted = true;
|
||||||
|
await this.dataTokenizerValidationTask;
|
||||||
await this.form.Validate();
|
await this.form.Validate();
|
||||||
if (!string.IsNullOrWhiteSpace(this.dataAPIKeyStorageIssue))
|
if (!string.IsNullOrWhiteSpace(this.dataAPIKeyStorageIssue))
|
||||||
this.dataAPIKeyStorageIssue = string.Empty;
|
this.dataAPIKeyStorageIssue = string.Empty;
|
||||||
@ -253,6 +267,15 @@ public partial class ProviderDialog : MSGComponentBase, ISecretId
|
|||||||
// When the data is not valid, we don't store it:
|
// When the data is not valid, we don't store it:
|
||||||
if (!this.dataIsValid)
|
if (!this.dataIsValid)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
var tokenizerResponse = await this.RustService.StoreTokenizer(this.DataInstanceName, this.dataEditingPreviousInstanceName, this.dataFilePath);
|
||||||
|
if (!tokenizerResponse.Success)
|
||||||
|
{
|
||||||
|
this.dataCustomTokenizerValidationIssue = tokenizerResponse.Message;
|
||||||
|
await this.form.Validate();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
this.dataFilePath = tokenizerResponse.Message;
|
||||||
|
|
||||||
// Use the data model to store the provider.
|
// Use the data model to store the provider.
|
||||||
// We just return this data to the parent component:
|
// We just return this data to the parent component:
|
||||||
@ -292,6 +315,58 @@ public partial class ProviderDialog : MSGComponentBase, ISecretId
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private Task ClearPathTokenizer(MouseEventArgs _)
|
||||||
|
{
|
||||||
|
return this.OnDataFilePathChanged(string.Empty);
|
||||||
|
}
|
||||||
|
|
||||||
|
private async Task OnDataFilePathChanged(string filePath)
|
||||||
|
{
|
||||||
|
this.dataFilePath = filePath;
|
||||||
|
var validationRevision = ++this.dataTokenizerValidationRevision;
|
||||||
|
this.dataTokenizerValidationTask = this.ValidateCustomTokenizer(filePath, validationRevision);
|
||||||
|
await this.dataTokenizerValidationTask;
|
||||||
|
|
||||||
|
if (validationRevision != this.dataTokenizerValidationRevision)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (this.dataStoreWasAttempted)
|
||||||
|
await this.form.Validate();
|
||||||
|
else
|
||||||
|
this.form.ResetValidation();
|
||||||
|
}
|
||||||
|
|
||||||
|
private async Task ValidateCustomTokenizer(string filePath, int validationRevision)
|
||||||
|
{
|
||||||
|
if (string.IsNullOrWhiteSpace(filePath))
|
||||||
|
{
|
||||||
|
if (validationRevision == this.dataTokenizerValidationRevision)
|
||||||
|
this.dataCustomTokenizerValidationIssue = string.Empty;
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
var response = await this.RustService.ValidateTokenizer(filePath);
|
||||||
|
if (validationRevision != this.dataTokenizerValidationRevision)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (response.Success)
|
||||||
|
this.dataCustomTokenizerValidationIssue = string.Empty;
|
||||||
|
else
|
||||||
|
this.dataCustomTokenizerValidationIssue = T("Invalid tokenizer: ") + response.Message;
|
||||||
|
}
|
||||||
|
catch (Exception e)
|
||||||
|
{
|
||||||
|
if (validationRevision != this.dataTokenizerValidationRevision)
|
||||||
|
return;
|
||||||
|
|
||||||
|
this.Logger.LogError(e, "Failed to validate custom tokenizer.");
|
||||||
|
this.dataCustomTokenizerValidationIssue = T("Failed to validate the selected tokenizer. Please try again.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private void OnHostChanged(Host selectedHost)
|
private void OnHostChanged(Host selectedHost)
|
||||||
{
|
{
|
||||||
// When the host changes, reset the model selection state:
|
// When the host changes, reset the model selection state:
|
||||||
|
|||||||
@ -90,6 +90,9 @@ public abstract class BaseProvider : IProvider, ISecretId
|
|||||||
/// <inheritdoc />
|
/// <inheritdoc />
|
||||||
public string AdditionalJsonApiParameters { get; init; } = string.Empty;
|
public string AdditionalJsonApiParameters { get; init; } = string.Empty;
|
||||||
|
|
||||||
|
/// <inheritdoc />
|
||||||
|
public string TokenizerPath { get; init; } = string.Empty;
|
||||||
|
|
||||||
/// <inheritdoc />
|
/// <inheritdoc />
|
||||||
public abstract IAsyncEnumerable<ContentStreamChunk> StreamChatCompletion(Model chatModel, ChatThread chatThread, SettingsManager settingsManager, CancellationToken token = default);
|
public abstract IAsyncEnumerable<ContentStreamChunk> StreamChatCompletion(Model chatModel, ChatThread chatThread, SettingsManager settingsManager, CancellationToken token = default);
|
||||||
|
|
||||||
|
|||||||
@ -28,6 +28,11 @@ public interface IProvider
|
|||||||
/// The additional API parameters.
|
/// The additional API parameters.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public string AdditionalJsonApiParameters { get; }
|
public string AdditionalJsonApiParameters { get; }
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// The tokenizer path associated with this provider configuration.
|
||||||
|
/// </summary>
|
||||||
|
public string TokenizerPath { get; }
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Starts a chat completion stream.
|
/// Starts a chat completion stream.
|
||||||
@ -101,4 +106,4 @@ public interface IProvider
|
|||||||
/// <param name="token">>The cancellation token.</param>
|
/// <param name="token">>The cancellation token.</param>
|
||||||
/// <returns>>The list of transcription models.</returns>
|
/// <returns>>The list of transcription models.</returns>
|
||||||
public Task<IEnumerable<Model>> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default);
|
public Task<IEnumerable<Model>> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -186,7 +186,7 @@ public static class LLMProvidersExtensions
|
|||||||
/// <returns>The provider instance.</returns>
|
/// <returns>The provider instance.</returns>
|
||||||
public static IProvider CreateProvider(this AIStudio.Settings.Provider providerSettings)
|
public static IProvider CreateProvider(this AIStudio.Settings.Provider providerSettings)
|
||||||
{
|
{
|
||||||
return providerSettings.UsedLLMProvider.CreateProvider(providerSettings.InstanceName, providerSettings.Host, providerSettings.Hostname, providerSettings.Model, providerSettings.HFInferenceProvider, providerSettings.AdditionalJsonApiParameters, providerSettings.IsEnterpriseConfiguration);
|
return providerSettings.UsedLLMProvider.CreateProvider(providerSettings.InstanceName, providerSettings.Host, providerSettings.Hostname, providerSettings.Model, providerSettings.HFInferenceProvider, providerSettings.TokenizerPath, providerSettings.AdditionalJsonApiParameters, providerSettings.IsEnterpriseConfiguration);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
@ -196,7 +196,7 @@ public static class LLMProvidersExtensions
|
|||||||
/// <returns>The provider instance.</returns>
|
/// <returns>The provider instance.</returns>
|
||||||
public static IProvider CreateProvider(this EmbeddingProvider embeddingProviderSettings)
|
public static IProvider CreateProvider(this EmbeddingProvider embeddingProviderSettings)
|
||||||
{
|
{
|
||||||
return embeddingProviderSettings.UsedLLMProvider.CreateProvider(embeddingProviderSettings.Name, embeddingProviderSettings.Host, embeddingProviderSettings.Hostname, embeddingProviderSettings.Model, HFInferenceProvider.NONE, isEnterpriseConfiguration: embeddingProviderSettings.IsEnterpriseConfiguration);
|
return embeddingProviderSettings.UsedLLMProvider.CreateProvider(embeddingProviderSettings.Name, embeddingProviderSettings.Host, embeddingProviderSettings.Hostname, embeddingProviderSettings.Model, HFInferenceProvider.NONE, embeddingProviderSettings.TokenizerPath, isEnterpriseConfiguration: embeddingProviderSettings.IsEnterpriseConfiguration);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
@ -206,33 +206,33 @@ public static class LLMProvidersExtensions
|
|||||||
/// <returns>The provider instance.</returns>
|
/// <returns>The provider instance.</returns>
|
||||||
public static IProvider CreateProvider(this TranscriptionProvider transcriptionProviderSettings)
|
public static IProvider CreateProvider(this TranscriptionProvider transcriptionProviderSettings)
|
||||||
{
|
{
|
||||||
return transcriptionProviderSettings.UsedLLMProvider.CreateProvider(transcriptionProviderSettings.Name, transcriptionProviderSettings.Host, transcriptionProviderSettings.Hostname, transcriptionProviderSettings.Model, HFInferenceProvider.NONE, isEnterpriseConfiguration: transcriptionProviderSettings.IsEnterpriseConfiguration);
|
return transcriptionProviderSettings.UsedLLMProvider.CreateProvider(transcriptionProviderSettings.Name, transcriptionProviderSettings.Host, transcriptionProviderSettings.Hostname, transcriptionProviderSettings.Model, HFInferenceProvider.NONE, string.Empty, isEnterpriseConfiguration: transcriptionProviderSettings.IsEnterpriseConfiguration);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static IProvider CreateProvider(this LLMProviders provider, string instanceName, Host host, string hostname, Model model, HFInferenceProvider inferenceProvider, string expertProviderApiParameter = "", bool isEnterpriseConfiguration = false)
|
private static IProvider CreateProvider(this LLMProviders provider, string instanceName, Host host, string hostname, Model model, HFInferenceProvider inferenceProvider, string tokenizerPath = "", string expertProviderApiParameter = "", bool isEnterpriseConfiguration = false)
|
||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
return provider switch
|
return provider switch
|
||||||
{
|
{
|
||||||
LLMProviders.OPEN_AI => new ProviderOpenAI { InstanceName = instanceName, AdditionalJsonApiParameters = expertProviderApiParameter, IsEnterpriseConfiguration = isEnterpriseConfiguration },
|
LLMProviders.OPEN_AI => new ProviderOpenAI { InstanceName = instanceName, AdditionalJsonApiParameters = expertProviderApiParameter, TokenizerPath = tokenizerPath, IsEnterpriseConfiguration = isEnterpriseConfiguration },
|
||||||
LLMProviders.ANTHROPIC => new ProviderAnthropic { InstanceName = instanceName, AdditionalJsonApiParameters = expertProviderApiParameter, IsEnterpriseConfiguration = isEnterpriseConfiguration },
|
LLMProviders.ANTHROPIC => new ProviderAnthropic { InstanceName = instanceName, AdditionalJsonApiParameters = expertProviderApiParameter, TokenizerPath = tokenizerPath, IsEnterpriseConfiguration = isEnterpriseConfiguration },
|
||||||
LLMProviders.MISTRAL => new ProviderMistral { InstanceName = instanceName, AdditionalJsonApiParameters = expertProviderApiParameter, IsEnterpriseConfiguration = isEnterpriseConfiguration },
|
LLMProviders.MISTRAL => new ProviderMistral { InstanceName = instanceName, AdditionalJsonApiParameters = expertProviderApiParameter, TokenizerPath = tokenizerPath, IsEnterpriseConfiguration = isEnterpriseConfiguration },
|
||||||
LLMProviders.GOOGLE => new ProviderGoogle { InstanceName = instanceName, AdditionalJsonApiParameters = expertProviderApiParameter, IsEnterpriseConfiguration = isEnterpriseConfiguration },
|
LLMProviders.GOOGLE => new ProviderGoogle { InstanceName = instanceName, AdditionalJsonApiParameters = expertProviderApiParameter, TokenizerPath = tokenizerPath, IsEnterpriseConfiguration = isEnterpriseConfiguration },
|
||||||
LLMProviders.X => new ProviderX { InstanceName = instanceName, AdditionalJsonApiParameters = expertProviderApiParameter, IsEnterpriseConfiguration = isEnterpriseConfiguration },
|
LLMProviders.X => new ProviderX { InstanceName = instanceName, AdditionalJsonApiParameters = expertProviderApiParameter, TokenizerPath = tokenizerPath, IsEnterpriseConfiguration = isEnterpriseConfiguration },
|
||||||
LLMProviders.DEEP_SEEK => new ProviderDeepSeek { InstanceName = instanceName, AdditionalJsonApiParameters = expertProviderApiParameter, IsEnterpriseConfiguration = isEnterpriseConfiguration },
|
LLMProviders.DEEP_SEEK => new ProviderDeepSeek { InstanceName = instanceName, AdditionalJsonApiParameters = expertProviderApiParameter, TokenizerPath = tokenizerPath, IsEnterpriseConfiguration = isEnterpriseConfiguration },
|
||||||
LLMProviders.ALIBABA_CLOUD => new ProviderAlibabaCloud { InstanceName = instanceName, AdditionalJsonApiParameters = expertProviderApiParameter, IsEnterpriseConfiguration = isEnterpriseConfiguration },
|
LLMProviders.ALIBABA_CLOUD => new ProviderAlibabaCloud { InstanceName = instanceName, AdditionalJsonApiParameters = expertProviderApiParameter, TokenizerPath = tokenizerPath, IsEnterpriseConfiguration = isEnterpriseConfiguration },
|
||||||
LLMProviders.PERPLEXITY => new ProviderPerplexity { InstanceName = instanceName, AdditionalJsonApiParameters = expertProviderApiParameter, IsEnterpriseConfiguration = isEnterpriseConfiguration },
|
LLMProviders.PERPLEXITY => new ProviderPerplexity { InstanceName = instanceName, AdditionalJsonApiParameters = expertProviderApiParameter, TokenizerPath = tokenizerPath, IsEnterpriseConfiguration = isEnterpriseConfiguration },
|
||||||
LLMProviders.OPEN_ROUTER => new ProviderOpenRouter { InstanceName = instanceName, AdditionalJsonApiParameters = expertProviderApiParameter, IsEnterpriseConfiguration = isEnterpriseConfiguration },
|
LLMProviders.OPEN_ROUTER => new ProviderOpenRouter { InstanceName = instanceName, AdditionalJsonApiParameters = expertProviderApiParameter, TokenizerPath = tokenizerPath, IsEnterpriseConfiguration = isEnterpriseConfiguration },
|
||||||
|
|
||||||
LLMProviders.GROQ => new ProviderGroq { InstanceName = instanceName, AdditionalJsonApiParameters = expertProviderApiParameter, IsEnterpriseConfiguration = isEnterpriseConfiguration },
|
LLMProviders.GROQ => new ProviderGroq { InstanceName = instanceName, AdditionalJsonApiParameters = expertProviderApiParameter, TokenizerPath = tokenizerPath, IsEnterpriseConfiguration = isEnterpriseConfiguration },
|
||||||
LLMProviders.FIREWORKS => new ProviderFireworks { InstanceName = instanceName, AdditionalJsonApiParameters = expertProviderApiParameter, IsEnterpriseConfiguration = isEnterpriseConfiguration },
|
LLMProviders.FIREWORKS => new ProviderFireworks { InstanceName = instanceName, AdditionalJsonApiParameters = expertProviderApiParameter, TokenizerPath = tokenizerPath, IsEnterpriseConfiguration = isEnterpriseConfiguration },
|
||||||
LLMProviders.HUGGINGFACE => new ProviderHuggingFace(inferenceProvider, model) { InstanceName = instanceName, AdditionalJsonApiParameters = expertProviderApiParameter, IsEnterpriseConfiguration = isEnterpriseConfiguration },
|
LLMProviders.HUGGINGFACE => new ProviderHuggingFace(inferenceProvider, model) { InstanceName = instanceName, AdditionalJsonApiParameters = expertProviderApiParameter, TokenizerPath = tokenizerPath, IsEnterpriseConfiguration = isEnterpriseConfiguration },
|
||||||
|
|
||||||
LLMProviders.SELF_HOSTED => new ProviderSelfHosted(host, hostname) { InstanceName = instanceName, AdditionalJsonApiParameters = expertProviderApiParameter, IsEnterpriseConfiguration = isEnterpriseConfiguration },
|
LLMProviders.SELF_HOSTED => new ProviderSelfHosted(host, hostname) { InstanceName = instanceName, AdditionalJsonApiParameters = expertProviderApiParameter, TokenizerPath = tokenizerPath, IsEnterpriseConfiguration = isEnterpriseConfiguration },
|
||||||
|
|
||||||
LLMProviders.HELMHOLTZ => new ProviderHelmholtz { InstanceName = instanceName, AdditionalJsonApiParameters = expertProviderApiParameter, IsEnterpriseConfiguration = isEnterpriseConfiguration },
|
LLMProviders.HELMHOLTZ => new ProviderHelmholtz { InstanceName = instanceName, AdditionalJsonApiParameters = expertProviderApiParameter, TokenizerPath = tokenizerPath, IsEnterpriseConfiguration = isEnterpriseConfiguration },
|
||||||
LLMProviders.GWDG => new ProviderGWDG { InstanceName = instanceName, AdditionalJsonApiParameters = expertProviderApiParameter, IsEnterpriseConfiguration = isEnterpriseConfiguration },
|
LLMProviders.GWDG => new ProviderGWDG { InstanceName = instanceName, AdditionalJsonApiParameters = expertProviderApiParameter, TokenizerPath = tokenizerPath, IsEnterpriseConfiguration = isEnterpriseConfiguration },
|
||||||
|
|
||||||
_ => new NoProvider(),
|
_ => new NoProvider(),
|
||||||
};
|
};
|
||||||
@ -442,4 +442,4 @@ public static class LLMProvidersExtensions
|
|||||||
LLMProviders.HUGGINGFACE => true,
|
LLMProviders.HUGGINGFACE => true,
|
||||||
_ => false,
|
_ => false,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
@ -18,6 +18,9 @@ public class NoProvider : IProvider
|
|||||||
/// <inheritdoc />
|
/// <inheritdoc />
|
||||||
public string AdditionalJsonApiParameters { get; init; } = string.Empty;
|
public string AdditionalJsonApiParameters { get; init; } = string.Empty;
|
||||||
|
|
||||||
|
/// <inheritdoc />
|
||||||
|
public string TokenizerPath { get; init; } = string.Empty;
|
||||||
|
|
||||||
public Task<IEnumerable<Model>> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default) => Task.FromResult<IEnumerable<Model>>([]);
|
public Task<IEnumerable<Model>> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default) => Task.FromResult<IEnumerable<Model>>([]);
|
||||||
|
|
||||||
public Task<IEnumerable<Model>> GetImageModels(string? apiKeyProvisional = null, CancellationToken token = default) => Task.FromResult<IEnumerable<Model>>([]);
|
public Task<IEnumerable<Model>> GetImageModels(string? apiKeyProvisional = null, CancellationToken token = default) => Task.FromResult<IEnumerable<Model>>([]);
|
||||||
@ -45,4 +48,4 @@ public class NoProvider : IProvider
|
|||||||
public IReadOnlyCollection<Capability> GetModelCapabilities(Model model) => [ Capability.NONE ];
|
public IReadOnlyCollection<Capability> GetModelCapabilities(Model model) => [ Capability.NONE ];
|
||||||
|
|
||||||
#endregion
|
#endregion
|
||||||
}
|
}
|
||||||
|
|||||||
@ -19,7 +19,8 @@ public sealed record EmbeddingProvider(
|
|||||||
bool IsEnterpriseConfiguration = false,
|
bool IsEnterpriseConfiguration = false,
|
||||||
Guid EnterpriseConfigurationPluginId = default,
|
Guid EnterpriseConfigurationPluginId = default,
|
||||||
string Hostname = "http://localhost:1234",
|
string Hostname = "http://localhost:1234",
|
||||||
Host Host = Host.NONE) : ConfigurationBaseObject, ISecretId
|
Host Host = Host.NONE,
|
||||||
|
string TokenizerPath = "") : ConfigurationBaseObject, ISecretId
|
||||||
{
|
{
|
||||||
private static readonly ILogger<EmbeddingProvider> LOGGER = Program.LOGGER_FACTORY.CreateLogger<EmbeddingProvider>();
|
private static readonly ILogger<EmbeddingProvider> LOGGER = Program.LOGGER_FACTORY.CreateLogger<EmbeddingProvider>();
|
||||||
|
|
||||||
@ -96,6 +97,13 @@ public sealed record EmbeddingProvider(
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var tokenizerPath = string.Empty;
|
||||||
|
if (table.TryGetValue("TokenizerPath", out var tokenizerPathValue) && !tokenizerPathValue.TryRead<string>(out tokenizerPath))
|
||||||
|
{
|
||||||
|
LOGGER.LogWarning($"The configured embedding provider {idx} does not contain a valid tokenizer path. (Plugin ID: {configPluginId})");
|
||||||
|
tokenizerPath = string.Empty;
|
||||||
|
}
|
||||||
|
|
||||||
provider = new EmbeddingProvider
|
provider = new EmbeddingProvider
|
||||||
{
|
{
|
||||||
Num = 0, // will be set later by the PluginConfigurationObject
|
Num = 0, // will be set later by the PluginConfigurationObject
|
||||||
@ -108,6 +116,7 @@ public sealed record EmbeddingProvider(
|
|||||||
EnterpriseConfigurationPluginId = configPluginId,
|
EnterpriseConfigurationPluginId = configPluginId,
|
||||||
Hostname = hostname,
|
Hostname = hostname,
|
||||||
Host = host,
|
Host = host,
|
||||||
|
TokenizerPath = tokenizerPath,
|
||||||
};
|
};
|
||||||
|
|
||||||
// Handle encrypted API key if present:
|
// Handle encrypted API key if present:
|
||||||
|
|||||||
@ -32,7 +32,8 @@ public sealed record Provider(
|
|||||||
string Hostname = "http://localhost:1234",
|
string Hostname = "http://localhost:1234",
|
||||||
Host Host = Host.NONE,
|
Host Host = Host.NONE,
|
||||||
HFInferenceProvider HFInferenceProvider = HFInferenceProvider.NONE,
|
HFInferenceProvider HFInferenceProvider = HFInferenceProvider.NONE,
|
||||||
string AdditionalJsonApiParameters = "") : ConfigurationBaseObject, ISecretId
|
string AdditionalJsonApiParameters = "",
|
||||||
|
string TokenizerPath = "") : ConfigurationBaseObject, ISecretId
|
||||||
{
|
{
|
||||||
private static readonly ILogger<Provider> LOGGER = Program.LOGGER_FACTORY.CreateLogger<Provider>();
|
private static readonly ILogger<Provider> LOGGER = Program.LOGGER_FACTORY.CreateLogger<Provider>();
|
||||||
|
|
||||||
@ -151,6 +152,13 @@ public sealed record Provider(
|
|||||||
additionalJsonApiParameters = string.Empty;
|
additionalJsonApiParameters = string.Empty;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var tokenizerPath = string.Empty;
|
||||||
|
if (table.TryGetValue("TokenizerPath", out var tokenizerPathValue) && !tokenizerPathValue.TryRead<string>(out tokenizerPath))
|
||||||
|
{
|
||||||
|
LOGGER.LogWarning($"The configured provider {idx} does not contain a valid tokenizer path. (Plugin ID: {configPluginId})");
|
||||||
|
tokenizerPath = string.Empty;
|
||||||
|
}
|
||||||
|
|
||||||
provider = new Provider
|
provider = new Provider
|
||||||
{
|
{
|
||||||
Num = 0, // will be set later by the PluginConfigurationObject
|
Num = 0, // will be set later by the PluginConfigurationObject
|
||||||
@ -165,6 +173,7 @@ public sealed record Provider(
|
|||||||
Host = host,
|
Host = host,
|
||||||
HFInferenceProvider = hfInferenceProvider,
|
HFInferenceProvider = hfInferenceProvider,
|
||||||
AdditionalJsonApiParameters = additionalJsonApiParameters,
|
AdditionalJsonApiParameters = additionalJsonApiParameters,
|
||||||
|
TokenizerPath = tokenizerPath,
|
||||||
};
|
};
|
||||||
|
|
||||||
// Handle encrypted API key if present:
|
// Handle encrypted API key if present:
|
||||||
|
|||||||
@ -41,6 +41,7 @@ pptx-to-md = "0.4.0"
|
|||||||
tempfile = "3.27.0"
|
tempfile = "3.27.0"
|
||||||
strum_macros = "0.28.0"
|
strum_macros = "0.28.0"
|
||||||
sysinfo = "0.38.4"
|
sysinfo = "0.38.4"
|
||||||
|
tokenizers = "0.22.2"
|
||||||
|
|
||||||
# Fixes security vulnerability downstream, where the upstream is not fixed yet:
|
# Fixes security vulnerability downstream, where the upstream is not fixed yet:
|
||||||
time = "0.3.47" # -> Rocket
|
time = "0.3.47" # -> Rocket
|
||||||
|
|||||||
@ -1,5 +1,4 @@
|
|||||||
use rocket::yansi::Paint;
|
use std::fs;
|
||||||
use std::fs;
|
|
||||||
use std::path::{PathBuf};
|
use std::path::{PathBuf};
|
||||||
use std::sync::OnceLock;
|
use std::sync::OnceLock;
|
||||||
use rocket::{post};
|
use rocket::{post};
|
||||||
@ -75,23 +74,16 @@ fn validate_tokenizer_at_path(path: &PathBuf) -> Result<usize, TokenizerError> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let tokenizer = Tokenizer::from_file(path).map_err(|e| {
|
let tokenizer = Tokenizer::from_file(path).map_err(|e| {
|
||||||
println!("Failed to load tokenizer from {}: {}", Paint::red(&path.display()), e);
|
|
||||||
TokenizerError::from(format!(
|
TokenizerError::from(format!(
|
||||||
"Failed to load tokenizer from '{}': {}",
|
"Failed to load tokenizer from '{}': {}",
|
||||||
path.display(),
|
path.display(),
|
||||||
e
|
e
|
||||||
))
|
))
|
||||||
})?;
|
})?;
|
||||||
println!("Loaded tokenizer from {}", Paint::green(&path.display()));
|
|
||||||
|
|
||||||
let test_string = "Hello, world! This is a test string for tokenizer validation.";
|
let test_string = "Hello, world! This is a test string for tokenizer validation.";
|
||||||
|
|
||||||
let encoding = tokenizer.encode(test_string, true).map_err(|e| {
|
let encoding = tokenizer.encode(test_string, true).map_err(|e| {
|
||||||
println!(
|
|
||||||
"Tokenizer failed to encode validation string for {}: {}",
|
|
||||||
Paint::red(&path.display()),
|
|
||||||
e
|
|
||||||
);
|
|
||||||
TokenizerError::from(format!(
|
TokenizerError::from(format!(
|
||||||
"Tokenizer failed to encode validation string: {}",
|
"Tokenizer failed to encode validation string: {}",
|
||||||
e
|
e
|
||||||
@ -114,7 +106,7 @@ fn validate_tokenizer_at_path(path: &PathBuf) -> Result<usize, TokenizerError> {
|
|||||||
Ok(token_count)
|
Ok(token_count)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn handle_tokenizer_store(payload: &TokenizerStorage) -> Result<(), std::io::Error> {
|
fn handle_tokenizer_store(payload: &TokenizerStorage) -> Result<String, std::io::Error> {
|
||||||
let data_dir = DATA_DIRECTORY
|
let data_dir = DATA_DIRECTORY
|
||||||
.get()
|
.get()
|
||||||
.ok_or_else(|| std::io::Error::new(std::io::ErrorKind::Other, "DATA_DIRECTORY not initialized"))?;
|
.ok_or_else(|| std::io::Error::new(std::io::ErrorKind::Other, "DATA_DIRECTORY not initialized"))?;
|
||||||
@ -124,11 +116,11 @@ fn handle_tokenizer_store(payload: &TokenizerStorage) -> Result<(), std::io::Err
|
|||||||
// Delete previous model if file_path is empty
|
// Delete previous model if file_path is empty
|
||||||
if payload.file_path.trim().is_empty() {
|
if payload.file_path.trim().is_empty() {
|
||||||
if payload.previous_model_id.trim().is_empty() {
|
if payload.previous_model_id.trim().is_empty() {
|
||||||
return Ok(()); // Nothing to delete
|
return Ok(String::from("")); // Nothing to delete
|
||||||
}
|
}
|
||||||
let previous_path = base_path.join(&payload.previous_model_id);
|
let previous_path = base_path.join(&payload.previous_model_id);
|
||||||
fs::remove_dir_all(previous_path)?;
|
fs::remove_dir_all(previous_path)?;
|
||||||
return Ok(());
|
return Ok(String::from(""));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Copy file
|
// Copy file
|
||||||
@ -136,22 +128,28 @@ fn handle_tokenizer_store(payload: &TokenizerStorage) -> Result<(), std::io::Err
|
|||||||
let source_name = source_path.file_name()
|
let source_name = source_path.file_name()
|
||||||
.and_then(|n| n.to_str())
|
.and_then(|n| n.to_str())
|
||||||
.ok_or_else(|| std::io::Error::new(std::io::ErrorKind::InvalidInput, "Invalid tokenizer file path"))?;
|
.ok_or_else(|| std::io::Error::new(std::io::ErrorKind::InvalidInput, "Invalid tokenizer file path"))?;
|
||||||
fs::create_dir_all(&base_path.join(&payload.model_id))?;
|
let model_path = &base_path.join(&payload.model_id);
|
||||||
let destination_path = base_path.join(&payload.model_id).join(source_name);
|
let destination_path = &model_path.join(source_name);
|
||||||
println!("Moving tokenizer file from {} to {}", source_path.display(), destination_path.display());
|
println!("source_path: {}, destination_path: {}", source_path.display(), destination_path.display());
|
||||||
|
println!("equals {}", source_path.eq(destination_path));
|
||||||
|
|
||||||
|
if !source_path.eq(destination_path) && model_path.exists() {
|
||||||
|
fs::remove_dir_all(model_path)?;
|
||||||
|
}
|
||||||
|
fs::create_dir_all(model_path)?;
|
||||||
|
println!("Moving tokenizer file from {} to {}", source_path.display(), destination_path.display());
|
||||||
let previous_path = base_path.join(&payload.previous_model_id);
|
let previous_path = base_path.join(&payload.previous_model_id);
|
||||||
|
|
||||||
// Delete previous tokenizer folder if specified
|
// Delete previous tokenizer folder if specified
|
||||||
if !payload.previous_model_id.trim().is_empty() && source_path.starts_with(&previous_path){
|
if !payload.previous_model_id.trim().is_empty() && source_path.starts_with(&previous_path){
|
||||||
fs::rename(&source_path, &destination_path)?;
|
fs::rename(&source_path, &destination_path)?;
|
||||||
if previous_path.exists() {
|
if previous_path.exists() && !previous_path.eq(model_path) {
|
||||||
fs::remove_dir_all(previous_path)?;
|
fs::remove_dir_all(previous_path)?;
|
||||||
}
|
}
|
||||||
}else{
|
}else{
|
||||||
fs::copy( & source_path, & destination_path)?;
|
fs::copy( & source_path, & destination_path)?;
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(destination_path.to_str().unwrap().to_string())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_token_count(text: &str) -> Result<usize, TokenizerError> {
|
pub fn get_token_count(text: &str) -> Result<usize, TokenizerError> {
|
||||||
@ -179,10 +177,10 @@ pub fn validate_tokenizer(_token: APIToken, payload: Json<TokenizerValidation>)
|
|||||||
pub fn store_tokenizer(_token: APIToken, payload: Json<TokenizerStorage>) -> Json<TokenizerResponse>{
|
pub fn store_tokenizer(_token: APIToken, payload: Json<TokenizerStorage>) -> Json<TokenizerResponse>{
|
||||||
println!("Received tokenizer store request: {}, {}, {}", payload.model_id, payload.previous_model_id, payload.file_path);
|
println!("Received tokenizer store request: {}, {}, {}", payload.model_id, payload.previous_model_id, payload.file_path);
|
||||||
match handle_tokenizer_store(&payload) {
|
match handle_tokenizer_store(&payload) {
|
||||||
Ok(()) => Json(TokenizerResponse {
|
Ok(dest_path) => Json(TokenizerResponse {
|
||||||
success: true,
|
success: true,
|
||||||
token_count: 0,
|
token_count: 0,
|
||||||
message: "Success".to_string(),
|
message: dest_path,
|
||||||
}),
|
}),
|
||||||
Err(e) => Json(TokenizerResponse {
|
Err(e) => Json(TokenizerResponse {
|
||||||
success: false,
|
success: false,
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user