mirror of
https://github.com/MindWorkAI/AI-Studio.git
synced 2026-05-13 21:44:21 +00:00
added possibility to add tokenizer via plugin and refactored earlier changes
This commit is contained in:
parent
95fb213069
commit
aa554d4a53
@ -2584,6 +2584,9 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELEMBEDDINGS::T32678
|
||||
-- Close
|
||||
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELEMBEDDINGS::T3448155331"] = "Close"
|
||||
|
||||
-- Couldn't delete the embedding provider '{0}'. The issue: {1}. We can ignore this issue and delete the embedding provider anyway. Do you want to ignore it and delete this embedding provider?
|
||||
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELEMBEDDINGS::T3703173892"] = "Couldn't delete the embedding provider '{0}'. The issue: {1}. We can ignore this issue and delete the embedding provider anyway. Do you want to ignore it and delete this embedding provider?"
|
||||
|
||||
-- Actions
|
||||
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELEMBEDDINGS::T3865031940"] = "Actions"
|
||||
|
||||
|
||||
@ -2,6 +2,8 @@ using System.Globalization;
|
||||
using AIStudio.Dialogs;
|
||||
using AIStudio.Provider;
|
||||
using AIStudio.Settings;
|
||||
using AIStudio.Tools.Services;
|
||||
using AIStudio.Tools.Rust;
|
||||
|
||||
using Microsoft.AspNetCore.Components;
|
||||
|
||||
@ -108,16 +110,44 @@ public partial class SettingsPanelEmbeddings : SettingsPanelProviderBase
|
||||
return;
|
||||
|
||||
var deleteSecretResponse = await this.RustService.DeleteAPIKey(provider, SecretStoreType.EMBEDDING_PROVIDER);
|
||||
if(deleteSecretResponse.Success)
|
||||
var deleteTokenizerResponse = await this.RustService.DeleteTokenizer(TokenizerModelId.ForEmbeddingProvider(provider));
|
||||
if(deleteSecretResponse.Success && deleteTokenizerResponse.Success)
|
||||
{
|
||||
this.SettingsManager.ConfigurationData.EmbeddingProviders.Remove(provider);
|
||||
await this.SettingsManager.StoreSettings();
|
||||
}
|
||||
else
|
||||
{
|
||||
var issueDialogParameters = new DialogParameters<ConfirmDialog>
|
||||
{
|
||||
{ x => x.Message, string.Format(T("Couldn't delete the embedding provider '{0}'. The issue: {1}. We can ignore this issue and delete the embedding provider anyway. Do you want to ignore it and delete this embedding provider?"), provider.Name, BuildDeleteIssue(deleteSecretResponse, deleteTokenizerResponse)) },
|
||||
};
|
||||
|
||||
var issueDialogReference = await this.DialogService.ShowAsync<ConfirmDialog>(T("Delete Embedding Provider"), issueDialogParameters, DialogOptions.FULLSCREEN);
|
||||
var issueDialogResult = await issueDialogReference.Result;
|
||||
if (issueDialogResult is null || issueDialogResult.Canceled)
|
||||
return;
|
||||
|
||||
this.SettingsManager.ConfigurationData.EmbeddingProviders.Remove(provider);
|
||||
await this.SettingsManager.StoreSettings();
|
||||
}
|
||||
|
||||
await this.UpdateEmbeddingProviders();
|
||||
await this.MessageBus.SendMessage<bool>(this, Event.CONFIGURATION_CHANGED);
|
||||
}
|
||||
|
||||
/// <summary>
/// Builds the issue text that is inserted into the "couldn't delete the embedding provider" dialog.
/// </summary>
/// <param name="deleteSecretResponse">The result of deleting the provider's API key.</param>
/// <param name="deleteTokenizerResponse">The result of deleting the provider's tokenizer.</param>
/// <returns>The issue of every failed operation, joined by " | "; an empty string when neither failed.</returns>
private static string BuildDeleteIssue(DeleteSecretResponse deleteSecretResponse, TokenizerResponse deleteTokenizerResponse)
{
    return string.Join(" | ", CollectFailures());

    // Yields the secret issue first and the tokenizer message second, each only when its operation failed.
    IEnumerable<string> CollectFailures()
    {
        if (!deleteSecretResponse.Success)
            yield return deleteSecretResponse.Issue;

        if (!deleteTokenizerResponse.Success)
            yield return deleteTokenizerResponse.Message;
    }
}
|
||||
|
||||
private async Task ExportEmbeddingProvider(EmbeddingProvider provider)
|
||||
{
|
||||
if (!this.SettingsManager.ConfigurationData.App.ShowAdminSettings)
|
||||
|
||||
@ -3,6 +3,8 @@ using System.Diagnostics.CodeAnalysis;
|
||||
using AIStudio.Dialogs;
|
||||
using AIStudio.Provider;
|
||||
using AIStudio.Settings;
|
||||
using AIStudio.Tools.Rust;
|
||||
using AIStudio.Tools.Services;
|
||||
|
||||
using Microsoft.AspNetCore.Components;
|
||||
|
||||
@ -109,7 +111,8 @@ public partial class SettingsPanelProviders : SettingsPanelProviderBase
|
||||
return;
|
||||
|
||||
var deleteSecretResponse = await this.RustService.DeleteAPIKey(provider, SecretStoreType.LLM_PROVIDER);
|
||||
if(deleteSecretResponse.Success)
|
||||
var deleteTokenizerResponse = await this.RustService.DeleteTokenizer(TokenizerModelId.ForProvider(provider));
|
||||
if(deleteSecretResponse.Success && deleteTokenizerResponse.Success)
|
||||
{
|
||||
this.SettingsManager.ConfigurationData.Providers.Remove(provider);
|
||||
await this.SettingsManager.StoreSettings();
|
||||
@ -118,7 +121,7 @@ public partial class SettingsPanelProviders : SettingsPanelProviderBase
|
||||
{
|
||||
var issueDialogParameters = new DialogParameters<ConfirmDialog>
|
||||
{
|
||||
{ x => x.Message, string.Format(T("Couldn't delete the provider '{0}'. The issue: {1}. We can ignore this issue and delete the provider anyway. Do you want to ignore it and delete this provider?"), provider.InstanceName, deleteSecretResponse.Issue) },
|
||||
{ x => x.Message, string.Format(T("Couldn't delete the provider '{0}'. The issue: {1}. We can ignore this issue and delete the provider anyway. Do you want to ignore it and delete this provider?"), provider.InstanceName, BuildDeleteIssue(deleteSecretResponse, deleteTokenizerResponse)) },
|
||||
};
|
||||
|
||||
var issueDialogReference = await this.DialogService.ShowAsync<ConfirmDialog>(T("Delete LLM Provider"), issueDialogParameters, DialogOptions.FULLSCREEN);
|
||||
@ -135,6 +138,18 @@ public partial class SettingsPanelProviders : SettingsPanelProviderBase
|
||||
await this.MessageBus.SendMessage<bool>(this, Event.CONFIGURATION_CHANGED);
|
||||
}
|
||||
|
||||
/// <summary>
/// Builds the issue text that is inserted into the "couldn't delete the provider" dialog.
/// </summary>
/// <param name="deleteSecretResponse">The result of deleting the provider's API key.</param>
/// <param name="deleteTokenizerResponse">The result of deleting the provider's tokenizer.</param>
/// <returns>The issue of every failed operation, joined by " | "; an empty string when neither failed.</returns>
private static string BuildDeleteIssue(DeleteSecretResponse deleteSecretResponse, TokenizerResponse deleteTokenizerResponse)
{
    return string.Join(" | ", CollectFailures());

    // Yields the secret issue first and the tokenizer message second, each only when its operation failed.
    IEnumerable<string> CollectFailures()
    {
        if (!deleteSecretResponse.Success)
            yield return deleteSecretResponse.Issue;

        if (!deleteTokenizerResponse.Success)
            yield return deleteTokenizerResponse.Message;
    }
}
|
||||
|
||||
private async Task ExportLLMProvider(AIStudio.Settings.Provider provider)
|
||||
{
|
||||
if (!this.SettingsManager.ConfigurationData.App.ShowAdminSettings)
|
||||
|
||||
@ -163,13 +163,11 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId
|
||||
// Load the used instance names:
|
||||
this.UsedInstanceNames = this.SettingsManager.ConfigurationData.EmbeddingProviders.Select(x => x.Name.ToLowerInvariant()).ToList();
|
||||
|
||||
Console.WriteLine($"Previous instance names: {this.dataEditingPreviousInstanceName}");
|
||||
// When editing, we need to load the data:
|
||||
if(this.IsEditing)
|
||||
{
|
||||
this.dataEditingPreviousInstanceName = this.DataName.ToLowerInvariant();
|
||||
this.dataFilePath = this.DataTokenizerPath;
|
||||
Console.WriteLine($"Previous instance name is '{this.dataEditingPreviousInstanceName}'");
|
||||
|
||||
// When using self-hosted embedding, we must copy the model name:
|
||||
if (this.DataLLMProvider is LLMProviders.SELF_HOSTED)
|
||||
@ -243,8 +241,7 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId
|
||||
if (!this.dataIsValid)
|
||||
return;
|
||||
|
||||
var response = await this.RustService.StoreTokenizer("embedding_"+this.DataName, "embedding_"+this.dataEditingPreviousInstanceName, this.dataFilePath);
|
||||
Console.WriteLine($"Response from Rust: {response.Message}");
|
||||
var response = await this.RustService.StoreTokenizer(TokenizerModelId.ForEmbeddingProviderId(this.DataId), this.dataFilePath);
|
||||
if (!response.Success)
|
||||
{
|
||||
this.dataCustomTokenizerValidationIssue = response.Message;
|
||||
|
||||
@ -268,7 +268,7 @@ public partial class ProviderDialog : MSGComponentBase, ISecretId
|
||||
if (!this.dataIsValid)
|
||||
return;
|
||||
|
||||
var tokenizerResponse = await this.RustService.StoreTokenizer("chat_"+this.DataInstanceName, "chat_"+this.dataEditingPreviousInstanceName, this.dataFilePath);
|
||||
var tokenizerResponse = await this.RustService.StoreTokenizer(TokenizerModelId.ForProviderId(this.DataId), this.dataFilePath);
|
||||
if (!tokenizerResponse.Success)
|
||||
{
|
||||
this.dataCustomTokenizerValidationIssue = tokenizerResponse.Message;
|
||||
|
||||
@ -73,6 +73,9 @@ CONFIG["LLM_PROVIDERS"] = {}
|
||||
-- -- Please do not add the enclosing curly braces {} here. Also, no trailing comma is allowed.
|
||||
-- ["AdditionalJsonApiParameters"] = "",
|
||||
--
|
||||
-- -- Optional: tokenizer path for this provider relative to the plugin directory.
|
||||
-- -- ["TokenizerPath"] = "",
|
||||
--
|
||||
-- -- Optional: Hugging Face inference provider. Only relevant for UsedLLMProvider = HUGGINGFACE.
|
||||
-- -- Allowed values are: CEREBRAS, NEBIUS_AI_STUDIO, SAMBANOVA, NOVITA, HYPERBOLIC, TOGETHER_AI, FIREWORKS, HF_INFERENCE_API
|
||||
-- -- ["HFInferenceProvider"] = "NOVITA",
|
||||
@ -129,6 +132,9 @@ CONFIG["EMBEDDING_PROVIDERS"] = {}
|
||||
--
|
||||
-- -- Optional: Encrypted API key (see LLM_PROVIDERS example for details)
|
||||
-- -- ["APIKey"] = "ENC:v1:<base64-encoded encrypted data>",
|
||||
|
||||
-- -- Optional: tokenizer path for this provider relative to the plugin directory.
|
||||
-- -- ["TokenizerPath"] = "",
|
||||
--
|
||||
-- ["Model"] = {
|
||||
-- ["Id"] = "<the model ID, e.g., nomic-embed-text>",
|
||||
|
||||
@ -189,6 +189,8 @@ public sealed record EmbeddingProvider(
|
||||
["Id"] = "{{Guid.NewGuid().ToString()}}",
|
||||
["Name"] = "{{LuaTools.EscapeLuaString(this.Name)}}",
|
||||
["UsedLLMProvider"] = "{{this.UsedLLMProvider}}",
|
||||
|
||||
["TokenizerPath"] = "{{this.TokenizerPath}}",
|
||||
|
||||
["Host"] = "{{this.Host}}",
|
||||
["Hostname"] = "{{LuaTools.EscapeLuaString(this.Hostname)}}",
|
||||
|
||||
@ -254,6 +254,8 @@ public sealed record Provider(
|
||||
["Id"] = "{{Guid.NewGuid().ToString()}}",
|
||||
["InstanceName"] = "{{LuaTools.EscapeLuaString(this.InstanceName)}}",
|
||||
["UsedLLMProvider"] = "{{this.UsedLLMProvider}}",
|
||||
|
||||
["TokenizerPath"] = "{{this.TokenizerPath}}",
|
||||
|
||||
["Host"] = "{{this.Host}}",
|
||||
["Hostname"] = "{{LuaTools.EscapeLuaString(this.Hostname)}}",
|
||||
|
||||
@ -30,6 +30,8 @@ public sealed class PluginConfiguration(bool isInternal, LuaState state, PluginT
|
||||
|
||||
if (!dryRun)
|
||||
{
|
||||
await PluginConfigurationObject.SyncManagedTokenizersAsync(this.Id, this.PluginPath);
|
||||
|
||||
// Store any decrypted API keys from enterprise configuration in the OS keyring:
|
||||
await StoreEnterpriseApiKeysAsync();
|
||||
|
||||
|
||||
@ -1,3 +1,4 @@
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using System.Linq.Expressions;
|
||||
|
||||
using AIStudio.Settings;
|
||||
@ -162,6 +163,42 @@ public sealed record PluginConfigurationObject
|
||||
return true;
|
||||
}
|
||||
|
||||
/// <summary>
/// Synchronizes the tokenizers of all LLM providers and embedding providers that are
/// enterprise-managed by the given configuration plugin, replacing each list entry whose
/// synchronized copy differs from the stored one.
/// </summary>
/// <param name="configPluginId">The ID of the configuration plugin whose providers are processed.</param>
/// <param name="pluginPath">The plugin directory; passed on to the per-provider synchronization.</param>
/// <returns>True when at least one provider entry was replaced.</returns>
[SuppressMessage("Usage", "MWAIS0001:Direct access to `Providers` is not allowed", Justification = "Tokenizer synchronization needs indexed access to update enterprise-managed providers in place.")]
public static async Task<bool> SyncManagedTokenizersAsync(Guid configPluginId, string pluginPath)
{
    var anyEntryReplaced = false;

    // LLM providers: indexed access, because entries are replaced in place.
    for (var index = 0; index < SETTINGS_MANAGER.ConfigurationData.Providers.Count; index++)
    {
        var current = SETTINGS_MANAGER.ConfigurationData.Providers[index];
        var isManagedByPlugin = current.IsEnterpriseConfiguration && current.EnterpriseConfigurationPluginId == configPluginId;
        if (!isManagedByPlugin)
            continue;

        var updated = await SyncProviderTokenizerAsync(current, pluginPath);
        if (updated != current)
        {
            SETTINGS_MANAGER.ConfigurationData.Providers[index] = updated;
            anyEntryReplaced = true;
        }
    }

    // Embedding providers: same procedure.
    for (var index = 0; index < SETTINGS_MANAGER.ConfigurationData.EmbeddingProviders.Count; index++)
    {
        var current = SETTINGS_MANAGER.ConfigurationData.EmbeddingProviders[index];
        var isManagedByPlugin = current.IsEnterpriseConfiguration && current.EnterpriseConfigurationPluginId == configPluginId;
        if (!isManagedByPlugin)
            continue;

        var updated = await SyncEmbeddingTokenizerAsync(current, pluginPath);
        if (updated != current)
        {
            SETTINGS_MANAGER.ConfigurationData.EmbeddingProviders[index] = updated;
            anyEntryReplaced = true;
        }
    }

    return anyEntryReplaced;
}
|
||||
|
||||
/// <summary>
|
||||
/// Cleans up configuration objects of a specified type that are no longer associated with any available plugin.
|
||||
/// </summary>
|
||||
@ -217,6 +254,19 @@ public sealed record PluginConfigurationObject
|
||||
var wasConfigurationChanged = leftOverObjects.Count > 0;
|
||||
foreach (var item in leftOverObjects.Distinct())
|
||||
{
|
||||
if (item is Settings.Provider provider)
|
||||
{
|
||||
var deleteTokenizerResult = await RUST_SERVICE.DeleteTokenizer(TokenizerModelId.ForProvider(provider));
|
||||
if (!deleteTokenizerResult.Success)
|
||||
LOG.LogWarning("Failed to delete tokenizer for removed enterprise provider '{ProviderName}': {Issue}", provider.InstanceName, deleteTokenizerResult.Message);
|
||||
}
|
||||
else if (item is EmbeddingProvider embeddingProvider)
|
||||
{
|
||||
var deleteTokenizerResult = await RUST_SERVICE.DeleteTokenizer(TokenizerModelId.ForEmbeddingProvider(embeddingProvider));
|
||||
if (!deleteTokenizerResult.Success)
|
||||
LOG.LogWarning("Failed to delete tokenizer for removed enterprise embedding provider '{ProviderName}': {Issue}", embeddingProvider.Name, deleteTokenizerResult.Message);
|
||||
}
|
||||
|
||||
configuredObjects.Remove(item);
|
||||
|
||||
// Delete the API key from the OS keyring if the removed object has one:
|
||||
@ -232,4 +282,89 @@ public sealed record PluginConfigurationObject
|
||||
|
||||
return wasConfigurationChanged;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Synchronizes the tokenizer of a single LLM provider and returns a copy of the provider
/// that carries the resulting tokenizer path.
/// </summary>
/// <param name="provider">The provider whose configured tokenizer path is synchronized.</param>
/// <param name="pluginPath">The plugin directory the configured path is resolved against.</param>
/// <returns>A copy of <paramref name="provider"/> with <c>TokenizerPath</c> set to the synchronization result.</returns>
private static async Task<Settings.Provider> SyncProviderTokenizerAsync(Settings.Provider provider, string pluginPath)
{
    var modelId = TokenizerModelId.ForProvider(provider);
    var logName = $"provider '{provider.InstanceName}'";
    var storedPath = await SyncTokenizerAsync(provider.TokenizerPath, pluginPath, modelId, logName);

    return provider with { TokenizerPath = storedPath };
}
|
||||
|
||||
/// <summary>
/// Synchronizes the tokenizer of a single embedding provider and returns a copy of the
/// provider that carries the resulting tokenizer path.
/// </summary>
/// <param name="provider">The embedding provider whose configured tokenizer path is synchronized.</param>
/// <param name="pluginPath">The plugin directory the configured path is resolved against.</param>
/// <returns>A copy of <paramref name="provider"/> with <c>TokenizerPath</c> set to the synchronization result.</returns>
private static async Task<EmbeddingProvider> SyncEmbeddingTokenizerAsync(EmbeddingProvider provider, string pluginPath)
{
    var modelId = TokenizerModelId.ForEmbeddingProvider(provider);
    var logName = $"embedding provider '{provider.Name}'";
    var storedPath = await SyncTokenizerAsync(provider.TokenizerPath, pluginPath, modelId, logName);

    return provider with { TokenizerPath = storedPath };
}
|
||||
|
||||
/// <summary>
/// Brings the tokenizer stored for <paramref name="modelId"/> in line with the configured path.
/// An empty, out-of-plugin, or invalid tokenizer causes the stored tokenizer to be deleted;
/// a valid one is stored through the Rust service.
/// </summary>
/// <param name="configuredTokenizerPath">The tokenizer path taken from the provider configuration.</param>
/// <param name="pluginPath">The plugin directory the configured path must stay within.</param>
/// <param name="modelId">The tokenizer model ID used for the store and delete calls.</param>
/// <param name="logName">A description of the provider, used in log messages only.</param>
/// <returns>
/// The message of the successful store call (presumably the stored tokenizer's path; confirm
/// against the Rust store route), or an empty string in every other case.
/// </returns>
private static async Task<string> SyncTokenizerAsync(string configuredTokenizerPath, string pluginPath, string modelId, string logName)
{
    // Nothing configured: remove whatever is stored for this model ID.
    if (string.IsNullOrWhiteSpace(configuredTokenizerPath))
    {
        var removal = await RUST_SERVICE.DeleteTokenizer(modelId);
        if (!removal.Success)
            LOG.LogWarning("Failed to delete tokenizer for {LogName}: {Issue}", logName, removal.Message);

        return string.Empty;
    }

    // The configured path could not be resolved to a location inside the plugin directory.
    if (ResolvePluginTokenizerPath(configuredTokenizerPath, pluginPath) is not { } resolvedPath)
    {
        var removal = await RUST_SERVICE.DeleteTokenizer(modelId);
        if (!removal.Success)
            LOG.LogWarning("Failed to delete tokenizer after invalid path for {LogName}: {Issue}", logName, removal.Message);

        LOG.LogWarning("The configured tokenizer path '{TokenizerPath}' for {LogName} is invalid. The tokenizer path must stay within the plugin directory '{PluginPath}'.", configuredTokenizerPath, logName, pluginPath);
        return string.Empty;
    }

    // The file exists inside the plugin, but the Rust side rejects it as a tokenizer.
    var validation = await RUST_SERVICE.ValidateTokenizer(resolvedPath);
    if (!validation.Success)
    {
        var removal = await RUST_SERVICE.DeleteTokenizer(modelId);
        if (!removal.Success)
            LOG.LogWarning("Failed to delete tokenizer after validation failure for {LogName}: {Issue}", logName, removal.Message);

        LOG.LogWarning("The configured tokenizer for {LogName} is invalid. Path='{TokenizerPath}', issue='{Issue}'", logName, resolvedPath, validation.Message);
        return string.Empty;
    }

    var storage = await RUST_SERVICE.StoreTokenizer(modelId, resolvedPath);
    if (storage.Success)
        return storage.Message;

    // A failed store does not delete a previously stored tokenizer; it is only logged.
    LOG.LogWarning("Failed to store tokenizer for {LogName}. Path='{TokenizerPath}', issue='{Issue}'", logName, resolvedPath, storage.Message);
    return string.Empty;
}
|
||||
|
||||
/// <summary>
/// Resolves a tokenizer path from a plugin configuration to an absolute path and makes sure
/// it points to a location strictly inside the plugin directory.
/// </summary>
/// <param name="configuredTokenizerPath">The configured path, interpreted relative to the plugin directory.</param>
/// <param name="pluginPath">The plugin directory.</param>
/// <returns>
/// The absolute path when it lies below the plugin directory; null when the plugin path is
/// blank, when the path resolves to the plugin directory itself, or when it leaves the directory.
/// </returns>
private static string? ResolvePluginTokenizerPath(string configuredTokenizerPath, string pluginPath)
{
    if (string.IsNullOrWhiteSpace(pluginPath))
        return null;

    // Path names are only compared case-insensitively where the file system is known to be
    // case-insensitive. On case-sensitive systems, ignoring case would accept a path such as
    // "../PLUGIN/file" as being inside ".../plugin/", although it is a different directory.
    var pathComparison = OperatingSystem.IsWindows()
        ? StringComparison.OrdinalIgnoreCase
        : StringComparison.Ordinal;

    var fullPluginPath = Path.GetFullPath(pluginPath);
    var candidatePath = Path.GetFullPath(Path.Combine(fullPluginPath, configuredTokenizerPath));

    // The plugin directory itself is not a tokenizer file.
    if (candidatePath.Equals(fullPluginPath, pathComparison))
        return null;

    // Compare against the directory including its trailing separator, so that a sibling
    // directory sharing the same name prefix (e.g. "plugin-other") is not accepted.
    var pluginPrefix = fullPluginPath.EndsWith(Path.DirectorySeparatorChar)
        ? fullPluginPath
        : fullPluginPath + Path.DirectorySeparatorChar;

    return candidatePath.StartsWith(pluginPrefix, pathComparison)
        ? candidatePath
        : null;
}
|
||||
}
|
||||
|
||||
@ -29,12 +29,11 @@ public sealed partial class RustService
|
||||
return await result.Content.ReadFromJsonAsync<TokenizerResponse>(this.jsonRustSerializerOptions);
|
||||
}
|
||||
|
||||
public async Task<TokenizerResponse> StoreTokenizer(string modelId, string previousmodelId, string filePath)
|
||||
public async Task<TokenizerResponse> StoreTokenizer(string modelId, string filePath)
|
||||
{
|
||||
this.logger!.LogInformation($"Storing tokenizer for model '{modelId}' with previous model '{previousmodelId}' from file '{filePath}'");
|
||||
this.logger!.LogInformation($"Storing tokenizer for model '{modelId}' from file '{filePath}'");
|
||||
var result = await this.http.PostAsJsonAsync("/tokenizer/store", new {
|
||||
model_id = modelId,
|
||||
previous_model_id = previousmodelId,
|
||||
file_path = filePath,
|
||||
}, this.jsonRustSerializerOptions);
|
||||
|
||||
@ -50,6 +49,26 @@ public sealed partial class RustService
|
||||
|
||||
return await result.Content.ReadFromJsonAsync<TokenizerResponse>(this.jsonRustSerializerOptions);
|
||||
}
|
||||
|
||||
/// <summary>
/// Asks the Rust runtime to delete the tokenizer stored for the given model ID by posting
/// to the <c>/tokenizer/delete</c> endpoint.
/// </summary>
/// <param name="modelId">The tokenizer model ID sent as <c>model_id</c>.</param>
/// <returns>
/// The response deserialized from the Rust runtime, or a failed response containing the
/// HTTP status code when the request did not succeed.
/// </returns>
public async Task<TokenizerResponse> DeleteTokenizer(string modelId)
{
    this.logger!.LogInformation($"Deleting tokenizer for model '{modelId}'");

    var requestBody = new { model_id = modelId };
    var httpResponse = await this.http.PostAsJsonAsync("/tokenizer/delete", requestBody, this.jsonRustSerializerOptions);

    if (httpResponse.IsSuccessStatusCode)
        return await httpResponse.Content.ReadFromJsonAsync<TokenizerResponse>(this.jsonRustSerializerOptions);

    // The request itself failed; report the status code instead of a runtime message.
    this.logger!.LogError($"Failed to delete the tokenizer '{httpResponse.StatusCode}'");
    return new TokenizerResponse
    {
        Success = false,
        Message = $"An error occured while sending the tokenizer delete request to the Rust framework: {httpResponse.StatusCode}",
        TokenCount = 0
    };
}
|
||||
|
||||
public async Task<TokenizerResponse?> GetTokenCount(string text)
|
||||
{
|
||||
|
||||
20
app/MindWork AI Studio/Tools/Services/TokenizerModelId.cs
Normal file
20
app/MindWork AI Studio/Tools/Services/TokenizerModelId.cs
Normal file
@ -0,0 +1,20 @@
|
||||
namespace AIStudio.Tools.Services;
|
||||
|
||||
/// <summary>
/// Creates the model IDs under which tokenizers are stored and deleted: the prefix
/// "chat_" for LLM providers and "embedding_" for embedding providers, followed by the
/// normalized provider ID.
/// </summary>
public static class TokenizerModelId
{
    /// <summary>Returns the tokenizer model ID for an LLM provider, based on its ID.</summary>
    public static string ForProvider(Settings.Provider provider) => ForProviderId(provider.Id);

    /// <summary>Returns the tokenizer model ID for an LLM provider ID.</summary>
    public static string ForProviderId(string guid) => $"chat_{NormalizeGuid(guid)}";

    /// <summary>Returns the tokenizer model ID for an embedding provider, based on its ID.</summary>
    public static string ForEmbeddingProvider(Settings.EmbeddingProvider provider) => ForEmbeddingProviderId(provider.Id);

    /// <summary>Returns the tokenizer model ID for an embedding provider ID.</summary>
    public static string ForEmbeddingProviderId(string guid) => $"embedding_{NormalizeGuid(guid)}";

    // A parseable GUID is rewritten in the "D" format, so different spellings of the same
    // GUID yield the same model ID; any other text is only trimmed.
    private static string NormalizeGuid(string guid) =>
        Guid.TryParse(guid, out var parsedGuid)
            ? parsedGuid.ToString("D")
            : guid.Trim();
}
|
||||
@ -11,7 +11,6 @@ use mindwork_ai_studio::environment::is_dev;
|
||||
use mindwork_ai_studio::log::init_logging;
|
||||
use mindwork_ai_studio::metadata::MetaData;
|
||||
use mindwork_ai_studio::runtime_api::start_runtime_api;
|
||||
use mindwork_ai_studio::tokenizer::{init_tokenizer};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
|
||||
@ -92,6 +92,7 @@ pub fn start_runtime_api() {
|
||||
crate::tokenizer::token_count,
|
||||
crate::tokenizer::validate_tokenizer,
|
||||
crate::tokenizer::store_tokenizer,
|
||||
crate::tokenizer::delete_tokenizer,
|
||||
crate::tokenizer::set_tokenizer,
|
||||
crate::app_window::register_shortcut,
|
||||
crate::app_window::validate_shortcut,
|
||||
|
||||
@ -9,6 +9,7 @@ use serde::Deserialize;
|
||||
use tauri::PathResolver;
|
||||
use tokenizers::Error;
|
||||
use tokenizers::tokenizer::{Tokenizer, Error as TokenizerError};
|
||||
use tokio::fs::try_exists;
|
||||
use crate::api_token::APIToken;
|
||||
use crate::environment::DATA_DIRECTORY;
|
||||
|
||||
@ -24,10 +25,14 @@ pub struct SetTokenText {
|
||||
#[derive(Clone, Deserialize)]
|
||||
pub struct TokenizerStorage {
|
||||
model_id: String,
|
||||
previous_model_id: String,
|
||||
file_path: String,
|
||||
}
|
||||
|
||||
/// JSON request body accepted by the `/tokenizer/delete` route.
#[derive(Clone, Deserialize)]
|
||||
pub struct TokenizerDelete {
|
||||
/// Name of the tokenizer's directory below `<data directory>/tokenizers`.
model_id: String,
|
||||
}
|
||||
|
||||
#[derive(Clone, Deserialize)]
|
||||
pub struct TokenizerPath {
|
||||
file_path: String,
|
||||
@ -68,7 +73,7 @@ fn tokenizer_state() -> &'static RwLock<Option<Tokenizer>> {
|
||||
TOKENIZER.get_or_init(|| RwLock::new(None))
|
||||
}
|
||||
|
||||
pub fn init_tokenizer(path: &str) -> Result<(), Error> {
|
||||
pub fn handle_tokenizer_set(path: &str) -> Result<(), Error> {
|
||||
let tokenizer_path = if path.trim().is_empty() {
|
||||
let relative_source_path = String::from("resources/tokenizers/tokenizer.json");
|
||||
let path_resolver = TOKENIZER_PATH_RESOLVER
|
||||
@ -90,7 +95,7 @@ pub fn init_tokenizer(path: &str) -> Result<(), Error> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn validate_tokenizer_at_path(path: &PathBuf) -> Result<usize, TokenizerError> {
|
||||
fn handle_tokenizer_validate(path: &PathBuf) -> Result<usize, TokenizerError> {
|
||||
if !path.is_file() {
|
||||
return Err(TokenizerError::from(format!(
|
||||
"Tokenizer file was not found: {}",
|
||||
@ -138,16 +143,6 @@ fn handle_tokenizer_store(payload: &TokenizerStorage) -> Result<String, std::io:
|
||||
|
||||
let base_path = PathBuf::from(data_dir).join("tokenizers");
|
||||
|
||||
if payload.file_path.trim().is_empty() {
|
||||
if payload.previous_model_id.trim().is_empty() {
|
||||
return Ok(String::from(""));
|
||||
}
|
||||
|
||||
let previous_path = base_path.join(&payload.previous_model_id);
|
||||
fs::remove_dir_all(previous_path)?;
|
||||
return Ok(String::from(""));
|
||||
}
|
||||
|
||||
let source_path = PathBuf::from(&payload.file_path);
|
||||
let source_name = source_path
|
||||
.file_name()
|
||||
@ -155,23 +150,46 @@ fn handle_tokenizer_store(payload: &TokenizerStorage) -> Result<String, std::io:
|
||||
.ok_or_else(|| std::io::Error::new(std::io::ErrorKind::InvalidInput, "Invalid tokenizer file path"))?;
|
||||
let model_path = &base_path.join(&payload.model_id);
|
||||
let destination_path = &model_path.join(source_name);
|
||||
if !source_path.eq(destination_path) && model_path.exists() {
|
||||
fs::remove_dir_all(model_path)?;
|
||||
|
||||
if source_path.eq(destination_path) {
|
||||
return Ok(destination_path.to_str().unwrap().to_string());
|
||||
}
|
||||
|
||||
match model_path.try_exists()? {
|
||||
true => fs::remove_dir_all(model_path.clone())?,
|
||||
false => (),
|
||||
}
|
||||
|
||||
if payload.file_path.trim().is_empty() {
|
||||
return Ok(String::from(""));
|
||||
}
|
||||
fs::create_dir_all(model_path)?;
|
||||
let previous_path = base_path.join(&payload.previous_model_id);
|
||||
|
||||
if !payload.previous_model_id.trim().is_empty() && source_path.starts_with(&previous_path) {
|
||||
fs::rename(&source_path, &destination_path)?;
|
||||
if previous_path.exists() && !previous_path.eq(model_path) {
|
||||
fs::remove_dir_all(previous_path)?;
|
||||
}
|
||||
} else {
|
||||
fs::copy(&source_path, &destination_path)?;
|
||||
}
|
||||
fs::copy(&source_path, &destination_path)?;
|
||||
|
||||
Ok(destination_path.to_str().unwrap().to_string())
|
||||
}
|
||||
|
||||
fn handle_tokenizer_delete(payload: &TokenizerDelete) -> Result<(), std::io::Error> {
|
||||
if payload.model_id.trim().is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let data_dir = DATA_DIRECTORY
|
||||
.get()
|
||||
.ok_or_else(|| std::io::Error::new(std::io::ErrorKind::Other, "DATA_DIRECTORY not initialized"))?;
|
||||
|
||||
let tokenizer_path = PathBuf::from(data_dir)
|
||||
.join("tokenizers")
|
||||
.join(&payload.model_id);
|
||||
|
||||
if tokenizer_path.exists() {
|
||||
fs::remove_dir_all(tokenizer_path)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn get_token_count(text: &str) -> Result<usize, TokenizerError> {
|
||||
if text.trim().is_empty() {
|
||||
return Err(TokenizerError::from("Input text is empty"));
|
||||
@ -193,7 +211,7 @@ pub fn token_count(_token: APIToken, req: Json<SetTokenText>) -> Json<TokenizerR
|
||||
|
||||
#[post("/tokenizer/validate", data = "<payload>")]
|
||||
pub fn validate_tokenizer(_token: APIToken, payload: Json<TokenizerPath>) -> Json<TokenizerResponse> {
|
||||
Json(validate_tokenizer_at_path(&PathBuf::from(payload.file_path.clone())).into())
|
||||
Json(handle_tokenizer_validate(&PathBuf::from(payload.file_path.clone())).into())
|
||||
}
|
||||
|
||||
#[post("/tokenizer/store", data = "<payload>")]
|
||||
@ -212,9 +230,25 @@ pub fn store_tokenizer(_token: APIToken, payload: Json<TokenizerStorage>) -> Jso
|
||||
}
|
||||
}
|
||||
|
||||
#[post("/tokenizer/set", data = "<payload>")]
|
||||
pub fn set_tokenizer(_token: APIToken, payload: Json<TokenizerPath>) -> Json<TokenizerResponse> {
|
||||
match init_tokenizer(&payload.file_path) {
|
||||
#[post("/tokenizer/delete", data = "<payload>")]
|
||||
pub fn delete_tokenizer(_token: APIToken, payload: Json<TokenizerDelete>) -> Json<TokenizerResponse> {
|
||||
match handle_tokenizer_delete(&payload) {
|
||||
Ok(_) => Json(TokenizerResponse {
|
||||
success: true,
|
||||
token_count: 0,
|
||||
message: "Success".to_string(),
|
||||
}),
|
||||
Err(e) => Json(TokenizerResponse {
|
||||
success: false,
|
||||
token_count: 0,
|
||||
message: e.to_string(),
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
#[post("/tokenizer/set", data = "<payload>")]
|
||||
pub fn set_tokenizer(_token: APIToken, payload: Json<TokenizerPath>) -> Json<TokenizerResponse> {
|
||||
match handle_tokenizer_set(&payload.file_path) {
|
||||
Ok(_) => Json(TokenizerResponse {
|
||||
success: true,
|
||||
token_count: 0,
|
||||
|
||||
Loading…
Reference in New Issue
Block a user