diff --git a/app/MindWork AI Studio/Assistants/I18N/allTexts.lua b/app/MindWork AI Studio/Assistants/I18N/allTexts.lua index 791f11b8..5a088dd7 100644 --- a/app/MindWork AI Studio/Assistants/I18N/allTexts.lua +++ b/app/MindWork AI Studio/Assistants/I18N/allTexts.lua @@ -2584,6 +2584,9 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELEMBEDDINGS::T32678 -- Close UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELEMBEDDINGS::T3448155331"] = "Close" +-- Couldn't delete the embedding provider '{0}'. The issue: {1}. We can ignore this issue and delete the embedding provider anyway. Do you want to ignore it and delete this embedding provider? +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELEMBEDDINGS::T3703173892"] = "Couldn't delete the embedding provider '{0}'. The issue: {1}. We can ignore this issue and delete the embedding provider anyway. Do you want to ignore it and delete this embedding provider?" + -- Actions UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELEMBEDDINGS::T3865031940"] = "Actions" diff --git a/app/MindWork AI Studio/Components/Settings/SettingsPanelEmbeddings.razor.cs b/app/MindWork AI Studio/Components/Settings/SettingsPanelEmbeddings.razor.cs index 43c5eaea..96a1c0ee 100644 --- a/app/MindWork AI Studio/Components/Settings/SettingsPanelEmbeddings.razor.cs +++ b/app/MindWork AI Studio/Components/Settings/SettingsPanelEmbeddings.razor.cs @@ -2,6 +2,8 @@ using System.Globalization; using AIStudio.Dialogs; using AIStudio.Provider; using AIStudio.Settings; +using AIStudio.Tools.Services; +using AIStudio.Tools.Rust; using Microsoft.AspNetCore.Components; @@ -108,16 +110,44 @@ public partial class SettingsPanelEmbeddings : SettingsPanelProviderBase return; var deleteSecretResponse = await this.RustService.DeleteAPIKey(provider, SecretStoreType.EMBEDDING_PROVIDER); - if(deleteSecretResponse.Success) + var deleteTokenizerResponse = await this.RustService.DeleteTokenizer(TokenizerModelId.ForEmbeddingProvider(provider)); + if(deleteSecretResponse.Success && deleteTokenizerResponse.Success) { this.SettingsManager.ConfigurationData.EmbeddingProviders.Remove(provider); await this.SettingsManager.StoreSettings(); } + else + { + var issueDialogParameters = new DialogParameters + { + { x => x.Message, string.Format(T("Couldn't delete the embedding provider '{0}'. The issue: {1}. We can ignore this issue and delete the embedding provider anyway. Do you want to ignore it and delete this embedding provider?"), provider.Name, BuildDeleteIssue(deleteSecretResponse, deleteTokenizerResponse)) }, + }; + + var issueDialogReference = await this.DialogService.ShowAsync(T("Delete Embedding Provider"), issueDialogParameters, DialogOptions.FULLSCREEN); + var issueDialogResult = await issueDialogReference.Result; + if (issueDialogResult is null || issueDialogResult.Canceled) + return; + + this.SettingsManager.ConfigurationData.EmbeddingProviders.Remove(provider); + await this.SettingsManager.StoreSettings(); + } await this.UpdateEmbeddingProviders(); await this.MessageBus.SendMessage(this, Event.CONFIGURATION_CHANGED); } + private static string BuildDeleteIssue(DeleteSecretResponse deleteSecretResponse, TokenizerResponse deleteTokenizerResponse) + { + var issues = new List(); + if (!deleteSecretResponse.Success) + issues.Add(deleteSecretResponse.Issue); + + if (!deleteTokenizerResponse.Success) + issues.Add(deleteTokenizerResponse.Message); + + return string.Join(" | ", issues); + } + private async Task ExportEmbeddingProvider(EmbeddingProvider provider) { if (!this.SettingsManager.ConfigurationData.App.ShowAdminSettings) diff --git a/app/MindWork AI Studio/Components/Settings/SettingsPanelProviders.razor.cs b/app/MindWork AI Studio/Components/Settings/SettingsPanelProviders.razor.cs index f4f4d9bd..e00b5211 100644 --- a/app/MindWork AI Studio/Components/Settings/SettingsPanelProviders.razor.cs +++ b/app/MindWork AI Studio/Components/Settings/SettingsPanelProviders.razor.cs @@ -3,6 +3,8 @@ using System.Diagnostics.CodeAnalysis; using AIStudio.Dialogs; using AIStudio.Provider; using AIStudio.Settings; +using AIStudio.Tools.Rust; +using AIStudio.Tools.Services; using Microsoft.AspNetCore.Components; @@ -109,7 +111,8 @@ public partial class SettingsPanelProviders : SettingsPanelProviderBase return; var deleteSecretResponse = await this.RustService.DeleteAPIKey(provider, SecretStoreType.LLM_PROVIDER); - if(deleteSecretResponse.Success) + var deleteTokenizerResponse = await this.RustService.DeleteTokenizer(TokenizerModelId.ForProvider(provider)); + if(deleteSecretResponse.Success && deleteTokenizerResponse.Success) { this.SettingsManager.ConfigurationData.Providers.Remove(provider); await this.SettingsManager.StoreSettings(); @@ -118,7 +121,7 @@ public partial class SettingsPanelProviders : SettingsPanelProviderBase { var issueDialogParameters = new DialogParameters { - { x => x.Message, string.Format(T("Couldn't delete the provider '{0}'. The issue: {1}. We can ignore this issue and delete the provider anyway. Do you want to ignore it and delete this provider?"), provider.InstanceName, deleteSecretResponse.Issue) }, + { x => x.Message, string.Format(T("Couldn't delete the provider '{0}'. The issue: {1}. We can ignore this issue and delete the provider anyway. Do you want to ignore it and delete this provider?"), provider.InstanceName, BuildDeleteIssue(deleteSecretResponse, deleteTokenizerResponse)) }, }; var issueDialogReference = await this.DialogService.ShowAsync(T("Delete LLM Provider"), issueDialogParameters, DialogOptions.FULLSCREEN); @@ -135,6 +138,18 @@ public partial class SettingsPanelProviders : SettingsPanelProviderBase await this.MessageBus.SendMessage(this, Event.CONFIGURATION_CHANGED); } + private static string BuildDeleteIssue(DeleteSecretResponse deleteSecretResponse, TokenizerResponse deleteTokenizerResponse) + { + var issues = new List(); + if (!deleteSecretResponse.Success) + issues.Add(deleteSecretResponse.Issue); + + if (!deleteTokenizerResponse.Success) + issues.Add(deleteTokenizerResponse.Message); + + return string.Join(" | ", issues); + } + private async Task ExportLLMProvider(AIStudio.Settings.Provider provider) { if (!this.SettingsManager.ConfigurationData.App.ShowAdminSettings) diff --git a/app/MindWork AI Studio/Dialogs/EmbeddingProviderDialog.razor.cs b/app/MindWork AI Studio/Dialogs/EmbeddingProviderDialog.razor.cs index 740194ef..650d16e1 100644 --- a/app/MindWork AI Studio/Dialogs/EmbeddingProviderDialog.razor.cs +++ b/app/MindWork AI Studio/Dialogs/EmbeddingProviderDialog.razor.cs @@ -163,13 +163,11 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId // Load the used instance names: this.UsedInstanceNames = this.SettingsManager.ConfigurationData.EmbeddingProviders.Select(x => x.Name.ToLowerInvariant()).ToList(); - Console.WriteLine($"Previous instance names: {this.dataEditingPreviousInstanceName}"); // When editing, we need to load the data: if(this.IsEditing) { this.dataEditingPreviousInstanceName = this.DataName.ToLowerInvariant(); this.dataFilePath = this.DataTokenizerPath; - Console.WriteLine($"Previous instance name is '{this.dataEditingPreviousInstanceName}'"); // When using self-hosted embedding, we must copy the model name: if (this.DataLLMProvider is LLMProviders.SELF_HOSTED) @@ -243,8 +241,7 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId if (!this.dataIsValid) return; - var response = await this.RustService.StoreTokenizer("embedding_"+this.DataName, "embedding_"+this.dataEditingPreviousInstanceName, this.dataFilePath); - Console.WriteLine($"Response from Rust: {response.Message}"); + var response = await this.RustService.StoreTokenizer(TokenizerModelId.ForEmbeddingProviderId(this.DataId), this.dataFilePath); if (!response.Success) { this.dataCustomTokenizerValidationIssue = response.Message; diff --git a/app/MindWork AI Studio/Dialogs/ProviderDialog.razor.cs b/app/MindWork AI Studio/Dialogs/ProviderDialog.razor.cs index f8b8c943..e14cd203 100644 --- a/app/MindWork AI Studio/Dialogs/ProviderDialog.razor.cs +++ b/app/MindWork AI Studio/Dialogs/ProviderDialog.razor.cs @@ -268,7 +268,7 @@ public partial class ProviderDialog : MSGComponentBase, ISecretId if (!this.dataIsValid) return; - var tokenizerResponse = await this.RustService.StoreTokenizer("chat_"+this.DataInstanceName, "chat_"+this.dataEditingPreviousInstanceName, this.dataFilePath); + var tokenizerResponse = await this.RustService.StoreTokenizer(TokenizerModelId.ForProviderId(this.DataId), this.dataFilePath); if (!tokenizerResponse.Success) { this.dataCustomTokenizerValidationIssue = tokenizerResponse.Message; diff --git a/app/MindWork AI Studio/Plugins/configuration/plugin.lua b/app/MindWork AI Studio/Plugins/configuration/plugin.lua index 03a9b0f4..655a88cf 100644 --- a/app/MindWork AI Studio/Plugins/configuration/plugin.lua +++ b/app/MindWork AI Studio/Plugins/configuration/plugin.lua @@ -73,6 +73,9 @@ CONFIG["LLM_PROVIDERS"] = {} -- -- Please do not add the enclosing curly braces {} here. Also, no trailing comma is allowed. -- ["AdditionalJsonApiParameters"] = "", -- +-- -- Optional: tokenizer path for this provider relative to the plugin directory. +-- -- ["TokenizerPath"] = "", +-- -- -- Optional: Hugging Face inference provider. Only relevant for UsedLLMProvider = HUGGINGFACE. -- -- Allowed values are: CEREBRAS, NEBIUS_AI_STUDIO, SAMBANOVA, NOVITA, HYPERBOLIC, TOGETHER_AI, FIREWORKS, HF_INFERENCE_API -- -- ["HFInferenceProvider"] = "NOVITA", @@ -129,6 +132,9 @@ CONFIG["EMBEDDING_PROVIDERS"] = {} -- -- -- Optional: Encrypted API key (see LLM_PROVIDERS example for details) -- -- ["APIKey"] = "ENC:v1:", + +-- -- Optional: tokenizer path for this provider relative to the plugin directory. +-- -- ["TokenizerPath"] = "", -- -- ["Model"] = { -- ["Id"] = "", diff --git a/app/MindWork AI Studio/Settings/EmbeddingProvider.cs b/app/MindWork AI Studio/Settings/EmbeddingProvider.cs index 0f72c6c7..8d6042df 100644 --- a/app/MindWork AI Studio/Settings/EmbeddingProvider.cs +++ b/app/MindWork AI Studio/Settings/EmbeddingProvider.cs @@ -189,6 +189,8 @@ public sealed record EmbeddingProvider( ["Id"] = "{{Guid.NewGuid().ToString()}}", ["Name"] = "{{LuaTools.EscapeLuaString(this.Name)}}", ["UsedLLMProvider"] = "{{this.UsedLLMProvider}}", + + ["TokenizerPath"] = "{{this.TokenizerPath}}", ["Host"] = "{{this.Host}}", ["Hostname"] = "{{LuaTools.EscapeLuaString(this.Hostname)}}", diff --git a/app/MindWork AI Studio/Settings/Provider.cs b/app/MindWork AI Studio/Settings/Provider.cs index c8276bcd..f7ac1a07 100644 --- a/app/MindWork AI Studio/Settings/Provider.cs +++ b/app/MindWork AI Studio/Settings/Provider.cs @@ -254,6 +254,8 @@ public sealed record Provider( ["Id"] = "{{Guid.NewGuid().ToString()}}", ["InstanceName"] = "{{LuaTools.EscapeLuaString(this.InstanceName)}}", ["UsedLLMProvider"] = "{{this.UsedLLMProvider}}", + + ["TokenizerPath"] = "{{this.TokenizerPath}}", ["Host"] = "{{this.Host}}", ["Hostname"] = "{{LuaTools.EscapeLuaString(this.Hostname)}}", diff --git a/app/MindWork AI Studio/Tools/PluginSystem/PluginConfiguration.cs b/app/MindWork AI Studio/Tools/PluginSystem/PluginConfiguration.cs index 9e07452d..b5056f0f 100644 --- a/app/MindWork AI Studio/Tools/PluginSystem/PluginConfiguration.cs +++ b/app/MindWork AI Studio/Tools/PluginSystem/PluginConfiguration.cs @@ -30,6 +30,8 @@ public sealed class PluginConfiguration(bool isInternal, LuaState state, PluginT if (!dryRun) { + await PluginConfigurationObject.SyncManagedTokenizersAsync(this.Id, this.PluginPath); + // Store any decrypted API keys from enterprise configuration in the OS keyring: await StoreEnterpriseApiKeysAsync(); diff --git a/app/MindWork AI Studio/Tools/PluginSystem/PluginConfigurationObject.cs b/app/MindWork AI Studio/Tools/PluginSystem/PluginConfigurationObject.cs index d0b299d3..724ce5c8 100644 --- a/app/MindWork AI Studio/Tools/PluginSystem/PluginConfigurationObject.cs +++ b/app/MindWork AI Studio/Tools/PluginSystem/PluginConfigurationObject.cs @@ -1,3 +1,4 @@ +using System.Diagnostics.CodeAnalysis; using System.Linq.Expressions; using AIStudio.Settings; @@ -162,6 +163,42 @@ public sealed record PluginConfigurationObject return true; } + [SuppressMessage("Usage", "MWAIS0001:Direct access to `Providers` is not allowed", Justification = "Tokenizer synchronization needs indexed access to update enterprise-managed providers in place.")] + public static async Task SyncManagedTokenizersAsync(Guid configPluginId, string pluginPath) + { + var wasConfigurationChanged = false; + + for (var i = 0; i < SETTINGS_MANAGER.ConfigurationData.Providers.Count; i++) + { + var provider = SETTINGS_MANAGER.ConfigurationData.Providers[i]; + if (!provider.IsEnterpriseConfiguration || provider.EnterpriseConfigurationPluginId != configPluginId) + continue; + + var syncedProvider = await SyncProviderTokenizerAsync(provider, pluginPath); + if (syncedProvider == provider) + continue; + + SETTINGS_MANAGER.ConfigurationData.Providers[i] = syncedProvider; + wasConfigurationChanged = true; + } + + for (var i = 0; i < SETTINGS_MANAGER.ConfigurationData.EmbeddingProviders.Count; i++) + { + var provider = SETTINGS_MANAGER.ConfigurationData.EmbeddingProviders[i]; + if (!provider.IsEnterpriseConfiguration || provider.EnterpriseConfigurationPluginId != configPluginId) + continue; + + var syncedProvider = await SyncEmbeddingTokenizerAsync(provider, pluginPath); + if (syncedProvider == provider) + continue; + + SETTINGS_MANAGER.ConfigurationData.EmbeddingProviders[i] = syncedProvider; + wasConfigurationChanged = true; + } + + return wasConfigurationChanged; + } + /// /// Cleans up configuration objects of a specified type that are no longer associated with any available plugin. /// @@ -217,6 +254,19 @@ public sealed record PluginConfigurationObject var wasConfigurationChanged = leftOverObjects.Count > 0; foreach (var item in leftOverObjects.Distinct()) { + if (item is Settings.Provider provider) + { + var deleteTokenizerResult = await RUST_SERVICE.DeleteTokenizer(TokenizerModelId.ForProvider(provider)); + if (!deleteTokenizerResult.Success) + LOG.LogWarning("Failed to delete tokenizer for removed enterprise provider '{ProviderName}': {Issue}", provider.InstanceName, deleteTokenizerResult.Message); + } + else if (item is EmbeddingProvider embeddingProvider) + { + var deleteTokenizerResult = await RUST_SERVICE.DeleteTokenizer(TokenizerModelId.ForEmbeddingProvider(embeddingProvider)); + if (!deleteTokenizerResult.Success) + LOG.LogWarning("Failed to delete tokenizer for removed enterprise embedding provider '{ProviderName}': {Issue}", embeddingProvider.Name, deleteTokenizerResult.Message); + } + configuredObjects.Remove(item); // Delete the API key from the OS keyring if the removed object has one: @@ -232,4 +282,89 @@ public sealed record PluginConfigurationObject return wasConfigurationChanged; } -} \ No newline at end of file + + private static async Task SyncProviderTokenizerAsync(Settings.Provider provider, string pluginPath) + { + var syncedTokenizerPath = await SyncTokenizerAsync( + provider.TokenizerPath, + pluginPath, + TokenizerModelId.ForProvider(provider), + $"provider '{provider.InstanceName}'"); + + return provider with { TokenizerPath = syncedTokenizerPath }; + } + + private static async Task SyncEmbeddingTokenizerAsync(EmbeddingProvider provider, string pluginPath) + { + var syncedTokenizerPath = await SyncTokenizerAsync( + provider.TokenizerPath, + pluginPath, + TokenizerModelId.ForEmbeddingProvider(provider), + $"embedding provider '{provider.Name}'"); + + return provider with { TokenizerPath = syncedTokenizerPath }; + } + + private static async Task SyncTokenizerAsync(string configuredTokenizerPath, string pluginPath, string modelId, string logName) + { + if (string.IsNullOrWhiteSpace(configuredTokenizerPath)) + { + var deleteResult = await RUST_SERVICE.DeleteTokenizer(modelId); + if (!deleteResult.Success) + LOG.LogWarning("Failed to delete tokenizer for {LogName}: {Issue}", logName, deleteResult.Message); + + return string.Empty; + } + + var resolvedPath = ResolvePluginTokenizerPath(configuredTokenizerPath, pluginPath); + if (resolvedPath is null) + { + var deleteResult = await RUST_SERVICE.DeleteTokenizer(modelId); + if (!deleteResult.Success) + LOG.LogWarning("Failed to delete tokenizer after invalid path for {LogName}: {Issue}", logName, deleteResult.Message); + + LOG.LogWarning("The configured tokenizer path '{TokenizerPath}' for {LogName} is invalid. The tokenizer path must stay within the plugin directory '{PluginPath}'.", configuredTokenizerPath, logName, pluginPath); + return string.Empty; + } + + var validateResult = await RUST_SERVICE.ValidateTokenizer(resolvedPath); + if (!validateResult.Success) + { + var deleteResult = await RUST_SERVICE.DeleteTokenizer(modelId); + if (!deleteResult.Success) + LOG.LogWarning("Failed to delete tokenizer after validation failure for {LogName}: {Issue}", logName, deleteResult.Message); + + LOG.LogWarning("The configured tokenizer for {LogName} is invalid. Path='{TokenizerPath}', issue='{Issue}'", logName, resolvedPath, validateResult.Message); + return string.Empty; + } + + var storeResult = await RUST_SERVICE.StoreTokenizer(modelId, resolvedPath); + if (!storeResult.Success) + { + LOG.LogWarning("Failed to store tokenizer for {LogName}. Path='{TokenizerPath}', issue='{Issue}'", logName, resolvedPath, storeResult.Message); + return string.Empty; + } + + return storeResult.Message; + } + + private static string? ResolvePluginTokenizerPath(string configuredTokenizerPath, string pluginPath) + { + if (string.IsNullOrWhiteSpace(pluginPath)) + return null; + + var fullPluginPath = Path.GetFullPath(pluginPath); + var candidatePath = Path.GetFullPath(Path.Combine(fullPluginPath, configuredTokenizerPath)); + + if (candidatePath.Equals(fullPluginPath, StringComparison.OrdinalIgnoreCase)) + return null; + + var pluginPrefix = fullPluginPath.EndsWith(Path.DirectorySeparatorChar) + ? fullPluginPath + : fullPluginPath + Path.DirectorySeparatorChar; + + return candidatePath.StartsWith(pluginPrefix, StringComparison.OrdinalIgnoreCase) + ? candidatePath + : null; + } +} diff --git a/app/MindWork AI Studio/Tools/Services/RustService.Tokenizer.cs b/app/MindWork AI Studio/Tools/Services/RustService.Tokenizer.cs index 02770311..76125dc7 100644 --- a/app/MindWork AI Studio/Tools/Services/RustService.Tokenizer.cs +++ b/app/MindWork AI Studio/Tools/Services/RustService.Tokenizer.cs @@ -29,12 +29,11 @@ public sealed partial class RustService return await result.Content.ReadFromJsonAsync(this.jsonRustSerializerOptions); } - public async Task StoreTokenizer(string modelId, string previousmodelId, string filePath) + public async Task StoreTokenizer(string modelId, string filePath) { - this.logger!.LogInformation($"Storing tokenizer for model '{modelId}' with previous model '{previousmodelId}' from file '{filePath}'"); + this.logger!.LogInformation($"Storing tokenizer for model '{modelId}' from file '{filePath}'"); var result = await this.http.PostAsJsonAsync("/tokenizer/store", new { model_id = modelId, - previous_model_id = previousmodelId, file_path = filePath, }, this.jsonRustSerializerOptions); @@ -50,6 +49,26 @@ public sealed partial class RustService return await result.Content.ReadFromJsonAsync(this.jsonRustSerializerOptions); } + + public async Task DeleteTokenizer(string modelId) + { + this.logger!.LogInformation($"Deleting tokenizer for model '{modelId}'"); + var result = await this.http.PostAsJsonAsync("/tokenizer/delete", new { + model_id = modelId, + }, this.jsonRustSerializerOptions); + + if (!result.IsSuccessStatusCode) + { + this.logger!.LogError($"Failed to delete the tokenizer '{result.StatusCode}'"); + return new TokenizerResponse{ + Success = false, + Message = "An error occured while sending the tokenizer delete request to the Rust framework: "+result.StatusCode, + TokenCount = 0 + }; + } + + return await result.Content.ReadFromJsonAsync(this.jsonRustSerializerOptions); + } public async Task GetTokenCount(string text) { diff --git a/app/MindWork AI Studio/Tools/Services/TokenizerModelId.cs b/app/MindWork AI Studio/Tools/Services/TokenizerModelId.cs new file mode 100644 index 00000000..57db51ca --- /dev/null +++ b/app/MindWork AI Studio/Tools/Services/TokenizerModelId.cs @@ -0,0 +1,20 @@ +namespace AIStudio.Tools.Services; + +public static class TokenizerModelId +{ + public static string ForProvider(Settings.Provider provider) => ForProviderId(provider.Id); + + public static string ForProviderId(string guid) => "chat_" + NormalizeGuid(guid); + + public static string ForEmbeddingProvider(Settings.EmbeddingProvider provider) => ForEmbeddingProviderId(provider.Id); + + public static string ForEmbeddingProviderId(string guid) => "embedding_" + NormalizeGuid(guid); + + private static string NormalizeGuid(string guid) + { + if (Guid.TryParse(guid, out var parsedGuid)) + return parsedGuid.ToString("D"); + + return guid.Trim(); + } +} diff --git a/runtime/src/main.rs b/runtime/src/main.rs index 0852833f..04d1b6a1 100644 --- a/runtime/src/main.rs +++ b/runtime/src/main.rs @@ -11,7 +11,6 @@ use mindwork_ai_studio::environment::is_dev; use mindwork_ai_studio::log::init_logging; use mindwork_ai_studio::metadata::MetaData; use mindwork_ai_studio::runtime_api::start_runtime_api; -use mindwork_ai_studio::tokenizer::{init_tokenizer}; #[tokio::main] async fn main() { diff --git a/runtime/src/runtime_api.rs b/runtime/src/runtime_api.rs index bceef476..ac8b332d 100644 --- a/runtime/src/runtime_api.rs +++ b/runtime/src/runtime_api.rs @@ -92,6 +92,7 @@ pub fn start_runtime_api() { crate::tokenizer::token_count, crate::tokenizer::validate_tokenizer, crate::tokenizer::store_tokenizer, + crate::tokenizer::delete_tokenizer, crate::tokenizer::set_tokenizer, crate::app_window::register_shortcut, crate::app_window::validate_shortcut, diff --git a/runtime/src/tokenizer.rs b/runtime/src/tokenizer.rs index 49373ab0..ecf2f86b 100644 --- a/runtime/src/tokenizer.rs +++ b/runtime/src/tokenizer.rs @@ -9,6 +9,7 @@ use serde::Deserialize; use tauri::PathResolver; use tokenizers::Error; use tokenizers::tokenizer::{Tokenizer, Error as TokenizerError}; +use tokio::fs::try_exists; use crate::api_token::APIToken; use crate::environment::DATA_DIRECTORY; @@ -24,10 +25,14 @@ pub struct SetTokenText { #[derive(Clone, Deserialize)] pub struct TokenizerStorage { model_id: String, - previous_model_id: String, file_path: String, } +#[derive(Clone, Deserialize)] +pub struct TokenizerDelete { + model_id: String, +} + #[derive(Clone, Deserialize)] pub struct TokenizerPath { file_path: String, @@ -68,7 +73,7 @@ fn tokenizer_state() -> &'static RwLock> { TOKENIZER.get_or_init(|| RwLock::new(None)) } -pub fn init_tokenizer(path: &str) -> Result<(), Error> { +pub fn handle_tokenizer_set(path: &str) -> Result<(), Error> { let tokenizer_path = if path.trim().is_empty() { let relative_source_path = String::from("resources/tokenizers/tokenizer.json"); let path_resolver = TOKENIZER_PATH_RESOLVER @@ -90,7 +95,7 @@ pub fn init_tokenizer(path: &str) -> Result<(), Error> { Ok(()) } -fn validate_tokenizer_at_path(path: &PathBuf) -> Result { +fn handle_tokenizer_validate(path: &PathBuf) -> Result { if !path.is_file() { return Err(TokenizerError::from(format!( "Tokenizer file was not found: {}", @@ -138,16 +143,6 @@ fn handle_tokenizer_store(payload: &TokenizerStorage) -> Result Result fs::remove_dir_all(model_path.clone())?, + false => (), + } + + if payload.file_path.trim().is_empty() { + return Ok(String::from("")); } fs::create_dir_all(model_path)?; - let previous_path = base_path.join(&payload.previous_model_id); - if !payload.previous_model_id.trim().is_empty() && source_path.starts_with(&previous_path) { - fs::rename(&source_path, &destination_path)?; - if previous_path.exists() && !previous_path.eq(model_path) { - fs::remove_dir_all(previous_path)?; - } - } else { - fs::copy(&source_path, &destination_path)?; - } + fs::copy(&source_path, &destination_path)?; + Ok(destination_path.to_str().unwrap().to_string()) } +fn handle_tokenizer_delete(payload: &TokenizerDelete) -> Result<(), std::io::Error> { + if payload.model_id.trim().is_empty() { + return Ok(()); + } + + let data_dir = DATA_DIRECTORY + .get() + .ok_or_else(|| std::io::Error::new(std::io::ErrorKind::Other, "DATA_DIRECTORY not initialized"))?; + + let tokenizer_path = PathBuf::from(data_dir) + .join("tokenizers") + .join(&payload.model_id); + + if tokenizer_path.exists() { + fs::remove_dir_all(tokenizer_path)?; + } + + Ok(()) +} + pub fn get_token_count(text: &str) -> Result { if text.trim().is_empty() { return Err(TokenizerError::from("Input text is empty")); @@ -193,7 +211,7 @@ pub fn token_count(_token: APIToken, req: Json) -> Json) -> Json { - Json(validate_tokenizer_at_path(&PathBuf::from(payload.file_path.clone())).into()) + Json(handle_tokenizer_validate(&PathBuf::from(payload.file_path.clone())).into()) } #[post("/tokenizer/store", data = "")] @@ -212,9 +230,25 @@ pub fn store_tokenizer(_token: APIToken, payload: Json) -> Jso } } -#[post("/tokenizer/set", data = "")] -pub fn set_tokenizer(_token: APIToken, payload: Json) -> Json { - match init_tokenizer(&payload.file_path) { +#[post("/tokenizer/delete", data = "")] +pub fn delete_tokenizer(_token: APIToken, payload: Json) -> Json { + match handle_tokenizer_delete(&payload) { + Ok(_) => Json(TokenizerResponse { + success: true, + token_count: 0, + message: "Success".to_string(), + }), + Err(e) => Json(TokenizerResponse { + success: false, + token_count: 0, + message: e.to_string(), + }), + } +} + +#[post("/tokenizer/set", data = "")] +pub fn set_tokenizer(_token: APIToken, payload: Json) -> Json { + match handle_tokenizer_set(&payload.file_path) { Ok(_) => Json(TokenizerResponse { success: true, token_count: 0,