From 529986837e9842e307201b9915e6c606cd0caf38 Mon Sep 17 00:00:00 2001 From: Thorsten Sommer Date: Sun, 11 Jan 2026 16:02:28 +0100 Subject: [PATCH] Implemented the transcription API (#623) --- .../Assistants/I18N/allTexts.lua | 33 ++++- .../Components/SecretInputField.razor | 2 +- .../Settings/SettingsPanelEmbeddings.razor.cs | 4 +- .../Settings/SettingsPanelProviders.razor.cs | 2 +- .../Settings/SettingsPanelTranscription.razor | 2 +- .../SettingsPanelTranscription.razor.cs | 4 +- .../Components/VoiceRecorder.razor | 19 ++- .../Components/VoiceRecorder.razor.cs | 132 +++++++++++++++++- .../Dialogs/EmbeddingProviderDialog.razor | 2 +- .../Dialogs/EmbeddingProviderDialog.razor.cs | 21 ++- .../Dialogs/ProviderDialog.razor | 2 +- .../Dialogs/ProviderDialog.razor.cs | 19 ++- .../Dialogs/TranscriptionProviderDialog.razor | 2 +- .../TranscriptionProviderDialog.razor.cs | 21 ++- .../Plugins/configuration/plugin.lua | 2 +- .../plugin.lua | 29 +++- .../plugin.lua | 33 ++++- .../AlibabaCloud/ProviderAlibabaCloud.cs | 16 ++- .../Provider/Anthropic/ProviderAnthropic.cs | 14 +- .../Provider/BaseProvider.cs | 81 +++++++++++ .../Provider/DeepSeek/ProviderDeepSeek.cs | 14 +- .../Provider/Fireworks/ProviderFireworks.cs | 12 +- .../Provider/GWDG/ProviderGWDG.cs | 17 ++- .../Provider/Google/ProviderGoogle.cs | 16 ++- .../Provider/Groq/ProviderGroq.cs | 16 ++- .../Provider/Helmholtz/ProviderHelmholtz.cs | 16 ++- .../HuggingFace/ProviderHuggingFace.cs | 8 +- app/MindWork AI Studio/Provider/IProvider.cs | 10 ++ .../Provider/Mistral/ProviderMistral.cs | 17 ++- app/MindWork AI Studio/Provider/NoProvider.cs | 2 + .../Provider/OpenAI/ProviderOpenAI.cs | 21 ++- .../Provider/OpenRouter/ProviderOpenRouter.cs | 16 ++- .../Provider/Perplexity/ProviderPerplexity.cs | 8 +- .../Provider/SelfHosted/ProviderSelfHosted.cs | 19 ++- .../Provider/TranscriptionResponse.cs | 3 + .../Provider/X/ProviderX.cs | 16 ++- app/MindWork AI Studio/Tools/MIME/Builder.cs | 62 ++++++++ .../Tools/SecretStoreType.cs | 32 +++++ .../Tools/SecretStoreTypeExtensions.cs | 21 +++ .../Tools/Services/RustService.APIKeys.cs | 44 +++--- app/MindWork AI Studio/wwwroot/app.js | 26 ++-- .../wwwroot/changelog/v26.1.1.md | 2 + 42 files changed, 700 insertions(+), 138 deletions(-) create mode 100644 app/MindWork AI Studio/Provider/TranscriptionResponse.cs create mode 100644 app/MindWork AI Studio/Tools/SecretStoreType.cs create mode 100644 app/MindWork AI Studio/Tools/SecretStoreTypeExtensions.cs diff --git a/app/MindWork AI Studio/Assistants/I18N/allTexts.lua b/app/MindWork AI Studio/Assistants/I18N/allTexts.lua index d596fb44..1ec8c022 100644 --- a/app/MindWork AI Studio/Assistants/I18N/allTexts.lua +++ b/app/MindWork AI Studio/Assistants/I18N/allTexts.lua @@ -2128,6 +2128,9 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELEMBEDDINGS::T32678 -- Actions UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELEMBEDDINGS::T3865031940"] = "Actions" +-- This embedding provider is managed by your organization. +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELEMBEDDINGS::T4062656589"] = "This embedding provider is managed by your organization." + -- No embeddings configured yet. UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELEMBEDDINGS::T4068015588"] = "No embeddings configured yet." 
@@ -2287,15 +2290,18 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T40 -- Configured Transcription Providers UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T4210863523"] = "Configured Transcription Providers" +-- With the support of transcription models, MindWork AI Studio can convert human speech into text. This is useful, for example, when you need to dictate text. You can choose from dedicated transcription models, but not multimodal LLMs (large language models) that can handle both speech and text. The configuration of multimodal models is done in the 'Configure providers' section. +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T584860404"] = "With the support of transcription models, MindWork AI Studio can convert human speech into text. This is useful, for example, when you need to dictate text. You can choose from dedicated transcription models, but not multimodal LLMs (large language models) that can handle both speech and text. The configuration of multimodal models is done in the 'Configure providers' section." + +-- This transcription provider is managed by your organization. +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T756131076"] = "This transcription provider is managed by your organization." + -- Open Dashboard UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T78223861"] = "Open Dashboard" -- Are you sure you want to delete the transcription provider '{0}'? UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T789660305"] = "Are you sure you want to delete the transcription provider '{0}'?" --- With the support of transcription models, MindWork AI Studio can convert human speech into text. This is useful, for example, when you need to dictate text. You can choose from dedicated transcription models, but not multimodal LLMs (large language models) that can handle both speech and text. The configuration of multimodal models is done in the \"Configure providers\" section. -UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T799338148"] = "With the support of transcription models, MindWork AI Studio can convert human speech into text. This is useful, for example, when you need to dictate text. You can choose from dedicated transcription models, but not multimodal LLMs (large language models) that can handle both speech and text. The configuration of multimodal models is done in the \\\"Configure providers\\\" section." - -- Provider UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T900237532"] = "Provider" @@ -2380,12 +2386,33 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VISION::T428040679"] = "Content creation" -- Useful assistants UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VISION::T586430036"] = "Useful assistants" +-- Failed to create the transcription provider. +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T1689988905"] = "Failed to create the transcription provider." + -- Stop recording and start transcription UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T224155287"] = "Stop recording and start transcription" -- Start recording your voice for a transcription UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T2372624045"] = "Start recording your voice for a transcription" +-- Transcription in progress... +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T2851219233"] = "Transcription in progress..." 
+ +-- The configured transcription provider was not found. +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T331613105"] = "The configured transcription provider was not found." + +-- The configured transcription provider does not meet the minimum confidence level. +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T3834149033"] = "The configured transcription provider does not meet the minimum confidence level." + +-- An error occurred during transcription. +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T588743762"] = "An error occurred during transcription." + +-- No transcription provider is configured. +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T663630295"] = "No transcription provider is configured." + +-- The transcription result is empty. +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T974954792"] = "The transcription result is empty." + -- Are you sure you want to delete the chat '{0}' in the workspace '{1}'? UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::WORKSPACES::T1016188706"] = "Are you sure you want to delete the chat '{0}' in the workspace '{1}'?" diff --git a/app/MindWork AI Studio/Components/SecretInputField.razor b/app/MindWork AI Studio/Components/SecretInputField.razor index c1e9ddba..36945cab 100644 --- a/app/MindWork AI Studio/Components/SecretInputField.razor +++ b/app/MindWork AI Studio/Components/SecretInputField.razor @@ -15,6 +15,6 @@ UserAttributes="@SPELLCHECK_ATTRIBUTES"/> - + \ No newline at end of file diff --git a/app/MindWork AI Studio/Components/Settings/SettingsPanelEmbeddings.razor.cs b/app/MindWork AI Studio/Components/Settings/SettingsPanelEmbeddings.razor.cs index 06ce34ea..50ebeb13 100644 --- a/app/MindWork AI Studio/Components/Settings/SettingsPanelEmbeddings.razor.cs +++ b/app/MindWork AI Studio/Components/Settings/SettingsPanelEmbeddings.razor.cs @@ -100,13 +100,13 @@ public partial class SettingsPanelEmbeddings : SettingsPanelBase if (dialogResult is null || dialogResult.Canceled) return; - var deleteSecretResponse = await this.RustService.DeleteAPIKey(provider); + var deleteSecretResponse = await this.RustService.DeleteAPIKey(provider, SecretStoreType.EMBEDDING_PROVIDER); if(deleteSecretResponse.Success) { this.SettingsManager.ConfigurationData.EmbeddingProviders.Remove(provider); await this.SettingsManager.StoreSettings(); } - + await this.UpdateEmbeddingProviders(); await this.MessageBus.SendMessage(this, Event.CONFIGURATION_CHANGED); } diff --git a/app/MindWork AI Studio/Components/Settings/SettingsPanelProviders.razor.cs b/app/MindWork AI Studio/Components/Settings/SettingsPanelProviders.razor.cs index 0b68a736..035543dc 100644 --- a/app/MindWork AI Studio/Components/Settings/SettingsPanelProviders.razor.cs +++ b/app/MindWork AI Studio/Components/Settings/SettingsPanelProviders.razor.cs @@ -107,7 +107,7 @@ public partial class SettingsPanelProviders : SettingsPanelBase if (dialogResult is null || dialogResult.Canceled) return; - var deleteSecretResponse = await this.RustService.DeleteAPIKey(provider); + var deleteSecretResponse = await this.RustService.DeleteAPIKey(provider, SecretStoreType.LLM_PROVIDER); if(deleteSecretResponse.Success) { this.SettingsManager.ConfigurationData.Providers.Remove(provider); diff --git a/app/MindWork AI Studio/Components/Settings/SettingsPanelTranscription.razor b/app/MindWork AI Studio/Components/Settings/SettingsPanelTranscription.razor index 273bee0b..82421e94 100644 --- a/app/MindWork AI Studio/Components/Settings/SettingsPanelTranscription.razor +++ b/app/MindWork AI 
Studio/Components/Settings/SettingsPanelTranscription.razor @@ -10,7 +10,7 @@ @T("Configured Transcription Providers") - @T("With the support of transcription models, MindWork AI Studio can convert human speech into text. This is useful, for example, when you need to dictate text. You can choose from dedicated transcription models, but not multimodal LLMs (large language models) that can handle both speech and text. The configuration of multimodal models is done in the \"Configure providers\" section.") + @T("With the support of transcription models, MindWork AI Studio can convert human speech into text. This is useful, for example, when you need to dictate text. You can choose from dedicated transcription models, but not multimodal LLMs (large language models) that can handle both speech and text. The configuration of multimodal models is done in the 'Configure providers' section.") diff --git a/app/MindWork AI Studio/Components/Settings/SettingsPanelTranscription.razor.cs b/app/MindWork AI Studio/Components/Settings/SettingsPanelTranscription.razor.cs index b9e699f7..d564d5cd 100644 --- a/app/MindWork AI Studio/Components/Settings/SettingsPanelTranscription.razor.cs +++ b/app/MindWork AI Studio/Components/Settings/SettingsPanelTranscription.razor.cs @@ -100,13 +100,13 @@ public partial class SettingsPanelTranscription : SettingsPanelBase if (dialogResult is null || dialogResult.Canceled) return; - var deleteSecretResponse = await this.RustService.DeleteAPIKey(provider); + var deleteSecretResponse = await this.RustService.DeleteAPIKey(provider, SecretStoreType.TRANSCRIPTION_PROVIDER); if(deleteSecretResponse.Success) { this.SettingsManager.ConfigurationData.TranscriptionProviders.Remove(provider); await this.SettingsManager.StoreSettings(); } - + await this.UpdateTranscriptionProviders(); await this.MessageBus.SendMessage(this, Event.CONFIGURATION_CHANGED); } diff --git a/app/MindWork AI Studio/Components/VoiceRecorder.razor b/app/MindWork AI Studio/Components/VoiceRecorder.razor index b1e5a07b..fb7a1334 100644 --- a/app/MindWork AI Studio/Components/VoiceRecorder.razor +++ b/app/MindWork AI Studio/Components/VoiceRecorder.razor @@ -6,11 +6,18 @@ @if (PreviewFeatures.PRE_SPEECH_TO_TEXT_2026.IsEnabled(this.SettingsManager) && !string.IsNullOrWhiteSpace(this.SettingsManager.ConfigurationData.App.UseTranscriptionProvider)) { - + @if (this.isTranscribing) + { + + } + else + { + + } } diff --git a/app/MindWork AI Studio/Components/VoiceRecorder.razor.cs b/app/MindWork AI Studio/Components/VoiceRecorder.razor.cs index 3cfa787b..4ea00180 100644 --- a/app/MindWork AI Studio/Components/VoiceRecorder.razor.cs +++ b/app/MindWork AI Studio/Components/VoiceRecorder.razor.cs @@ -1,3 +1,4 @@ +using AIStudio.Provider; using AIStudio.Tools.MIME; using AIStudio.Tools.Services; @@ -9,21 +10,30 @@ public partial class VoiceRecorder : MSGComponentBase { [Inject] private ILogger Logger { get; init; } = null!; - + [Inject] private IJSRuntime JsRuntime { get; init; } = null!; - + [Inject] private RustService RustService { get; init; } = null!; + [Inject] + private ISnackbar Snackbar { get; init; } = null!; + private uint numReceivedChunks; private bool isRecording; + private bool isTranscribing; private FileStream? currentRecordingStream; private string? currentRecordingPath; private string? currentRecordingMimeType; + private string? finalRecordingPath; private DotNetObjectReference? dotNetReference; - - private string Tooltip => this.isRecording ? 
T("Stop recording and start transcription") : T("Start recording your voice for a transcription"); + + private string Tooltip => this.isTranscribing + ? T("Transcription in progress...") + : this.isRecording + ? T("Stop recording and start transcription") + : T("Start recording your voice for a transcription"); private async Task OnRecordingToggled(bool toggled) { @@ -66,6 +76,10 @@ public partial class VoiceRecorder : MSGComponentBase this.isRecording = false; this.StateHasChanged(); + + // Start transcription if we have a recording and a configured provider: + if (this.finalRecordingPath is not null) + await this.TranscribeRecordingAsync(); } } @@ -127,6 +141,7 @@ public partial class VoiceRecorder : MSGComponentBase private async Task FinalizeRecordingStream() { + this.finalRecordingPath = null; if (this.currentRecordingStream is not null) { await this.currentRecordingStream.FlushAsync(); @@ -142,6 +157,7 @@ public partial class VoiceRecorder : MSGComponentBase if (File.Exists(this.currentRecordingPath)) { File.Move(this.currentRecordingPath, newPath, overwrite: true); + this.finalRecordingPath = newPath; this.Logger.LogInformation("Finalized audio recording over {NumChunks} streamed audio chunks to the file '{RecordingPath}'.", this.numReceivedChunks, newPath); } } @@ -170,6 +186,114 @@ public partial class VoiceRecorder : MSGComponentBase }; } + private async Task TranscribeRecordingAsync() + { + if (this.finalRecordingPath is null) + return; + + this.isTranscribing = true; + this.StateHasChanged(); + + try + { + // Get the configured transcription provider ID: + var transcriptionProviderId = this.SettingsManager.ConfigurationData.App.UseTranscriptionProvider; + if (string.IsNullOrWhiteSpace(transcriptionProviderId)) + { + this.Logger.LogWarning("No transcription provider is configured."); + await this.MessageBus.SendError(new(Icons.Material.Filled.VoiceChat, this.T("No transcription provider is configured."))); + return; + } + + // Find the transcription provider in the list of configured providers: + var transcriptionProviderSettings = this.SettingsManager.ConfigurationData.TranscriptionProviders + .FirstOrDefault(x => x.Id == transcriptionProviderId); + + if (transcriptionProviderSettings is null) + { + this.Logger.LogWarning("The configured transcription provider with ID '{ProviderId}' was not found.", transcriptionProviderId); + await this.MessageBus.SendError(new(Icons.Material.Filled.VoiceChat, this.T("The configured transcription provider was not found."))); + return; + } + + // Check the confidence level: + var minimumLevel = this.SettingsManager.GetMinimumConfidenceLevel(Tools.Components.NONE); + var providerConfidence = transcriptionProviderSettings.UsedLLMProvider.GetConfidence(this.SettingsManager); + if (providerConfidence.Level < minimumLevel) + { + this.Logger.LogWarning( + "The configured transcription provider '{ProviderName}' has a confidence level of '{ProviderLevel}', which is below the minimum required level of '{MinimumLevel}'.", + transcriptionProviderSettings.Name, + providerConfidence.Level, + minimumLevel); + await this.MessageBus.SendError(new(Icons.Material.Filled.VoiceChat, this.T("The configured transcription provider does not meet the minimum confidence level."))); + return; + } + + // Create the provider instance: + var provider = transcriptionProviderSettings.CreateProvider(); + if (provider.Provider is LLMProviders.NONE) + { + this.Logger.LogError("Failed to create the transcription provider instance."); + await 
this.MessageBus.SendError(new(Icons.Material.Filled.VoiceChat, this.T("Failed to create the transcription provider."))); + return; + } + + // Call the transcription API: + this.Logger.LogInformation("Starting transcription with provider '{ProviderName}' and model '{ModelName}'.", transcriptionProviderSettings.Name, transcriptionProviderSettings.Model.DisplayName); + var transcribedText = await provider.TranscribeAudioAsync(transcriptionProviderSettings.Model, this.finalRecordingPath, this.SettingsManager); + + if (string.IsNullOrWhiteSpace(transcribedText)) + { + this.Logger.LogWarning("The transcription result is empty."); + await this.MessageBus.SendWarning(new(Icons.Material.Filled.VoiceChat, this.T("The transcription result is empty."))); + return; + } + + // Remove trailing and leading whitespace: + transcribedText = transcribedText.Trim(); + + // Replace line breaks with spaces: + transcribedText = transcribedText.Replace("\r", " ").Replace("\n", " "); + + // Replace two spaces with a single space: + transcribedText = transcribedText.Replace(" ", " "); + + this.Logger.LogInformation("Transcription completed successfully. Result length: {Length} characters.", transcribedText.Length); + + // Play the transcription done sound effect: + await this.JsRuntime.InvokeVoidAsync("playSound", "/sounds/transcription_done.ogg"); + + // Copy the transcribed text to the clipboard: + await this.RustService.CopyText2Clipboard(this.Snackbar, transcribedText); + + // Delete the recording file: + try + { + if (File.Exists(this.finalRecordingPath)) + { + File.Delete(this.finalRecordingPath); + this.Logger.LogInformation("Deleted the recording file '{RecordingPath}'.", this.finalRecordingPath); + } + } + catch (Exception ex) + { + this.Logger.LogError(ex, "Failed to delete the recording file '{RecordingPath}'.", this.finalRecordingPath); + } + } + catch (Exception ex) + { + this.Logger.LogError(ex, "An error occurred during transcription."); + await this.MessageBus.SendError(new(Icons.Material.Filled.VoiceChat, this.T("An error occurred during transcription."))); + } + finally + { + this.finalRecordingPath = null; + this.isTranscribing = false; + this.StateHasChanged(); + } + } + private sealed class AudioRecordingResult { public string MimeType { get; init; } = string.Empty; diff --git a/app/MindWork AI Studio/Dialogs/EmbeddingProviderDialog.razor b/app/MindWork AI Studio/Dialogs/EmbeddingProviderDialog.razor index a94b9ebd..aaf2cec7 100644 --- a/app/MindWork AI Studio/Dialogs/EmbeddingProviderDialog.razor +++ b/app/MindWork AI Studio/Dialogs/EmbeddingProviderDialog.razor @@ -25,7 +25,7 @@ @if (this.DataLLMProvider.IsAPIKeyNeeded(this.DataHost)) { - + } @if (this.DataLLMProvider.IsHostnameNeeded()) diff --git a/app/MindWork AI Studio/Dialogs/EmbeddingProviderDialog.razor.cs b/app/MindWork AI Studio/Dialogs/EmbeddingProviderDialog.razor.cs index a6677686..8a0180c5 100644 --- a/app/MindWork AI Studio/Dialogs/EmbeddingProviderDialog.razor.cs +++ b/app/MindWork AI Studio/Dialogs/EmbeddingProviderDialog.razor.cs @@ -138,6 +138,9 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId protected override async Task OnInitializedAsync() { + // Call the base initialization first so that the I18N is ready: + await base.OnInitializedAsync(); + // Configure the spellchecking for the instance name input: this.SettingsManager.InjectSpellchecking(SPELLCHECK_ATTRIBUTES); @@ -164,7 +167,7 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId } // Load the API key: - var 
requestedSecret = await this.RustService.GetAPIKey(this, isTrying: this.DataLLMProvider is LLMProviders.SELF_HOSTED); + var requestedSecret = await this.RustService.GetAPIKey(this, SecretStoreType.EMBEDDING_PROVIDER, isTrying: this.DataLLMProvider is LLMProviders.SELF_HOSTED); if (requestedSecret.Success) this.dataAPIKey = await requestedSecret.Secret.Decrypt(this.encryption); else @@ -179,8 +182,6 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId await this.ReloadModels(); } - - await base.OnInitializedAsync(); } protected override async Task OnAfterRenderAsync(bool firstRender) @@ -197,7 +198,7 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId #region Implementation of ISecretId - public string SecretId => this.DataId; + public string SecretId => this.DataLLMProvider.ToName(); public string SecretName => this.DataName; @@ -218,7 +219,7 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId if (!string.IsNullOrWhiteSpace(this.dataAPIKey)) { // Store the API key in the OS secure storage: - var storeResponse = await this.RustService.SetAPIKey(this, this.dataAPIKey); + var storeResponse = await this.RustService.SetAPIKey(this, this.dataAPIKey, SecretStoreType.EMBEDDING_PROVIDER); if (!storeResponse.Success) { this.dataAPIKeyStorageIssue = string.Format(T("Failed to store the API key in the operating system. The message was: {0}. Please try again."), storeResponse.Issue); @@ -239,6 +240,16 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId } private void Cancel() => this.MudDialog.Cancel(); + + private async Task OnAPIKeyChanged(string apiKey) + { + this.dataAPIKey = apiKey; + if (!string.IsNullOrWhiteSpace(this.dataAPIKeyStorageIssue)) + { + this.dataAPIKeyStorageIssue = string.Empty; + await this.form.Validate(); + } + } private async Task ReloadModels() { diff --git a/app/MindWork AI Studio/Dialogs/ProviderDialog.razor b/app/MindWork AI Studio/Dialogs/ProviderDialog.razor index dc92f441..b424202d 100644 --- a/app/MindWork AI Studio/Dialogs/ProviderDialog.razor +++ b/app/MindWork AI Studio/Dialogs/ProviderDialog.razor @@ -22,7 +22,7 @@ @if (this.DataLLMProvider.IsAPIKeyNeeded(this.DataHost)) { - + } @if (this.DataLLMProvider.IsHostnameNeeded()) diff --git a/app/MindWork AI Studio/Dialogs/ProviderDialog.razor.cs b/app/MindWork AI Studio/Dialogs/ProviderDialog.razor.cs index 0c3d2b1f..f00a43d5 100644 --- a/app/MindWork AI Studio/Dialogs/ProviderDialog.razor.cs +++ b/app/MindWork AI Studio/Dialogs/ProviderDialog.razor.cs @@ -147,6 +147,9 @@ public partial class ProviderDialog : MSGComponentBase, ISecretId protected override async Task OnInitializedAsync() { + // Call the base initialization first so that the I18N is ready: + await base.OnInitializedAsync(); + // Configure the spellchecking for the instance name input: this.SettingsManager.InjectSpellchecking(SPELLCHECK_ATTRIBUTES); @@ -177,7 +180,7 @@ public partial class ProviderDialog : MSGComponentBase, ISecretId } // Load the API key: - var requestedSecret = await this.RustService.GetAPIKey(this, isTrying: this.DataLLMProvider is LLMProviders.SELF_HOSTED); + var requestedSecret = await this.RustService.GetAPIKey(this, SecretStoreType.LLM_PROVIDER, isTrying: this.DataLLMProvider is LLMProviders.SELF_HOSTED); if (requestedSecret.Success) this.dataAPIKey = await requestedSecret.Secret.Decrypt(this.encryption); else @@ -192,8 +195,6 @@ public partial class ProviderDialog : MSGComponentBase, ISecretId await this.ReloadModels(); } - - await 
base.OnInitializedAsync(); } protected override async Task OnAfterRenderAsync(bool firstRender) @@ -232,7 +233,7 @@ public partial class ProviderDialog : MSGComponentBase, ISecretId if (!string.IsNullOrWhiteSpace(this.dataAPIKey)) { // Store the API key in the OS secure storage: - var storeResponse = await this.RustService.SetAPIKey(this, this.dataAPIKey); + var storeResponse = await this.RustService.SetAPIKey(this, this.dataAPIKey, SecretStoreType.LLM_PROVIDER); if (!storeResponse.Success) { this.dataAPIKeyStorageIssue = string.Format(T("Failed to store the API key in the operating system. The message was: {0}. Please try again."), storeResponse.Issue); @@ -253,6 +254,16 @@ public partial class ProviderDialog : MSGComponentBase, ISecretId } private void Cancel() => this.MudDialog.Cancel(); + + private async Task OnAPIKeyChanged(string apiKey) + { + this.dataAPIKey = apiKey; + if (!string.IsNullOrWhiteSpace(this.dataAPIKeyStorageIssue)) + { + this.dataAPIKeyStorageIssue = string.Empty; + await this.form.Validate(); + } + } private async Task ReloadModels() { diff --git a/app/MindWork AI Studio/Dialogs/TranscriptionProviderDialog.razor b/app/MindWork AI Studio/Dialogs/TranscriptionProviderDialog.razor index 5c862365..0a2aa3bb 100644 --- a/app/MindWork AI Studio/Dialogs/TranscriptionProviderDialog.razor +++ b/app/MindWork AI Studio/Dialogs/TranscriptionProviderDialog.razor @@ -25,7 +25,7 @@ @if (this.DataLLMProvider.IsAPIKeyNeeded(this.DataHost)) { - + } @if (this.DataLLMProvider.IsHostnameNeeded()) diff --git a/app/MindWork AI Studio/Dialogs/TranscriptionProviderDialog.razor.cs b/app/MindWork AI Studio/Dialogs/TranscriptionProviderDialog.razor.cs index b5b02e07..e2f4110d 100644 --- a/app/MindWork AI Studio/Dialogs/TranscriptionProviderDialog.razor.cs +++ b/app/MindWork AI Studio/Dialogs/TranscriptionProviderDialog.razor.cs @@ -146,6 +146,9 @@ public partial class TranscriptionProviderDialog : MSGComponentBase, ISecretId protected override async Task OnInitializedAsync() { + // Call the base initialization first so that the I18N is ready: + await base.OnInitializedAsync(); + // Configure the spellchecking for the instance name input: this.SettingsManager.InjectSpellchecking(SPELLCHECK_ATTRIBUTES); @@ -172,7 +175,7 @@ public partial class TranscriptionProviderDialog : MSGComponentBase, ISecretId } // Load the API key: - var requestedSecret = await this.RustService.GetAPIKey(this, isTrying: this.DataLLMProvider is LLMProviders.SELF_HOSTED); + var requestedSecret = await this.RustService.GetAPIKey(this, SecretStoreType.TRANSCRIPTION_PROVIDER, isTrying: this.DataLLMProvider is LLMProviders.SELF_HOSTED); if (requestedSecret.Success) this.dataAPIKey = await requestedSecret.Secret.Decrypt(this.encryption); else @@ -187,8 +190,6 @@ public partial class TranscriptionProviderDialog : MSGComponentBase, ISecretId await this.ReloadModels(); } - - await base.OnInitializedAsync(); } protected override async Task OnAfterRenderAsync(bool firstRender) @@ -205,7 +206,7 @@ public partial class TranscriptionProviderDialog : MSGComponentBase, ISecretId #region Implementation of ISecretId - public string SecretId => this.DataId; + public string SecretId => this.DataLLMProvider.ToName(); public string SecretName => this.DataName; @@ -226,7 +227,7 @@ public partial class TranscriptionProviderDialog : MSGComponentBase, ISecretId if (!string.IsNullOrWhiteSpace(this.dataAPIKey)) { // Store the API key in the OS secure storage: - var storeResponse = await this.RustService.SetAPIKey(this, this.dataAPIKey); + var 
storeResponse = await this.RustService.SetAPIKey(this, this.dataAPIKey, SecretStoreType.TRANSCRIPTION_PROVIDER); if (!storeResponse.Success) { this.dataAPIKeyStorageIssue = string.Format(T("Failed to store the API key in the operating system. The message was: {0}. Please try again."), storeResponse.Issue); @@ -247,6 +248,16 @@ public partial class TranscriptionProviderDialog : MSGComponentBase, ISecretId } private void Cancel() => this.MudDialog.Cancel(); + + private async Task OnAPIKeyChanged(string apiKey) + { + this.dataAPIKey = apiKey; + if (!string.IsNullOrWhiteSpace(this.dataAPIKeyStorageIssue)) + { + this.dataAPIKeyStorageIssue = string.Empty; + await this.form.Validate(); + } + } private async Task ReloadModels() { diff --git a/app/MindWork AI Studio/Plugins/configuration/plugin.lua b/app/MindWork AI Studio/Plugins/configuration/plugin.lua index 7441eabc..8cd62555 100644 --- a/app/MindWork AI Studio/Plugins/configuration/plugin.lua +++ b/app/MindWork AI Studio/Plugins/configuration/plugin.lua @@ -75,7 +75,7 @@ CONFIG["TRANSCRIPTION_PROVIDERS"] = {} -- An example of a transcription provider configuration: -- CONFIG["TRANSCRIPTION_PROVIDERS"][#CONFIG["TRANSCRIPTION_PROVIDERS"]+1] = { --- ["Id"] = "00000000-0000-0000-0000-000000000001", +-- ["Id"] = "00000000-0000-0000-0000-000000000000", -- ["Name"] = "", -- ["UsedLLMProvider"] = "SELF_HOSTED", -- diff --git a/app/MindWork AI Studio/Plugins/languages/de-de-43065dbc-78d0-45b7-92be-f14c2926e2dc/plugin.lua b/app/MindWork AI Studio/Plugins/languages/de-de-43065dbc-78d0-45b7-92be-f14c2926e2dc/plugin.lua index e1319c24..ebab2f8c 100644 --- a/app/MindWork AI Studio/Plugins/languages/de-de-43065dbc-78d0-45b7-92be-f14c2926e2dc/plugin.lua +++ b/app/MindWork AI Studio/Plugins/languages/de-de-43065dbc-78d0-45b7-92be-f14c2926e2dc/plugin.lua @@ -2130,6 +2130,9 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELEMBEDDINGS::T32678 -- Actions UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELEMBEDDINGS::T3865031940"] = "Aktionen" +-- This embedding provider is managed by your organization. +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELEMBEDDINGS::T4062656589"] = "Dieser Einbettungsanbieter wird von Ihrer Organisation verwaltet." + -- No embeddings configured yet. UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELEMBEDDINGS::T4068015588"] = "Es wurden bislang keine Einbettungen konfiguriert." @@ -2289,6 +2292,9 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T40 -- Configured Transcription Providers UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T4210863523"] = "Konfigurierte Anbieter für Transkriptionen" +-- This transcription provider is managed by your organization. +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T756131076"] = "Dieser Anbieter für Transkriptionen wird von Ihrer Organisation verwaltet." + -- Open Dashboard UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T78223861"] = "Dashboard öffnen" @@ -2296,7 +2302,7 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T78 UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T789660305"] = "Möchten Sie den Anbieter für Transkriptionen „{0}“ wirklich löschen?" -- With the support of transcription models, MindWork AI Studio can convert human speech into text. This is useful, for example, when you need to dictate text. 
You can choose from dedicated transcription models, but not multimodal LLMs (large language models) that can handle both speech and text. The configuration of multimodal models is done in the \"Configure providers\" section. -UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T799338148"] = "Mit Unterstützung von Modellen für Transkriptionen kann MindWork AI Studio menschliche Sprache in Text umwandeln. Das ist zum Beispiel hilfreich, wenn Sie Texte diktieren möchten. Sie können aus speziellen Modellen für Transkriptionen wählen, jedoch nicht aus multimodalen LLMs (Large Language Models), die sowohl Sprache als auch Text verarbeiten können. Die Einrichtung multimodaler Modelle erfolgt im Abschnitt „Anbieter konfigurieren“." +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T584860404"] = "Mit Unterstützung von Modellen für Transkriptionen kann MindWork AI Studio menschliche Sprache in Text umwandeln. Das ist zum Beispiel hilfreich, wenn Sie Texte diktieren möchten. Sie können aus speziellen Modellen für Transkriptionen wählen, jedoch nicht aus multimodalen LLMs (Large Language Models), die sowohl Sprache als auch Text verarbeiten können. Die Einrichtung multimodaler Modelle erfolgt im Abschnitt „Anbieter für LLM konfigurieren“." -- Provider UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T900237532"] = "Anbieter" @@ -2382,12 +2388,33 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VISION::T428040679"] = "Erstellung von In -- Useful assistants UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VISION::T586430036"] = "Nützliche Assistenten" +-- Failed to create the transcription provider. +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T1689988905"] = "Der Anbieter für die Transkription konnte nicht erstellt werden." + -- Stop recording and start transcription UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T224155287"] = "Aufnahme beenden und Transkription starten" -- Start recording your voice for a transcription UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T2372624045"] = "Beginnen Sie mit der Aufnahme Ihrer Stimme für eine Transkription" +-- Transcription in progress... +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T2851219233"] = "Transkription läuft …" + +-- The configured transcription provider was not found. +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T331613105"] = "Der konfigurierte Anbieter für die Transkription wurde nicht gefunden." + +-- The configured transcription provider does not meet the minimum confidence level. +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T3834149033"] = "Der konfigurierte Anbieter für die Transkription erfüllt nicht das erforderliche Mindestmaß an Vertrauenswürdigkeit." + +-- An error occurred during transcription. +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T588743762"] = "Während der Transkription ist ein Fehler aufgetreten." + +-- No transcription provider is configured. +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T663630295"] = "Es ist kein Anbieter für die Transkription konfiguriert." + +-- The transcription result is empty. +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T974954792"] = "Das Ergebnis der Transkription ist leer." + -- Are you sure you want to delete the chat '{0}' in the workspace '{1}'? UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::WORKSPACES::T1016188706"] = "Möchten Sie den Chat „{0}“ im Arbeitsbereich „{1}“ wirklich löschen?" 
diff --git a/app/MindWork AI Studio/Plugins/languages/en-us-97dfb1ba-50c4-4440-8dfa-6575daf543c8/plugin.lua b/app/MindWork AI Studio/Plugins/languages/en-us-97dfb1ba-50c4-4440-8dfa-6575daf543c8/plugin.lua index 6d74996b..6e0ba76c 100644 --- a/app/MindWork AI Studio/Plugins/languages/en-us-97dfb1ba-50c4-4440-8dfa-6575daf543c8/plugin.lua +++ b/app/MindWork AI Studio/Plugins/languages/en-us-97dfb1ba-50c4-4440-8dfa-6575daf543c8/plugin.lua @@ -2130,6 +2130,9 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELEMBEDDINGS::T32678 -- Actions UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELEMBEDDINGS::T3865031940"] = "Actions" +-- This embedding provider is managed by your organization. +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELEMBEDDINGS::T4062656589"] = "This embedding provider is managed by your organization." + -- No embeddings configured yet. UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELEMBEDDINGS::T4068015588"] = "No embeddings configured yet." @@ -2289,15 +2292,18 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T40 -- Configured Transcription Providers UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T4210863523"] = "Configured Transcription Providers" +-- With the support of transcription models, MindWork AI Studio can convert human speech into text. This is useful, for example, when you need to dictate text. You can choose from dedicated transcription models, but not multimodal LLMs (large language models) that can handle both speech and text. The configuration of multimodal models is done in the 'Configure providers' section. +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T584860404"] = "With the support of transcription models, MindWork AI Studio can convert human speech into text. This is useful, for example, when you need to dictate text. You can choose from dedicated transcription models, but not multimodal LLMs (large language models) that can handle both speech and text. The configuration of multimodal models is done in the 'Configure LLM providers' section." + +-- This transcription provider is managed by your organization. +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T756131076"] = "This transcription provider is managed by your organization." + -- Open Dashboard UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T78223861"] = "Open Dashboard" -- Are you sure you want to delete the transcription provider '{0}'? UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T789660305"] = "Are you sure you want to delete the transcription provider '{0}'?" --- With the support of transcription models, MindWork AI Studio can convert human speech into text. This is useful, for example, when you need to dictate text. You can choose from dedicated transcription models, but not multimodal LLMs (large language models) that can handle both speech and text. The configuration of multimodal models is done in the \"Configure providers\" section. -UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T799338148"] = "With the support of transcription models, MindWork AI Studio can convert human speech into text. This is useful, for example, when you need to dictate text. You can choose from dedicated transcription models, but not multimodal LLMs (large language models) that can handle both speech and text. 
The configuration of multimodal models is done in the \\\"Configure providers\\\" section." - -- Provider UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T900237532"] = "Provider" @@ -2382,12 +2388,33 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VISION::T428040679"] = "Content creation" -- Useful assistants UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VISION::T586430036"] = "Useful assistants" +-- Failed to create the transcription provider. +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T1689988905"] = "Failed to create the transcription provider." + -- Stop recording and start transcription UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T224155287"] = "Stop recording and start transcription" -- Start recording your voice for a transcription UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T2372624045"] = "Start recording your voice for a transcription" +-- Transcription in progress... +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T2851219233"] = "Transcription in progress..." + +-- The configured transcription provider was not found. +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T331613105"] = "The configured transcription provider was not found." + +-- The configured transcription provider does not meet the minimum confidence level. +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T3834149033"] = "The configured transcription provider does not meet the minimum confidence level." + +-- An error occurred during transcription. +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T588743762"] = "An error occurred during transcription." + +-- No transcription provider is configured. +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T663630295"] = "No transcription provider is configured." + +-- The transcription result is empty. +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T974954792"] = "The transcription result is empty." + -- Are you sure you want to delete the chat '{0}' in the workspace '{1}'? UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::WORKSPACES::T1016188706"] = "Are you sure you want to delete the chat '{0}' in the workspace '{1}'?" diff --git a/app/MindWork AI Studio/Provider/AlibabaCloud/ProviderAlibabaCloud.cs b/app/MindWork AI Studio/Provider/AlibabaCloud/ProviderAlibabaCloud.cs index 6b648372..2c763678 100644 --- a/app/MindWork AI Studio/Provider/AlibabaCloud/ProviderAlibabaCloud.cs +++ b/app/MindWork AI Studio/Provider/AlibabaCloud/ProviderAlibabaCloud.cs @@ -25,7 +25,7 @@ public sealed class ProviderAlibabaCloud() : BaseProvider(LLMProviders.ALIBABA_C public override async IAsyncEnumerable StreamChatCompletion(Model chatModel, ChatThread chatThread, SettingsManager settingsManager, [EnumeratorCancellation] CancellationToken token = default) { // Get the API key: - var requestedSecret = await RUST_SERVICE.GetAPIKey(this); + var requestedSecret = await RUST_SERVICE.GetAPIKey(this, SecretStoreType.LLM_PROVIDER); if(!requestedSecret.Success) yield break; @@ -80,6 +80,12 @@ public sealed class ProviderAlibabaCloud() : BaseProvider(LLMProviders.ALIBABA_C yield break; } #pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously + + /// + public override Task TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default) + { + return Task.FromResult(string.Empty); + } /// public override Task> GetTextModels(string? 
apiKeyProvisional = null, CancellationToken token = default) @@ -111,7 +117,7 @@ public sealed class ProviderAlibabaCloud() : BaseProvider(LLMProviders.ALIBABA_C new Model("qwen2.5-vl-3b-instruct", "Qwen2.5-VL 3b"), }; - return this.LoadModels(["q"],token, apiKeyProvisional).ContinueWith(t => t.Result.Concat(additionalModels).OrderBy(x => x.Id).AsEnumerable(), token); + return this.LoadModels(["q"], SecretStoreType.LLM_PROVIDER, token, apiKeyProvisional).ContinueWith(t => t.Result.Concat(additionalModels).OrderBy(x => x.Id).AsEnumerable(), token); } /// @@ -129,7 +135,7 @@ public sealed class ProviderAlibabaCloud() : BaseProvider(LLMProviders.ALIBABA_C new Model("text-embedding-v3", "text-embedding-v3"), }; - return this.LoadModels(["text-embedding-"], token, apiKeyProvisional).ContinueWith(t => t.Result.Concat(additionalModels).OrderBy(x => x.Id).AsEnumerable(), token); + return this.LoadModels(["text-embedding-"], SecretStoreType.EMBEDDING_PROVIDER, token, apiKeyProvisional).ContinueWith(t => t.Result.Concat(additionalModels).OrderBy(x => x.Id).AsEnumerable(), token); } #region Overrides of BaseProvider @@ -144,12 +150,12 @@ public sealed class ProviderAlibabaCloud() : BaseProvider(LLMProviders.ALIBABA_C #endregion - private async Task> LoadModels(string[] prefixes, CancellationToken token, string? apiKeyProvisional = null) + private async Task> LoadModels(string[] prefixes, SecretStoreType storeType, CancellationToken token, string? apiKeyProvisional = null) { var secretKey = apiKeyProvisional switch { not null => apiKeyProvisional, - _ => await RUST_SERVICE.GetAPIKey(this) switch + _ => await RUST_SERVICE.GetAPIKey(this, storeType) switch { { Success: true } result => await result.Secret.Decrypt(ENCRYPTION), _ => null, diff --git a/app/MindWork AI Studio/Provider/Anthropic/ProviderAnthropic.cs b/app/MindWork AI Studio/Provider/Anthropic/ProviderAnthropic.cs index 42268936..539c4427 100644 --- a/app/MindWork AI Studio/Provider/Anthropic/ProviderAnthropic.cs +++ b/app/MindWork AI Studio/Provider/Anthropic/ProviderAnthropic.cs @@ -23,7 +23,7 @@ public sealed class ProviderAnthropic() : BaseProvider(LLMProviders.ANTHROPIC, " public override async IAsyncEnumerable StreamChatCompletion(Model chatModel, ChatThread chatThread, SettingsManager settingsManager, [EnumeratorCancellation] CancellationToken token = default) { // Get the API key: - var requestedSecret = await RUST_SERVICE.GetAPIKey(this); + var requestedSecret = await RUST_SERVICE.GetAPIKey(this, SecretStoreType.LLM_PROVIDER); if(!requestedSecret.Success) yield break; @@ -107,6 +107,12 @@ public sealed class ProviderAnthropic() : BaseProvider(LLMProviders.ANTHROPIC, " yield break; } #pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously + + /// + public override Task TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default) + { + return Task.FromResult(string.Empty); + } /// public override Task> GetTextModels(string? 
apiKeyProvisional = null, CancellationToken token = default) @@ -121,7 +127,7 @@ public sealed class ProviderAnthropic() : BaseProvider(LLMProviders.ANTHROPIC, " new Model("claude-3-opus-latest", "Claude 3 Opus (Latest)"), }; - return this.LoadModels(token, apiKeyProvisional).ContinueWith(t => t.Result.Concat(additionalModels).OrderBy(x => x.Id).AsEnumerable(), token); + return this.LoadModels(SecretStoreType.LLM_PROVIDER, token, apiKeyProvisional).ContinueWith(t => t.Result.Concat(additionalModels).OrderBy(x => x.Id).AsEnumerable(), token); } /// @@ -144,12 +150,12 @@ public sealed class ProviderAnthropic() : BaseProvider(LLMProviders.ANTHROPIC, " #endregion - private async Task> LoadModels(CancellationToken token, string? apiKeyProvisional = null) + private async Task> LoadModels(SecretStoreType storeType, CancellationToken token, string? apiKeyProvisional = null) { var secretKey = apiKeyProvisional switch { not null => apiKeyProvisional, - _ => await RUST_SERVICE.GetAPIKey(this) switch + _ => await RUST_SERVICE.GetAPIKey(this, storeType) switch { { Success: true } result => await result.Secret.Decrypt(ENCRYPTION), _ => null, diff --git a/app/MindWork AI Studio/Provider/BaseProvider.cs b/app/MindWork AI Studio/Provider/BaseProvider.cs index c5594087..3e207da5 100644 --- a/app/MindWork AI Studio/Provider/BaseProvider.cs +++ b/app/MindWork AI Studio/Provider/BaseProvider.cs @@ -1,4 +1,5 @@ using System.Net; +using System.Net.Http.Headers; using System.Runtime.CompilerServices; using System.Text.Json; using System.Text.Json.Serialization; @@ -6,10 +7,15 @@ using System.Text.Json.Serialization; using AIStudio.Chat; using AIStudio.Provider.Anthropic; using AIStudio.Provider.OpenAI; +using AIStudio.Provider.SelfHosted; using AIStudio.Settings; +using AIStudio.Tools.MIME; using AIStudio.Tools.PluginSystem; +using AIStudio.Tools.Rust; using AIStudio.Tools.Services; +using Host = AIStudio.Provider.SelfHosted.Host; + namespace AIStudio.Provider; /// @@ -89,6 +95,9 @@ public abstract class BaseProvider : IProvider, ISecretId /// public abstract IAsyncEnumerable StreamImageCompletion(Model imageModel, string promptPositive, string promptNegative = FilterOperator.String.Empty, ImageURL referenceImageURL = default, CancellationToken token = default); + /// + public abstract Task TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default); + /// public abstract Task> GetTextModels(string? 
apiKeyProvisional = null, CancellationToken token = default);
@@ -536,6 +545,78 @@ public abstract class BaseProvider : IProvider, ISecretId
         streamReader.Dispose();
     }
 
+    protected async Task<string> PerformStandardTranscriptionRequest(RequestedSecret requestedSecret, Model transcriptionModel, string audioFilePath, Host host = Host.NONE, CancellationToken token = default)
+    {
+        try
+        {
+            using var form = new MultipartFormDataContent();
+            var mimeType = Builder.FromFilename(audioFilePath);
+
+            await using var fileStream = File.OpenRead(audioFilePath);
+            using var fileContent = new StreamContent(fileStream);
+            fileContent.Headers.ContentType = new MediaTypeHeaderValue(mimeType);
+
+            form.Add(fileContent, "file", Path.GetFileName(audioFilePath));
+            form.Add(new StringContent(transcriptionModel.Id), "model");
+
+            using var request = new HttpRequestMessage(HttpMethod.Post, host.TranscriptionURL());
+            request.Content = form;
+
+            // Handle the authorization header based on the provider:
+            switch (this.Provider)
+            {
+                case LLMProviders.SELF_HOSTED:
+                    if(requestedSecret.Success)
+                        request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", await requestedSecret.Secret.Decrypt(ENCRYPTION));
+
+                    break;
+
+                case LLMProviders.FIREWORKS:
+                    if(!requestedSecret.Success)
+                    {
+                        this.logger.LogError("No valid API key available for transcription request.");
+                        return string.Empty;
+                    }
+
+                    request.Headers.Add("Authorization", await requestedSecret.Secret.Decrypt(ENCRYPTION));
+                    break;
+
+                default:
+                    if(!requestedSecret.Success)
+                    {
+                        this.logger.LogError("No valid API key available for transcription request.");
+                        return string.Empty;
+                    }
+
+                    request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", await requestedSecret.Secret.Decrypt(ENCRYPTION));
+                    break;
+            }
+
+            using var response = await this.httpClient.SendAsync(request, token);
+            var responseBody = await response.Content.ReadAsStringAsync(token);
+
+            if (!response.IsSuccessStatusCode)
+            {
+                this.logger.LogError("Transcription request failed with status code {ResponseStatusCode} and body: '{ResponseBody}'.", response.StatusCode, responseBody);
+                return string.Empty;
+            }
+
+            var transcriptionResponse = JsonSerializer.Deserialize<TranscriptionResponse>(responseBody, JSON_SERIALIZER_OPTIONS);
+            if(transcriptionResponse is null)
+            {
+                this.logger.LogError("Was not able to deserialize the transcription response.");
+                return string.Empty;
+            }
+
+            return transcriptionResponse.Text;
+        }
+        catch (Exception e)
+        {
+            this.logger.LogError("Failed to perform transcription request: '{Message}'.", e.Message);
+            return string.Empty;
+        }
+    }
+
     /// <summary>
     /// Parse and convert API parameters from a provided JSON string into a dictionary,
     /// optionally merging additional parameters and removing specific keys.
diff --git a/app/MindWork AI Studio/Provider/DeepSeek/ProviderDeepSeek.cs b/app/MindWork AI Studio/Provider/DeepSeek/ProviderDeepSeek.cs index b2715f47..f5e1016d 100644 --- a/app/MindWork AI Studio/Provider/DeepSeek/ProviderDeepSeek.cs +++ b/app/MindWork AI Studio/Provider/DeepSeek/ProviderDeepSeek.cs @@ -25,7 +25,7 @@ public sealed class ProviderDeepSeek() : BaseProvider(LLMProviders.DEEP_SEEK, "h public override async IAsyncEnumerable StreamChatCompletion(Model chatModel, ChatThread chatThread, SettingsManager settingsManager, [EnumeratorCancellation] CancellationToken token = default) { // Get the API key: - var requestedSecret = await RUST_SERVICE.GetAPIKey(this); + var requestedSecret = await RUST_SERVICE.GetAPIKey(this, SecretStoreType.LLM_PROVIDER); if(!requestedSecret.Success) yield break; @@ -80,11 +80,17 @@ public sealed class ProviderDeepSeek() : BaseProvider(LLMProviders.DEEP_SEEK, "h yield break; } #pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously + + /// + public override Task TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default) + { + return Task.FromResult(string.Empty); + } /// public override Task> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default) { - return this.LoadModels(token, apiKeyProvisional); + return this.LoadModels(SecretStoreType.LLM_PROVIDER, token, apiKeyProvisional); } /// @@ -107,12 +113,12 @@ public sealed class ProviderDeepSeek() : BaseProvider(LLMProviders.DEEP_SEEK, "h #endregion - private async Task> LoadModels(CancellationToken token, string? apiKeyProvisional = null) + private async Task> LoadModels(SecretStoreType storeType, CancellationToken token, string? 
apiKeyProvisional = null) { var secretKey = apiKeyProvisional switch { not null => apiKeyProvisional, - _ => await RUST_SERVICE.GetAPIKey(this) switch + _ => await RUST_SERVICE.GetAPIKey(this, storeType) switch { { Success: true } result => await result.Secret.Decrypt(ENCRYPTION), _ => null, diff --git a/app/MindWork AI Studio/Provider/Fireworks/ProviderFireworks.cs b/app/MindWork AI Studio/Provider/Fireworks/ProviderFireworks.cs index 9450134d..25fc2611 100644 --- a/app/MindWork AI Studio/Provider/Fireworks/ProviderFireworks.cs +++ b/app/MindWork AI Studio/Provider/Fireworks/ProviderFireworks.cs @@ -25,7 +25,7 @@ public class ProviderFireworks() : BaseProvider(LLMProviders.FIREWORKS, "https:/ public override async IAsyncEnumerable StreamChatCompletion(Model chatModel, ChatThread chatThread, SettingsManager settingsManager, [EnumeratorCancellation] CancellationToken token = default) { // Get the API key: - var requestedSecret = await RUST_SERVICE.GetAPIKey(this); + var requestedSecret = await RUST_SERVICE.GetAPIKey(this, SecretStoreType.LLM_PROVIDER); if(!requestedSecret.Success) yield break; @@ -81,6 +81,13 @@ public class ProviderFireworks() : BaseProvider(LLMProviders.FIREWORKS, "https:/ yield break; } #pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously + + /// + public override async Task TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default) + { + var requestedSecret = await RUST_SERVICE.GetAPIKey(this, SecretStoreType.TRANSCRIPTION_PROVIDER); + return await this.PerformStandardTranscriptionRequest(requestedSecret, transcriptionModel, audioFilePath, token: token); + } /// public override Task> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default) @@ -103,11 +110,12 @@ public class ProviderFireworks() : BaseProvider(LLMProviders.FIREWORKS, "https:/ /// public override Task> GetTranscriptionModels(string? 
apiKeyProvisional = null, CancellationToken token = default) { + // Source: https://docs.fireworks.ai/api-reference/audio-transcriptions#param-model return Task.FromResult>( new List { new("whisper-v3", "Whisper v3"), - new("whisper-v3-turbo", "Whisper v3 Turbo"), + // new("whisper-v3-turbo", "Whisper v3 Turbo"), // does not work }); } diff --git a/app/MindWork AI Studio/Provider/GWDG/ProviderGWDG.cs b/app/MindWork AI Studio/Provider/GWDG/ProviderGWDG.cs index da322942..28af268f 100644 --- a/app/MindWork AI Studio/Provider/GWDG/ProviderGWDG.cs +++ b/app/MindWork AI Studio/Provider/GWDG/ProviderGWDG.cs @@ -25,7 +25,7 @@ public sealed class ProviderGWDG() : BaseProvider(LLMProviders.GWDG, "https://ch public override async IAsyncEnumerable StreamChatCompletion(Model chatModel, ChatThread chatThread, SettingsManager settingsManager, [EnumeratorCancellation] CancellationToken token = default) { // Get the API key: - var requestedSecret = await RUST_SERVICE.GetAPIKey(this); + var requestedSecret = await RUST_SERVICE.GetAPIKey(this, SecretStoreType.LLM_PROVIDER); if(!requestedSecret.Success) yield break; @@ -80,11 +80,18 @@ public sealed class ProviderGWDG() : BaseProvider(LLMProviders.GWDG, "https://ch yield break; } #pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously + + /// + public override async Task TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default) + { + var requestedSecret = await RUST_SERVICE.GetAPIKey(this, SecretStoreType.TRANSCRIPTION_PROVIDER); + return await this.PerformStandardTranscriptionRequest(requestedSecret, transcriptionModel, audioFilePath, token: token); + } /// public override async Task> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default) { - var models = await this.LoadModels(token, apiKeyProvisional); + var models = await this.LoadModels(SecretStoreType.LLM_PROVIDER, token, apiKeyProvisional); return models.Where(model => !model.Id.StartsWith("e5-mistral-7b-instruct", StringComparison.InvariantCultureIgnoreCase)); } @@ -97,7 +104,7 @@ public sealed class ProviderGWDG() : BaseProvider(LLMProviders.GWDG, "https://ch /// public override async Task> GetEmbeddingModels(string? apiKeyProvisional = null, CancellationToken token = default) { - var models = await this.LoadModels(token, apiKeyProvisional); + var models = await this.LoadModels(SecretStoreType.EMBEDDING_PROVIDER, token, apiKeyProvisional); return models.Where(model => model.Id.StartsWith("e5-", StringComparison.InvariantCultureIgnoreCase)); } @@ -114,12 +121,12 @@ public sealed class ProviderGWDG() : BaseProvider(LLMProviders.GWDG, "https://ch #endregion - private async Task> LoadModels(CancellationToken token, string? apiKeyProvisional = null) + private async Task> LoadModels(SecretStoreType storeType, CancellationToken token, string? 
apiKeyProvisional = null) { var secretKey = apiKeyProvisional switch { not null => apiKeyProvisional, - _ => await RUST_SERVICE.GetAPIKey(this) switch + _ => await RUST_SERVICE.GetAPIKey(this, storeType) switch { { Success: true } result => await result.Secret.Decrypt(ENCRYPTION), _ => null, diff --git a/app/MindWork AI Studio/Provider/Google/ProviderGoogle.cs b/app/MindWork AI Studio/Provider/Google/ProviderGoogle.cs index fce1a451..55490992 100644 --- a/app/MindWork AI Studio/Provider/Google/ProviderGoogle.cs +++ b/app/MindWork AI Studio/Provider/Google/ProviderGoogle.cs @@ -25,7 +25,7 @@ public class ProviderGoogle() : BaseProvider(LLMProviders.GOOGLE, "https://gener public override async IAsyncEnumerable StreamChatCompletion(Provider.Model chatModel, ChatThread chatThread, SettingsManager settingsManager, [EnumeratorCancellation] CancellationToken token = default) { // Get the API key: - var requestedSecret = await RUST_SERVICE.GetAPIKey(this); + var requestedSecret = await RUST_SERVICE.GetAPIKey(this, SecretStoreType.LLM_PROVIDER); if(!requestedSecret.Success) yield break; @@ -82,10 +82,16 @@ public class ProviderGoogle() : BaseProvider(LLMProviders.GOOGLE, "https://gener } #pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously + /// + public override Task TranscribeAudioAsync(Provider.Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default) + { + return Task.FromResult(string.Empty); + } + /// public override async Task> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default) { - var modelResponse = await this.LoadModels(token, apiKeyProvisional); + var modelResponse = await this.LoadModels(SecretStoreType.LLM_PROVIDER, token, apiKeyProvisional); if(modelResponse == default) return []; @@ -102,7 +108,7 @@ public class ProviderGoogle() : BaseProvider(LLMProviders.GOOGLE, "https://gener public override async Task> GetEmbeddingModels(string? apiKeyProvisional = null, CancellationToken token = default) { - var modelResponse = await this.LoadModels(token, apiKeyProvisional); + var modelResponse = await this.LoadModels(SecretStoreType.EMBEDDING_PROVIDER, token, apiKeyProvisional); if(modelResponse == default) return []; @@ -120,12 +126,12 @@ public class ProviderGoogle() : BaseProvider(LLMProviders.GOOGLE, "https://gener #endregion - private async Task LoadModels(CancellationToken token, string? apiKeyProvisional = null) + private async Task LoadModels(SecretStoreType storeType, CancellationToken token, string? apiKeyProvisional = null) { var secretKey = apiKeyProvisional switch { not null => apiKeyProvisional, - _ => await RUST_SERVICE.GetAPIKey(this) switch + _ => await RUST_SERVICE.GetAPIKey(this, storeType) switch { { Success: true } result => await result.Secret.Decrypt(ENCRYPTION), _ => null, diff --git a/app/MindWork AI Studio/Provider/Groq/ProviderGroq.cs b/app/MindWork AI Studio/Provider/Groq/ProviderGroq.cs index b6e9137a..e66cec3e 100644 --- a/app/MindWork AI Studio/Provider/Groq/ProviderGroq.cs +++ b/app/MindWork AI Studio/Provider/Groq/ProviderGroq.cs @@ -25,7 +25,7 @@ public class ProviderGroq() : BaseProvider(LLMProviders.GROQ, "https://api.groq. 
public override async IAsyncEnumerable StreamChatCompletion(Model chatModel, ChatThread chatThread, SettingsManager settingsManager, [EnumeratorCancellation] CancellationToken token = default) { // Get the API key: - var requestedSecret = await RUST_SERVICE.GetAPIKey(this); + var requestedSecret = await RUST_SERVICE.GetAPIKey(this, SecretStoreType.LLM_PROVIDER); if(!requestedSecret.Success) yield break; @@ -81,17 +81,23 @@ public class ProviderGroq() : BaseProvider(LLMProviders.GROQ, "https://api.groq. yield break; } #pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously + + /// + public override Task TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default) + { + return Task.FromResult(string.Empty); + } /// public override Task> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default) { - return this.LoadModels(token, apiKeyProvisional); + return this.LoadModels(SecretStoreType.LLM_PROVIDER, token, apiKeyProvisional); } /// public override Task> GetImageModels(string? apiKeyProvisional = null, CancellationToken token = default) { - return Task.FromResult>(Array.Empty()); + return Task.FromResult>([]); } /// @@ -108,12 +114,12 @@ public class ProviderGroq() : BaseProvider(LLMProviders.GROQ, "https://api.groq. #endregion - private async Task> LoadModels(CancellationToken token, string? apiKeyProvisional = null) + private async Task> LoadModels(SecretStoreType storeType, CancellationToken token, string? apiKeyProvisional = null) { var secretKey = apiKeyProvisional switch { not null => apiKeyProvisional, - _ => await RUST_SERVICE.GetAPIKey(this) switch + _ => await RUST_SERVICE.GetAPIKey(this, storeType) switch { { Success: true } result => await result.Secret.Decrypt(ENCRYPTION), _ => null, diff --git a/app/MindWork AI Studio/Provider/Helmholtz/ProviderHelmholtz.cs b/app/MindWork AI Studio/Provider/Helmholtz/ProviderHelmholtz.cs index 213bf075..cf4359ea 100644 --- a/app/MindWork AI Studio/Provider/Helmholtz/ProviderHelmholtz.cs +++ b/app/MindWork AI Studio/Provider/Helmholtz/ProviderHelmholtz.cs @@ -25,7 +25,7 @@ public sealed class ProviderHelmholtz() : BaseProvider(LLMProviders.HELMHOLTZ, " public override async IAsyncEnumerable StreamChatCompletion(Model chatModel, ChatThread chatThread, SettingsManager settingsManager, [EnumeratorCancellation] CancellationToken token = default) { // Get the API key: - var requestedSecret = await RUST_SERVICE.GetAPIKey(this); + var requestedSecret = await RUST_SERVICE.GetAPIKey(this, SecretStoreType.LLM_PROVIDER); if(!requestedSecret.Success) yield break; @@ -80,11 +80,17 @@ public sealed class ProviderHelmholtz() : BaseProvider(LLMProviders.HELMHOLTZ, " yield break; } #pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously + + /// + public override Task TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default) + { + return Task.FromResult(string.Empty); + } /// public override async Task> GetTextModels(string? 
apiKeyProvisional = null, CancellationToken token = default) { - var models = await this.LoadModels(token, apiKeyProvisional); + var models = await this.LoadModels(SecretStoreType.LLM_PROVIDER, token, apiKeyProvisional); return models.Where(model => !model.Id.StartsWith("text-", StringComparison.InvariantCultureIgnoreCase) && !model.Id.StartsWith("alias-embedding", StringComparison.InvariantCultureIgnoreCase)); } @@ -98,7 +104,7 @@ public sealed class ProviderHelmholtz() : BaseProvider(LLMProviders.HELMHOLTZ, " /// public override async Task> GetEmbeddingModels(string? apiKeyProvisional = null, CancellationToken token = default) { - var models = await this.LoadModels(token, apiKeyProvisional); + var models = await this.LoadModels(SecretStoreType.EMBEDDING_PROVIDER, token, apiKeyProvisional); return models.Where(model => model.Id.StartsWith("alias-embedding", StringComparison.InvariantCultureIgnoreCase) || model.Id.StartsWith("text-", StringComparison.InvariantCultureIgnoreCase) || @@ -113,12 +119,12 @@ public sealed class ProviderHelmholtz() : BaseProvider(LLMProviders.HELMHOLTZ, " #endregion - private async Task> LoadModels(CancellationToken token, string? apiKeyProvisional = null) + private async Task> LoadModels(SecretStoreType storeType, CancellationToken token, string? apiKeyProvisional = null) { var secretKey = apiKeyProvisional switch { not null => apiKeyProvisional, - _ => await RUST_SERVICE.GetAPIKey(this) switch + _ => await RUST_SERVICE.GetAPIKey(this, storeType) switch { { Success: true } result => await result.Secret.Decrypt(ENCRYPTION), _ => null, diff --git a/app/MindWork AI Studio/Provider/HuggingFace/ProviderHuggingFace.cs b/app/MindWork AI Studio/Provider/HuggingFace/ProviderHuggingFace.cs index 794b4f42..685dea10 100644 --- a/app/MindWork AI Studio/Provider/HuggingFace/ProviderHuggingFace.cs +++ b/app/MindWork AI Studio/Provider/HuggingFace/ProviderHuggingFace.cs @@ -30,7 +30,7 @@ public sealed class ProviderHuggingFace : BaseProvider public override async IAsyncEnumerable StreamChatCompletion(Model chatModel, ChatThread chatThread, SettingsManager settingsManager, [EnumeratorCancellation] CancellationToken token = default) { // Get the API key: - var requestedSecret = await RUST_SERVICE.GetAPIKey(this); + var requestedSecret = await RUST_SERVICE.GetAPIKey(this, SecretStoreType.LLM_PROVIDER); if(!requestedSecret.Success) yield break; @@ -85,6 +85,12 @@ public sealed class ProviderHuggingFace : BaseProvider yield break; } #pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously + + /// + public override Task TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default) + { + return Task.FromResult(string.Empty); + } /// public override Task> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default) diff --git a/app/MindWork AI Studio/Provider/IProvider.cs b/app/MindWork AI Studio/Provider/IProvider.cs index 4ae6dc6c..5c390074 100644 --- a/app/MindWork AI Studio/Provider/IProvider.cs +++ b/app/MindWork AI Studio/Provider/IProvider.cs @@ -50,6 +50,16 @@ public interface IProvider /// The image completion stream. public IAsyncEnumerable StreamImageCompletion(Model imageModel, string promptPositive, string promptNegative = FilterOperator.String.Empty, ImageURL referenceImageURL = default, CancellationToken token = default); + /// + /// Transcribe an audio file. + /// + /// The model to use for transcription. + /// The audio file path. 
+ /// The settings manager instance to use. + /// The cancellation token. + /// >The transcription result. + public Task TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default); + /// /// Load all possible text models that can be used with this provider. /// diff --git a/app/MindWork AI Studio/Provider/Mistral/ProviderMistral.cs b/app/MindWork AI Studio/Provider/Mistral/ProviderMistral.cs index 598f7016..0755f349 100644 --- a/app/MindWork AI Studio/Provider/Mistral/ProviderMistral.cs +++ b/app/MindWork AI Studio/Provider/Mistral/ProviderMistral.cs @@ -23,7 +23,7 @@ public sealed class ProviderMistral() : BaseProvider(LLMProviders.MISTRAL, "http public override async IAsyncEnumerable StreamChatCompletion(Provider.Model chatModel, ChatThread chatThread, SettingsManager settingsManager, [EnumeratorCancellation] CancellationToken token = default) { // Get the API key: - var requestedSecret = await RUST_SERVICE.GetAPIKey(this); + var requestedSecret = await RUST_SERVICE.GetAPIKey(this, SecretStoreType.LLM_PROVIDER); if(!requestedSecret.Success) yield break; @@ -81,11 +81,18 @@ public sealed class ProviderMistral() : BaseProvider(LLMProviders.MISTRAL, "http yield break; } #pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously + + /// + public override async Task TranscribeAudioAsync(Provider.Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default) + { + var requestedSecret = await RUST_SERVICE.GetAPIKey(this, SecretStoreType.TRANSCRIPTION_PROVIDER); + return await this.PerformStandardTranscriptionRequest(requestedSecret, transcriptionModel, audioFilePath, token: token); + } /// public override async Task> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default) { - var modelResponse = await this.LoadModelList(apiKeyProvisional, token); + var modelResponse = await this.LoadModelList(SecretStoreType.LLM_PROVIDER, apiKeyProvisional, token); if(modelResponse == default) return []; @@ -99,7 +106,7 @@ public sealed class ProviderMistral() : BaseProvider(LLMProviders.MISTRAL, "http /// public override async Task> GetEmbeddingModels(string? apiKeyProvisional = null, CancellationToken token = default) { - var modelResponse = await this.LoadModelList(apiKeyProvisional, token); + var modelResponse = await this.LoadModelList(SecretStoreType.EMBEDDING_PROVIDER, apiKeyProvisional, token); if(modelResponse == default) return []; @@ -126,12 +133,12 @@ public sealed class ProviderMistral() : BaseProvider(LLMProviders.MISTRAL, "http #endregion - private async Task LoadModelList(string? apiKeyProvisional, CancellationToken token) + private async Task LoadModelList(SecretStoreType storeType, string? 
apiKeyProvisional, CancellationToken token) { var secretKey = apiKeyProvisional switch { not null => apiKeyProvisional, - _ => await RUST_SERVICE.GetAPIKey(this) switch + _ => await RUST_SERVICE.GetAPIKey(this, storeType) switch { { Success: true } result => await result.Secret.Decrypt(ENCRYPTION), _ => null, diff --git a/app/MindWork AI Studio/Provider/NoProvider.cs b/app/MindWork AI Studio/Provider/NoProvider.cs index 4f92e5c9..a650ac34 100644 --- a/app/MindWork AI Studio/Provider/NoProvider.cs +++ b/app/MindWork AI Studio/Provider/NoProvider.cs @@ -38,6 +38,8 @@ public class NoProvider : IProvider yield break; } + public Task TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default) => Task.FromResult(string.Empty); + public IReadOnlyCollection GetModelCapabilities(Model model) => [ Capability.NONE ]; #endregion diff --git a/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs b/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs index d06d6e15..c4a213db 100644 --- a/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs +++ b/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs @@ -27,7 +27,7 @@ public sealed class ProviderOpenAI() : BaseProvider(LLMProviders.OPEN_AI, "https public override async IAsyncEnumerable StreamChatCompletion(Model chatModel, ChatThread chatThread, SettingsManager settingsManager, [EnumeratorCancellation] CancellationToken token = default) { // Get the API key: - var requestedSecret = await RUST_SERVICE.GetAPIKey(this); + var requestedSecret = await RUST_SERVICE.GetAPIKey(this, SecretStoreType.LLM_PROVIDER); if(!requestedSecret.Success) yield break; @@ -217,11 +217,18 @@ public sealed class ProviderOpenAI() : BaseProvider(LLMProviders.OPEN_AI, "https } #pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously + + /// + public override async Task TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default) + { + var requestedSecret = await RUST_SERVICE.GetAPIKey(this, SecretStoreType.TRANSCRIPTION_PROVIDER); + return await this.PerformStandardTranscriptionRequest(requestedSecret, transcriptionModel, audioFilePath, token: token); + } /// public override async Task> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default) { - var models = await this.LoadModels(["chatgpt-", "gpt-", "o1-", "o3-", "o4-"], token, apiKeyProvisional); + var models = await this.LoadModels(SecretStoreType.LLM_PROVIDER, ["chatgpt-", "gpt-", "o1-", "o3-", "o4-"], token, apiKeyProvisional); return models.Where(model => !model.Id.Contains("image", StringComparison.OrdinalIgnoreCase) && !model.Id.Contains("realtime", StringComparison.OrdinalIgnoreCase) && !model.Id.Contains("audio", StringComparison.OrdinalIgnoreCase) && @@ -232,31 +239,31 @@ public sealed class ProviderOpenAI() : BaseProvider(LLMProviders.OPEN_AI, "https /// public override Task> GetImageModels(string? apiKeyProvisional = null, CancellationToken token = default) { - return this.LoadModels(["dall-e-", "gpt-image"], token, apiKeyProvisional); + return this.LoadModels(SecretStoreType.IMAGE_PROVIDER, ["dall-e-", "gpt-image"], token, apiKeyProvisional); } /// public override Task> GetEmbeddingModels(string? 
apiKeyProvisional = null, CancellationToken token = default) { - return this.LoadModels(["text-embedding-"], token, apiKeyProvisional); + return this.LoadModels(SecretStoreType.EMBEDDING_PROVIDER, ["text-embedding-"], token, apiKeyProvisional); } /// public override async Task> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default) { - var models = await this.LoadModels(["whisper-", "gpt-"], token, apiKeyProvisional); + var models = await this.LoadModels(SecretStoreType.TRANSCRIPTION_PROVIDER, ["whisper-", "gpt-"], token, apiKeyProvisional); return models.Where(model => model.Id.StartsWith("whisper-", StringComparison.InvariantCultureIgnoreCase) || model.Id.Contains("-transcribe", StringComparison.InvariantCultureIgnoreCase)); } #endregion - private async Task> LoadModels(string[] prefixes, CancellationToken token, string? apiKeyProvisional = null) + private async Task> LoadModels(SecretStoreType storeType, string[] prefixes, CancellationToken token, string? apiKeyProvisional = null) { var secretKey = apiKeyProvisional switch { not null => apiKeyProvisional, - _ => await RUST_SERVICE.GetAPIKey(this) switch + _ => await RUST_SERVICE.GetAPIKey(this, storeType) switch { { Success: true } result => await result.Secret.Decrypt(ENCRYPTION), _ => null, diff --git a/app/MindWork AI Studio/Provider/OpenRouter/ProviderOpenRouter.cs b/app/MindWork AI Studio/Provider/OpenRouter/ProviderOpenRouter.cs index d6945799..e627ee4c 100644 --- a/app/MindWork AI Studio/Provider/OpenRouter/ProviderOpenRouter.cs +++ b/app/MindWork AI Studio/Provider/OpenRouter/ProviderOpenRouter.cs @@ -28,7 +28,7 @@ public sealed class ProviderOpenRouter() : BaseProvider(LLMProviders.OPEN_ROUTER public override async IAsyncEnumerable StreamChatCompletion(Model chatModel, ChatThread chatThread, SettingsManager settingsManager, [EnumeratorCancellation] CancellationToken token = default) { // Get the API key: - var requestedSecret = await RUST_SERVICE.GetAPIKey(this); + var requestedSecret = await RUST_SERVICE.GetAPIKey(this, SecretStoreType.LLM_PROVIDER); if(!requestedSecret.Success) yield break; @@ -88,11 +88,17 @@ public sealed class ProviderOpenRouter() : BaseProvider(LLMProviders.OPEN_ROUTER yield break; } #pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously + + /// + public override Task TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default) + { + return Task.FromResult(string.Empty); + } /// public override Task> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default) { - return this.LoadModels(token, apiKeyProvisional); + return this.LoadModels(SecretStoreType.LLM_PROVIDER, token, apiKeyProvisional); } /// @@ -115,12 +121,12 @@ public sealed class ProviderOpenRouter() : BaseProvider(LLMProviders.OPEN_ROUTER #endregion - private async Task> LoadModels(CancellationToken token, string? apiKeyProvisional = null) + private async Task> LoadModels(SecretStoreType storeType, CancellationToken token, string? 
apiKeyProvisional = null) { var secretKey = apiKeyProvisional switch { not null => apiKeyProvisional, - _ => await RUST_SERVICE.GetAPIKey(this) switch + _ => await RUST_SERVICE.GetAPIKey(this, storeType) switch { { Success: true } result => await result.Secret.Decrypt(ENCRYPTION), _ => null, @@ -162,7 +168,7 @@ public sealed class ProviderOpenRouter() : BaseProvider(LLMProviders.OPEN_ROUTER var secretKey = apiKeyProvisional switch { not null => apiKeyProvisional, - _ => await RUST_SERVICE.GetAPIKey(this) switch + _ => await RUST_SERVICE.GetAPIKey(this, SecretStoreType.EMBEDDING_PROVIDER) switch { { Success: true } result => await result.Secret.Decrypt(ENCRYPTION), _ => null, diff --git a/app/MindWork AI Studio/Provider/Perplexity/ProviderPerplexity.cs b/app/MindWork AI Studio/Provider/Perplexity/ProviderPerplexity.cs index 0616f2d9..6ecfc69f 100644 --- a/app/MindWork AI Studio/Provider/Perplexity/ProviderPerplexity.cs +++ b/app/MindWork AI Studio/Provider/Perplexity/ProviderPerplexity.cs @@ -34,7 +34,7 @@ public sealed class ProviderPerplexity() : BaseProvider(LLMProviders.PERPLEXITY, public override async IAsyncEnumerable StreamChatCompletion(Model chatModel, ChatThread chatThread, SettingsManager settingsManager, [EnumeratorCancellation] CancellationToken token = default) { // Get the API key: - var requestedSecret = await RUST_SERVICE.GetAPIKey(this); + var requestedSecret = await RUST_SERVICE.GetAPIKey(this, SecretStoreType.LLM_PROVIDER); if(!requestedSecret.Success) yield break; @@ -88,6 +88,12 @@ public sealed class ProviderPerplexity() : BaseProvider(LLMProviders.PERPLEXITY, yield break; } #pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously + + /// + public override Task TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default) + { + return Task.FromResult(string.Empty); + } /// public override Task> GetTextModels(string? 
apiKeyProvisional = null, CancellationToken token = default) diff --git a/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs b/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs index a61a3b26..d63ec0d1 100644 --- a/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs +++ b/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs @@ -26,7 +26,7 @@ public sealed class ProviderSelfHosted(Host host, string hostname) : BaseProvide public override async IAsyncEnumerable StreamChatCompletion(Provider.Model chatModel, ChatThread chatThread, SettingsManager settingsManager, [EnumeratorCancellation] CancellationToken token = default) { // Get the API key: - var requestedSecret = await RUST_SERVICE.GetAPIKey(this, isTrying: true); + var requestedSecret = await RUST_SERVICE.GetAPIKey(this, SecretStoreType.LLM_PROVIDER, isTrying: true); // Prepare the system prompt: var systemPrompt = new TextMessage @@ -88,6 +88,13 @@ public sealed class ProviderSelfHosted(Host host, string hostname) : BaseProvide } #pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously + /// + public override async Task TranscribeAudioAsync(Provider.Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default) + { + var requestedSecret = await RUST_SERVICE.GetAPIKey(this, SecretStoreType.TRANSCRIPTION_PROVIDER, isTrying: true); + return await this.PerformStandardTranscriptionRequest(requestedSecret, transcriptionModel, audioFilePath, host, token); + } + public override async Task> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default) { try @@ -102,7 +109,7 @@ public sealed class ProviderSelfHosted(Host host, string hostname) : BaseProvide case Host.LM_STUDIO: case Host.OLLAMA: case Host.VLLM: - return await this.LoadModels(["embed"], [], token, apiKeyProvisional); + return await this.LoadModels( SecretStoreType.LLM_PROVIDER, ["embed"], [], token, apiKeyProvisional); } return []; @@ -129,7 +136,7 @@ public sealed class ProviderSelfHosted(Host host, string hostname) : BaseProvide case Host.LM_STUDIO: case Host.OLLAMA: case Host.VLLM: - return await this.LoadModels([], ["embed"], token, apiKeyProvisional); + return await this.LoadModels( SecretStoreType.EMBEDDING_PROVIDER, [], ["embed"], token, apiKeyProvisional); } return []; @@ -157,7 +164,7 @@ public sealed class ProviderSelfHosted(Host host, string hostname) : BaseProvide case Host.OLLAMA: case Host.VLLM: - return this.LoadModels([], [], token, apiKeyProvisional); + return this.LoadModels(SecretStoreType.TRANSCRIPTION_PROVIDER, [], [], token, apiKeyProvisional); default: return Task.FromResult(Enumerable.Empty()); @@ -172,12 +179,12 @@ public sealed class ProviderSelfHosted(Host host, string hostname) : BaseProvide #endregion - private async Task> LoadModels(string[] ignorePhrases, string[] filterPhrases, CancellationToken token, string? apiKeyProvisional = null) + private async Task> LoadModels(SecretStoreType storeType, string[] ignorePhrases, string[] filterPhrases, CancellationToken token, string? 
apiKeyProvisional = null) { var secretKey = apiKeyProvisional switch { not null => apiKeyProvisional, - _ => await RUST_SERVICE.GetAPIKey(this, isTrying: true) switch + _ => await RUST_SERVICE.GetAPIKey(this, storeType, isTrying: true) switch { { Success: true } result => await result.Secret.Decrypt(ENCRYPTION), _ => null, diff --git a/app/MindWork AI Studio/Provider/TranscriptionResponse.cs b/app/MindWork AI Studio/Provider/TranscriptionResponse.cs new file mode 100644 index 00000000..7ba1f587 --- /dev/null +++ b/app/MindWork AI Studio/Provider/TranscriptionResponse.cs @@ -0,0 +1,3 @@ +namespace AIStudio.Provider; + +public sealed record TranscriptionResponse(string Text); diff --git a/app/MindWork AI Studio/Provider/X/ProviderX.cs b/app/MindWork AI Studio/Provider/X/ProviderX.cs index 92aad1eb..82d92c2a 100644 --- a/app/MindWork AI Studio/Provider/X/ProviderX.cs +++ b/app/MindWork AI Studio/Provider/X/ProviderX.cs @@ -25,7 +25,7 @@ public sealed class ProviderX() : BaseProvider(LLMProviders.X, "https://api.x.ai public override async IAsyncEnumerable StreamChatCompletion(Model chatModel, ChatThread chatThread, SettingsManager settingsManager, [EnumeratorCancellation] CancellationToken token = default) { // Get the API key: - var requestedSecret = await RUST_SERVICE.GetAPIKey(this); + var requestedSecret = await RUST_SERVICE.GetAPIKey(this, SecretStoreType.LLM_PROVIDER); if(!requestedSecret.Success) yield break; @@ -81,11 +81,17 @@ public sealed class ProviderX() : BaseProvider(LLMProviders.X, "https://api.x.ai yield break; } #pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously - + + /// + public override Task TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default) + { + return Task.FromResult(string.Empty); + } + /// public override async Task> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default) { - var models = await this.LoadModels(["grok-"], token, apiKeyProvisional); + var models = await this.LoadModels(SecretStoreType.LLM_PROVIDER, ["grok-"], token, apiKeyProvisional); return models.Where(n => !n.Id.Contains("-image", StringComparison.OrdinalIgnoreCase)); } @@ -109,12 +115,12 @@ public sealed class ProviderX() : BaseProvider(LLMProviders.X, "https://api.x.ai #endregion - private async Task> LoadModels(string[] prefixes, CancellationToken token, string? apiKeyProvisional = null) + private async Task> LoadModels(SecretStoreType storeType, string[] prefixes, CancellationToken token, string? 
apiKeyProvisional = null) { var secretKey = apiKeyProvisional switch { not null => apiKeyProvisional, - _ => await RUST_SERVICE.GetAPIKey(this) switch + _ => await RUST_SERVICE.GetAPIKey(this, storeType) switch { { Success: true } result => await result.Secret.Decrypt(ENCRYPTION), _ => null, diff --git a/app/MindWork AI Studio/Tools/MIME/Builder.cs b/app/MindWork AI Studio/Tools/MIME/Builder.cs index 3a45b8db..97f86fef 100644 --- a/app/MindWork AI Studio/Tools/MIME/Builder.cs +++ b/app/MindWork AI Studio/Tools/MIME/Builder.cs @@ -8,6 +8,68 @@ public class Builder public static Builder Create() => new(); + public static MIMEType FromFilename(string filenameOrPath) + { + var extension = Path.GetExtension(filenameOrPath); + if (string.IsNullOrEmpty(extension)) + throw new ArgumentException("Filename or path does not have a valid extension.", nameof(filenameOrPath)); + + extension = extension.TrimStart('.').ToLowerInvariant(); + + var builder = Create(); + return extension switch + { + // Application types + "pdf" => builder.UseApplication().UseSubtype(ApplicationSubtype.PDF).Build(), + "zip" => builder.UseApplication().UseSubtype(ApplicationSubtype.ZIP).Build(), + "doc" => builder.UseApplication().UseSubtype(ApplicationSubtype.WORD_OLD).Build(), + "docx" => builder.UseApplication().UseSubtype(ApplicationSubtype.WORD).Build(), + "xls" => builder.UseApplication().UseSubtype(ApplicationSubtype.EXCEL_OLD).Build(), + "xlsx" => builder.UseApplication().UseSubtype(ApplicationSubtype.EXCEL).Build(), + "ppt" => builder.UseApplication().UseSubtype(ApplicationSubtype.POWERPOINT_OLD).Build(), + "pptx" => builder.UseApplication().UseSubtype(ApplicationSubtype.POWERPOINT).Build(), + "json" => builder.UseApplication().UseSubtype(ApplicationSubtype.JSON).Build(), + "xml" => builder.UseApplication().UseSubtype(ApplicationSubtype.XML).Build(), + + // Text types + "txt" => builder.UseText().UseSubtype(TextSubtype.PLAIN).Build(), + "html" or "htm" => builder.UseText().UseSubtype(TextSubtype.HTML).Build(), + "css" => builder.UseText().UseSubtype(TextSubtype.CSS).Build(), + "csv" => builder.UseText().UseSubtype(TextSubtype.CSV).Build(), + "js" => builder.UseText().UseSubtype(TextSubtype.JAVASCRIPT).Build(), + "md" or "markdown" => builder.UseText().UseSubtype(TextSubtype.MARKDOWN).Build(), + + // Audio types + "wav" => builder.UseAudio().UseSubtype(AudioSubtype.WAV).Build(), + "mp3" => builder.UseAudio().UseSubtype(AudioSubtype.MP3).Build(), + "ogg" => builder.UseAudio().UseSubtype(AudioSubtype.OGG).Build(), + "aac" => builder.UseAudio().UseSubtype(AudioSubtype.AAC).Build(), + "flac" => builder.UseAudio().UseSubtype(AudioSubtype.FLAC).Build(), + "m4a" => builder.UseAudio().UseSubtype(AudioSubtype.M4A).Build(), + "aiff" or "aif" => builder.UseAudio().UseSubtype(AudioSubtype.AIFF).Build(), + "mpga" => builder.UseAudio().UseSubtype(AudioSubtype.MPEG).Build(), + "webm" => builder.UseAudio().UseSubtype(AudioSubtype.WEBM).Build(), + + // Image types + "jpg" or "jpeg" => builder.UseImage().UseSubtype(ImageSubtype.JPEG).Build(), + "png" => builder.UseImage().UseSubtype(ImageSubtype.PNG).Build(), + "gif" => builder.UseImage().UseSubtype(ImageSubtype.GIF).Build(), + "tiff" or "tif" => builder.UseImage().UseSubtype(ImageSubtype.TIFF).Build(), + "webp" => builder.UseImage().UseSubtype(ImageSubtype.WEBP).Build(), + "svg" => builder.UseImage().UseSubtype(ImageSubtype.SVG).Build(), + "heic" => builder.UseImage().UseSubtype(ImageSubtype.HEIC).Build(), + + // Video types + "mp4" => 
builder.UseVideo().UseSubtype(VideoSubtype.MP4).Build(), + "avi" => builder.UseVideo().UseSubtype(VideoSubtype.AVI).Build(), + "mov" => builder.UseVideo().UseSubtype(VideoSubtype.MOV).Build(), + "mkv" => builder.UseVideo().UseSubtype(VideoSubtype.MKV).Build(), + "mpeg" or "mpg" => builder.UseVideo().UseSubtype(VideoSubtype.MPEG).Build(), + + _ => throw new ArgumentException($"Unsupported file extension: '.{extension}'.", nameof(filenameOrPath)) + }; + } + public static MIMEType FromTextRepresentation(string textRepresentation) { var parts = textRepresentation.Split('/'); diff --git a/app/MindWork AI Studio/Tools/SecretStoreType.cs b/app/MindWork AI Studio/Tools/SecretStoreType.cs new file mode 100644 index 00000000..c4382b7b --- /dev/null +++ b/app/MindWork AI Studio/Tools/SecretStoreType.cs @@ -0,0 +1,32 @@ +namespace AIStudio.Tools; + +/// +/// Represents the type of secret store used for API keys. +/// +/// +/// Different provider types use different prefixes for storing API keys. +/// This prevents collisions when the same instance name is used across +/// different provider types (e.g., LLM, Embedding, Transcription). +/// +public enum SecretStoreType +{ + /// + /// LLM provider secrets. Uses the legacy "provider::" prefix for backward compatibility. + /// + LLM_PROVIDER = 0, + + /// + /// Embedding provider secrets. Uses the "embedding::" prefix. + /// + EMBEDDING_PROVIDER, + + /// + /// Transcription provider secrets. Uses the "transcription::" prefix. + /// + TRANSCRIPTION_PROVIDER, + + /// + /// Image provider secrets. Uses the "image::" prefix. + /// + IMAGE_PROVIDER, +} diff --git a/app/MindWork AI Studio/Tools/SecretStoreTypeExtensions.cs b/app/MindWork AI Studio/Tools/SecretStoreTypeExtensions.cs new file mode 100644 index 00000000..d0d4ba9e --- /dev/null +++ b/app/MindWork AI Studio/Tools/SecretStoreTypeExtensions.cs @@ -0,0 +1,21 @@ +namespace AIStudio.Tools; + +public static class SecretStoreTypeExtensions +{ + /// + /// Gets the prefix string associated with the SecretStoreType. + /// + /// + /// LLM_PROVIDER uses the legacy "provider" prefix for backward compatibility. + /// + /// The SecretStoreType enum value. + /// >The corresponding prefix string. + public static string Prefix(this SecretStoreType type) => type switch + { + SecretStoreType.LLM_PROVIDER => "provider", + SecretStoreType.EMBEDDING_PROVIDER => "embedding", + SecretStoreType.TRANSCRIPTION_PROVIDER => "transcription", + + _ => "provider", + }; +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/Services/RustService.APIKeys.cs b/app/MindWork AI Studio/Tools/Services/RustService.APIKeys.cs index 8c0ed88a..abc06b03 100644 --- a/app/MindWork AI Studio/Tools/Services/RustService.APIKeys.cs +++ b/app/MindWork AI Studio/Tools/Services/RustService.APIKeys.cs @@ -9,68 +9,76 @@ public sealed partial class RustService /// /// The secret ID to get the API key for. /// Indicates if we are trying to get the API key. In that case, we don't log errors. + /// The secret store type. Defaults to LLM_PROVIDER for backward compatibility. /// The requested secret. 
- public async Task GetAPIKey(ISecretId secretId, bool isTrying = false) + public async Task GetAPIKey(ISecretId secretId, SecretStoreType storeType, bool isTrying = false) { - var secretRequest = new SelectSecretRequest($"provider::{secretId.SecretId}::{secretId.SecretName}::api_key", Environment.UserName, isTrying); + var prefix = storeType.Prefix(); + var secretRequest = new SelectSecretRequest($"{prefix}::{secretId.SecretId}::{secretId.SecretName}::api_key", Environment.UserName, isTrying); var result = await this.http.PostAsJsonAsync("/secrets/get", secretRequest, this.jsonRustSerializerOptions); if (!result.IsSuccessStatusCode) { if(!isTrying) - this.logger!.LogError($"Failed to get the API key for secret ID '{secretId.SecretId}' due to an API issue: '{result.StatusCode}'"); + this.logger!.LogError($"Failed to get the API key for '{prefix}::{secretId.SecretId}::{secretId.SecretName}::api_key' due to an API issue: '{result.StatusCode}'"); return new RequestedSecret(false, new EncryptedText(string.Empty), TB("Failed to get the API key due to an API issue.")); } - + var secret = await result.Content.ReadFromJsonAsync(this.jsonRustSerializerOptions); if (!secret.Success && !isTrying) - this.logger!.LogError($"Failed to get the API key for secret ID '{secretId.SecretId}': '{secret.Issue}'"); - + this.logger!.LogError($"Failed to get the API key for '{prefix}::{secretId.SecretId}::{secretId.SecretName}::api_key': '{secret.Issue}'"); + + this.logger!.LogDebug($"Successfully retrieved the API key for '{prefix}::{secretId.SecretId}::{secretId.SecretName}::api_key'."); return secret; } - + /// /// Try to store the API key for the given secret ID. /// /// The secret ID to store the API key for. /// The API key to store. + /// The secret store type. Defaults to LLM_PROVIDER for backward compatibility. /// The store secret response. - public async Task SetAPIKey(ISecretId secretId, string key) + public async Task SetAPIKey(ISecretId secretId, string key, SecretStoreType storeType) { + var prefix = storeType.Prefix(); var encryptedKey = await this.encryptor!.Encrypt(key); - var request = new StoreSecretRequest($"provider::{secretId.SecretId}::{secretId.SecretName}::api_key", Environment.UserName, encryptedKey); + var request = new StoreSecretRequest($"{prefix}::{secretId.SecretId}::{secretId.SecretName}::api_key", Environment.UserName, encryptedKey); var result = await this.http.PostAsJsonAsync("/secrets/store", request, this.jsonRustSerializerOptions); if (!result.IsSuccessStatusCode) { - this.logger!.LogError($"Failed to store the API key for secret ID '{secretId.SecretId}' due to an API issue: '{result.StatusCode}'"); + this.logger!.LogError($"Failed to store the API key for '{prefix}::{secretId.SecretId}::{secretId.SecretName}::api_key' due to an API issue: '{result.StatusCode}'"); return new StoreSecretResponse(false, TB("Failed to get the API key due to an API issue.")); } - + var state = await result.Content.ReadFromJsonAsync(this.jsonRustSerializerOptions); if (!state.Success) - this.logger!.LogError($"Failed to store the API key for secret ID '{secretId.SecretId}': '{state.Issue}'"); - + this.logger!.LogError($"Failed to store the API key for '{prefix}::{secretId.SecretId}::{secretId.SecretName}::api_key': '{state.Issue}'"); + + this.logger!.LogDebug($"Successfully stored the API key for '{prefix}::{secretId.SecretId}::{secretId.SecretName}::api_key'."); return state; } - + /// /// Tries to delete the API key for the given secret ID. /// /// The secret ID to delete the API key for. 
+ /// The secret store type. Defaults to LLM_PROVIDER for backward compatibility. /// The delete secret response. - public async Task DeleteAPIKey(ISecretId secretId) + public async Task DeleteAPIKey(ISecretId secretId, SecretStoreType storeType) { - var request = new SelectSecretRequest($"provider::{secretId.SecretId}::{secretId.SecretName}::api_key", Environment.UserName, false); + var prefix = storeType.Prefix(); + var request = new SelectSecretRequest($"{prefix}::{secretId.SecretId}::{secretId.SecretName}::api_key", Environment.UserName, false); var result = await this.http.PostAsJsonAsync("/secrets/delete", request, this.jsonRustSerializerOptions); if (!result.IsSuccessStatusCode) { this.logger!.LogError($"Failed to delete the API key for secret ID '{secretId.SecretId}' due to an API issue: '{result.StatusCode}'"); return new DeleteSecretResponse{Success = false, WasEntryFound = false, Issue = TB("Failed to delete the API key due to an API issue.")}; } - + var state = await result.Content.ReadFromJsonAsync(this.jsonRustSerializerOptions); if (!state.Success) this.logger!.LogError($"Failed to delete the API key for secret ID '{secretId.SecretId}': '{state.Issue}'"); - + return state; } } \ No newline at end of file diff --git a/app/MindWork AI Studio/wwwroot/app.js b/app/MindWork AI Studio/wwwroot/app.js index 2dd43e5c..8a4e036d 100644 --- a/app/MindWork AI Studio/wwwroot/app.js +++ b/app/MindWork AI Studio/wwwroot/app.js @@ -27,28 +27,28 @@ window.scrollToBottom = function(element) { element.scrollIntoView({ behavior: 'smooth', block: 'end', inline: 'nearest' }); } +window.playSound = function(soundPath) { + try { + const audio = new Audio(soundPath); + audio.play().catch(error => { + console.warn('Failed to play sound effect:', error); + }); + } catch (error) { + console.warn('Error creating audio element:', error); + } +}; + let mediaRecorder; let actualRecordingMimeType; let changedMimeType = false; let pendingChunkUploads = 0; window.audioRecorder = { - playSound: function(soundPath) { - try { - const audio = new Audio(soundPath); - audio.play().catch(error => { - console.warn('Failed to play sound effect:', error); - }); - } catch (error) { - console.warn('Error creating audio element:', error); - } - }, - start: async function (dotnetRef, desiredMimeTypes = []) { const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); // Play start recording sound effect: - this.playSound('/sounds/start_recording.ogg'); + window.playSound('/sounds/start_recording.ogg'); // When only one mime type is provided as a string, convert it to an array: if (typeof desiredMimeTypes === 'string') { @@ -138,7 +138,7 @@ window.audioRecorder = { console.log('Audio recording - all chunks uploaded, finalizing.'); // Play stop recording sound effect: - window.audioRecorder.playSound('/sounds/stop_recording.ogg'); + window.playSound('/sounds/stop_recording.ogg'); // Stop all tracks to release the microphone: mediaRecorder.stream.getTracks().forEach(track => track.stop()); diff --git a/app/MindWork AI Studio/wwwroot/changelog/v26.1.1.md b/app/MindWork AI Studio/wwwroot/changelog/v26.1.1.md index 1f241e20..c1a84f1f 100644 --- a/app/MindWork AI Studio/wwwroot/changelog/v26.1.1.md +++ b/app/MindWork AI Studio/wwwroot/changelog/v26.1.1.md @@ -7,3 +7,5 @@ - Added the option to configure embedding providers through a config plugin and distribute them within an organization. - Improved the app versioning. Starting in 2026, each version number includes the year, followed by the month. 
The last digit shows the release number for that month. For example, version `26.1.1` is the first release in January 2026.
- Fixed a bug in the profile selection where the "Use no profile" entry could not be localized, causing English text to appear in languages such as German. This behavior has now been fixed.
+- Fixed a bug in the provider dialogs (LLMs, embeddings, and transcriptions): when editing a provider, error messages were shown as non-localized English text instead of in the user's language.
+- Fixed a very rare bug in the provider dialogs (LLMs, embeddings, and transcriptions) where a validation error appeared if the API key could not be read from the operating system and did not clear even after the user entered a new API key.
\ No newline at end of file
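
For reference, a minimal sketch (not part of the patch) that only prints the secret key names which `RustService.GetAPIKey`, `SetAPIKey`, and `DeleteAPIKey` compose from the new `SecretStoreType` prefixes; the id and name values are invented for illustration.

```csharp
using System;
using AIStudio.Tools;

// A minimal sketch: prints the key names composed from the new SecretStoreType
// prefixes, mirroring the format string used in RustService.APIKeys.
// The id and name values below are invented for illustration.
public static class SecretKeyNamingDemo
{
    public static void Main()
    {
        const string secretId = "example-id";   // stands in for ISecretId.SecretId
        const string secretName = "OpenAI";     // stands in for ISecretId.SecretName

        foreach (var storeType in Enum.GetValues<SecretStoreType>())
        {
            // Same "{prefix}::{id}::{name}::api_key" format as in RustService.APIKeys:
            var key = $"{storeType.Prefix()}::{secretId}::{secretName}::api_key";
            Console.WriteLine($"{storeType,-24} -> {key}");
        }
    }
}
```

Note that, as the `Prefix()` switch is written, `IMAGE_PROVIDER` falls through to the legacy `provider` prefix; only the LLM, embedding, and transcription store types have explicit mappings.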
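
A hedged caller-side sketch of the new `IProvider.TranscribeAudioAsync` method follows. How the provider and settings manager instances are obtained is outside this patch, the `SettingsManager` namespace is an assumption, the model id `whisper-v3` is taken from the Fireworks transcription model list, and the file path is made up.

```csharp
using System.Threading;
using System.Threading.Tasks;
using AIStudio.Provider;
using AIStudio.Settings; // assumption: SettingsManager's namespace is not shown in this patch

// Caller-side sketch of the new transcription interface method.
public static class TranscriptionDemo
{
    public static async Task<string> TranscribeRecordingAsync(IProvider provider, SettingsManager settingsManager, CancellationToken token = default)
    {
        // Model exposes an (id, display name) constructor, as used in the provider model lists above:
        var model = new Model("whisper-v3", "Whisper v3");

        // Returns the transcribed text; providers without transcription support return an empty string:
        return await provider.TranscribeAudioAsync(model, "/tmp/recording.webm", settingsManager, token);
    }
}
```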
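
Finally, a small sketch of the new `Builder.FromFilename` helper, for example to resolve the MIME type of a recorded audio file before building an upload. The namespace is inferred from the `Tools/MIME` folder, and the WAV fallback is an illustrative choice, not something the patch prescribes.

```csharp
using System;
using AIStudio.Tools.MIME; // assumption: namespace inferred from the Tools/MIME folder

// Sketch: resolving the MIME type of a recorded audio file. Builder.FromFilename
// throws ArgumentException for unknown or missing extensions, so a fallback is
// wrapped around it here.
public static class MimeLookupDemo
{
    public static MIMEType ResolveAudioMime(string audioFilePath)
    {
        try
        {
            // ".webm" maps to the audio WEBM subtype in the new FromFilename table:
            return Builder.FromFilename(audioFilePath);
        }
        catch (ArgumentException)
        {
            // Illustrative fallback only:
            return Builder.Create().UseAudio().UseSubtype(AudioSubtype.WAV).Build();
        }
    }
}
```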