From b94614b0ab0c195ddeb29bf2fad3884f5bd85377 Mon Sep 17 00:00:00 2001 From: Thorsten Sommer Date: Fri, 9 Jan 2026 15:49:44 +0100 Subject: [PATCH] Added transcription provider selection to app settings (#621) --- .../Assistants/I18N/allTexts.lua | 9 ++ .../Settings/SettingsPanelApp.razor | 5 + .../Settings/SettingsPanelApp.razor.cs | 14 +++ .../Settings/SettingsPanelTranscription.razor | 31 +++-- .../Components/VoiceRecorder.razor | 2 +- .../TranscriptionProviderDialog.razor.cs | 2 + .../Plugins/configuration/plugin.lua | 28 ++++- .../plugin.lua | 9 ++ .../plugin.lua | 9 ++ .../ConfigurationSelectDataFactory.cs | 6 + .../Settings/DataModel/DataApp.cs | 5 + .../Settings/TranscriptionProvider.cs | 114 +++++++++++++++++- .../Tools/PluginSystem/PluginConfiguration.cs | 8 +- .../PluginSystem/PluginConfigurationObject.cs | 4 +- .../PluginConfigurationObjectType.cs | 3 +- .../PluginSystem/PluginFactory.Loading.cs | 8 ++ .../wwwroot/changelog/v26.1.1.md | 5 +- 17 files changed, 237 insertions(+), 25 deletions(-) diff --git a/app/MindWork AI Studio/Assistants/I18N/allTexts.lua b/app/MindWork AI Studio/Assistants/I18N/allTexts.lua index 108267b1..d596fb44 100644 --- a/app/MindWork AI Studio/Assistants/I18N/allTexts.lua +++ b/app/MindWork AI Studio/Assistants/I18N/allTexts.lua @@ -2023,6 +2023,9 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELAPP::T1599198973"] -- Would you like to set one of your profiles as the default for the entire app? When you configure a different profile for an assistant, it will always take precedence. UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELAPP::T1666052109"] = "Would you like to set one of your profiles as the default for the entire app? When you configure a different profile for an assistant, it will always take precedence." +-- Select a transcription provider for transcribing your voice. Without a selected provider, dictation and transcription features will be disabled. 
+UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELAPP::T1834486728"] = "Select a transcription provider for transcribing your voice. Without a selected provider, dictation and transcription features will be disabled." + -- Select the language behavior for the app. The default is to use the system language. You might want to choose a language manually? UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELAPP::T186780842"] = "Select the language behavior for the app. The default is to use the system language. You might want to choose a language manually?" @@ -2035,6 +2038,9 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELAPP::T1898060643"] -- Select the language for the app. UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELAPP::T1907446663"] = "Select the language for the app." +-- Disable dictation and transcription +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELAPP::T215381891"] = "Disable dictation and transcription" + -- Language behavior UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELAPP::T2341504363"] = "Language behavior" @@ -2065,6 +2071,9 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELAPP::T4004501229"] -- When enabled, spellchecking will be active in all input fields. Depending on your operating system, errors may not be visually highlighted, but right-clicking may still offer possible corrections. UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELAPP::T4067492921"] = "When enabled, spellchecking will be active in all input fields. Depending on your operating system, errors may not be visually highlighted, but right-clicking may still offer possible corrections." 
+-- Select a transcription provider +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELAPP::T4174666315"] = "Select a transcription provider" + -- Navigation bar behavior UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELAPP::T602293588"] = "Navigation bar behavior" diff --git a/app/MindWork AI Studio/Components/Settings/SettingsPanelApp.razor b/app/MindWork AI Studio/Components/Settings/SettingsPanelApp.razor index 58e9e828..0a5c89c8 100644 --- a/app/MindWork AI Studio/Components/Settings/SettingsPanelApp.razor +++ b/app/MindWork AI Studio/Components/Settings/SettingsPanelApp.razor @@ -29,4 +29,9 @@ + + @if (PreviewFeatures.PRE_SPEECH_TO_TEXT_2026.IsEnabled(this.SettingsManager)) + { + + } \ No newline at end of file diff --git a/app/MindWork AI Studio/Components/Settings/SettingsPanelApp.razor.cs b/app/MindWork AI Studio/Components/Settings/SettingsPanelApp.razor.cs index 07e69709..2fbb61ed 100644 --- a/app/MindWork AI Studio/Components/Settings/SettingsPanelApp.razor.cs +++ b/app/MindWork AI Studio/Components/Settings/SettingsPanelApp.razor.cs @@ -1,9 +1,23 @@ +using AIStudio.Provider; +using AIStudio.Settings; using AIStudio.Settings.DataModel; namespace AIStudio.Components.Settings; public partial class SettingsPanelApp : SettingsPanelBase { + private IEnumerable> GetFilteredTranscriptionProviders() + { + yield return new(T("Disable dictation and transcription"), string.Empty); + + var minimumLevel = this.SettingsManager.GetMinimumConfidenceLevel(Tools.Components.APP_SETTINGS); + foreach (var provider in this.SettingsManager.ConfigurationData.TranscriptionProviders) + { + if (provider.UsedLLMProvider.GetConfidence(this.SettingsManager).Level >= minimumLevel) + yield return new(provider.Name, provider.Id); + } + } + private void UpdatePreviewFeatures(PreviewVisibility previewVisibility) { this.SettingsManager.ConfigurationData.App.PreviewVisibility = previewVisibility; diff --git a/app/MindWork AI 
Studio/Components/Settings/SettingsPanelTranscription.razor b/app/MindWork AI Studio/Components/Settings/SettingsPanelTranscription.razor index fd707c5a..273bee0b 100644 --- a/app/MindWork AI Studio/Components/Settings/SettingsPanelTranscription.razor +++ b/app/MindWork AI Studio/Components/Settings/SettingsPanelTranscription.razor @@ -33,18 +33,27 @@ @context.Name @context.UsedLLMProvider.ToName() @GetTranscriptionProviderModelName(context) - + - - - - - - - - - - + + @if (context.IsEnterpriseConfiguration) + { + + + + } + else + { + + + + + + + + + + } diff --git a/app/MindWork AI Studio/Components/VoiceRecorder.razor b/app/MindWork AI Studio/Components/VoiceRecorder.razor index a3866719..b1e5a07b 100644 --- a/app/MindWork AI Studio/Components/VoiceRecorder.razor +++ b/app/MindWork AI Studio/Components/VoiceRecorder.razor @@ -3,7 +3,7 @@ @namespace AIStudio.Components @inherits MSGComponentBase -@if (PreviewFeatures.PRE_SPEECH_TO_TEXT_2026.IsEnabled(this.SettingsManager)) +@if (PreviewFeatures.PRE_SPEECH_TO_TEXT_2026.IsEnabled(this.SettingsManager) && !string.IsNullOrWhiteSpace(this.SettingsManager.ConfigurationData.App.UseTranscriptionProvider)) { > GetTranscriptionProvidersData(IEnumerable transcriptionProviders) + { + foreach (var provider in transcriptionProviders) + yield return new(provider.Name, provider.Id); + } public static IEnumerable> GetChatTemplatesData(IEnumerable chatTemplates) { diff --git a/app/MindWork AI Studio/Settings/DataModel/DataApp.cs b/app/MindWork AI Studio/Settings/DataModel/DataApp.cs index a6029cda..720d3b5a 100644 --- a/app/MindWork AI Studio/Settings/DataModel/DataApp.cs +++ b/app/MindWork AI Studio/Settings/DataModel/DataApp.cs @@ -77,6 +77,11 @@ public sealed class DataApp(Expression>? configSelection = n /// public string PreselectedChatTemplate { get; set; } = string.Empty; + /// + /// Which transcription provider should be used? 
+ /// + public string UseTranscriptionProvider { get; set; } = ManagedConfiguration.Register(configSelection, n => n.UseTranscriptionProvider, string.Empty); + /// /// Should the user be allowed to add providers? /// diff --git a/app/MindWork AI Studio/Settings/TranscriptionProvider.cs b/app/MindWork AI Studio/Settings/TranscriptionProvider.cs index a7ce8957..02b20792 100644 --- a/app/MindWork AI Studio/Settings/TranscriptionProvider.cs +++ b/app/MindWork AI Studio/Settings/TranscriptionProvider.cs @@ -1,32 +1,134 @@ using System.Text.Json.Serialization; using AIStudio.Provider; +using AIStudio.Tools.PluginSystem; + +using Lua; using Host = AIStudio.Provider.SelfHosted.Host; namespace AIStudio.Settings; -public readonly record struct TranscriptionProvider( +public sealed record TranscriptionProvider( uint Num, string Id, string Name, LLMProviders UsedLLMProvider, Model Model, bool IsSelfHosted = false, + bool IsEnterpriseConfiguration = false, + Guid EnterpriseConfigurationPluginId = default, string Hostname = "http://localhost:1234", - Host Host = Host.NONE) : ISecretId + Host Host = Host.NONE) : ConfigurationBaseObject, ISecretId { + private static readonly ILogger LOGGER = Program.LOGGER_FACTORY.CreateLogger(); + + public static readonly TranscriptionProvider NONE = new(); + + public TranscriptionProvider() : this( + 0, + Guid.Empty.ToString(), + string.Empty, + LLMProviders.NONE, + default, + false, + false, + Guid.Empty) + { + } + public override string ToString() => this.Name; - + #region Implementation of ISecretId - + /// [JsonIgnore] public string SecretId => this.Id; - + /// [JsonIgnore] public string SecretName => this.Name; - + #endregion + + public static bool TryParseTranscriptionProviderTable(int idx, LuaTable table, Guid configPluginId, out ConfigurationBaseObject provider) + { + provider = NONE; + if (!table.TryGetValue("Id", out var idValue) || !idValue.TryRead(out var idText) || !Guid.TryParse(idText, out var id)) + { + LOGGER.LogWarning($"The 
configured transcription provider {idx} does not contain a valid ID. The ID must be a valid GUID."); + return false; + } + + if (!table.TryGetValue("Name", out var nameValue) || !nameValue.TryRead(out var name)) + { + LOGGER.LogWarning($"The configured transcription provider {idx} does not contain a valid name."); + return false; + } + + if (!table.TryGetValue("UsedLLMProvider", out var usedLLMProviderValue) || !usedLLMProviderValue.TryRead(out var usedLLMProviderText) || !Enum.TryParse(usedLLMProviderText, true, out var usedLLMProvider)) + { + LOGGER.LogWarning($"The configured transcription provider {idx} does not contain a valid LLM provider enum value."); + return false; + } + + if (!table.TryGetValue("Host", out var hostValue) || !hostValue.TryRead(out var hostText) || !Enum.TryParse(hostText, true, out var host)) + { + LOGGER.LogWarning($"The configured transcription provider {idx} does not contain a valid host enum value."); + return false; + } + + if (!table.TryGetValue("Hostname", out var hostnameValue) || !hostnameValue.TryRead(out var hostname)) + { + LOGGER.LogWarning($"The configured transcription provider {idx} does not contain a valid hostname."); + return false; + } + + if (!table.TryGetValue("Model", out var modelValue) || !modelValue.TryRead(out var modelTable)) + { + LOGGER.LogWarning($"The configured transcription provider {idx} does not contain a valid model table."); + return false; + } + + if (!TryReadModelTable(idx, modelTable, out var model)) + { + LOGGER.LogWarning($"The configured transcription provider {idx} does not contain a valid model configuration."); + return false; + } + + provider = new TranscriptionProvider + { + Num = 0, + Id = id.ToString(), + Name = name, + UsedLLMProvider = usedLLMProvider, + Model = model, + IsSelfHosted = usedLLMProvider is LLMProviders.SELF_HOSTED, + IsEnterpriseConfiguration = true, + EnterpriseConfigurationPluginId = configPluginId, + Hostname = hostname, + Host = host, + }; + + return true; + } + + 
private static bool TryReadModelTable(int idx, LuaTable table, out Model model) + { + model = default; + if (!table.TryGetValue("Id", out var idValue) || !idValue.TryRead(out var id)) + { + LOGGER.LogWarning($"The configured transcription provider {idx} does not contain a valid model ID."); + return false; + } + + if (!table.TryGetValue("DisplayName", out var displayNameValue) || !displayNameValue.TryRead(out var displayName)) + { + LOGGER.LogWarning($"The configured transcription provider {idx} does not contain a valid model display name."); + return false; + } + + model = new(id, displayName); + return true; + } } \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/PluginSystem/PluginConfiguration.cs b/app/MindWork AI Studio/Tools/PluginSystem/PluginConfiguration.cs index 11aac759..76148218 100644 --- a/app/MindWork AI Studio/Tools/PluginSystem/PluginConfiguration.cs +++ b/app/MindWork AI Studio/Tools/PluginSystem/PluginConfiguration.cs @@ -70,6 +70,9 @@ public sealed class PluginConfiguration(bool isInternal, LuaState state, PluginT // Handle configured LLM providers: PluginConfigurationObject.TryParse(PluginConfigurationObjectType.LLM_PROVIDER, x => x.Providers, x => x.NextProviderNum, mainTable, this.Id, ref this.configObjects, dryRun); + // Handle configured transcription providers: + PluginConfigurationObject.TryParse(PluginConfigurationObjectType.TRANSCRIPTION_PROVIDER, x => x.TranscriptionProviders, x => x.NextTranscriptionNum, mainTable, this.Id, ref this.configObjects, dryRun); + // Handle configured embedding providers: PluginConfigurationObject.TryParse(PluginConfigurationObjectType.EMBEDDING_PROVIDER, x => x.EmbeddingProviders, x => x.NextEmbeddingNum, mainTable, this.Id, ref this.configObjects, dryRun); @@ -81,7 +84,10 @@ public sealed class PluginConfiguration(bool isInternal, LuaState state, PluginT // Config: preselected profile? 
ManagedConfiguration.TryProcessConfiguration(x => x.App, x => x.PreselectedProfile, Guid.Empty, this.Id, settingsTable, dryRun); - + + // Config: transcription provider? + ManagedConfiguration.TryProcessConfiguration(x => x.App, x => x.UseTranscriptionProvider, Guid.Empty, this.Id, settingsTable, dryRun); + message = string.Empty; return true; } diff --git a/app/MindWork AI Studio/Tools/PluginSystem/PluginConfigurationObject.cs b/app/MindWork AI Studio/Tools/PluginSystem/PluginConfigurationObject.cs index 864f5488..da5c46c2 100644 --- a/app/MindWork AI Studio/Tools/PluginSystem/PluginConfigurationObject.cs +++ b/app/MindWork AI Studio/Tools/PluginSystem/PluginConfigurationObject.cs @@ -68,8 +68,9 @@ public sealed record PluginConfigurationObject PluginConfigurationObjectType.CHAT_TEMPLATE => "CHAT_TEMPLATES", PluginConfigurationObjectType.DATA_SOURCE => "DATA_SOURCES", PluginConfigurationObjectType.EMBEDDING_PROVIDER => "EMBEDDING_PROVIDERS", + PluginConfigurationObjectType.TRANSCRIPTION_PROVIDER => "TRANSCRIPTION_PROVIDERS", PluginConfigurationObjectType.PROFILE => "PROFILES", - + _ => null, }; @@ -102,6 +103,7 @@ public sealed record PluginConfigurationObject PluginConfigurationObjectType.LLM_PROVIDER => (Settings.Provider.TryParseProviderTable(i, luaObjectTable, configPluginId, out var configurationObject) && configurationObject != Settings.Provider.NONE, configurationObject), PluginConfigurationObjectType.CHAT_TEMPLATE => (ChatTemplate.TryParseChatTemplateTable(i, luaObjectTable, configPluginId, out var configurationObject) && configurationObject != ChatTemplate.NO_CHAT_TEMPLATE, configurationObject), PluginConfigurationObjectType.PROFILE => (Profile.TryParseProfileTable(i, luaObjectTable, configPluginId, out var configurationObject) && configurationObject != Profile.NO_PROFILE, configurationObject), + PluginConfigurationObjectType.TRANSCRIPTION_PROVIDER => (TranscriptionProvider.TryParseTranscriptionProviderTable(i, luaObjectTable, configPluginId, out var 
configurationObject) && configurationObject != TranscriptionProvider.NONE, configurationObject), PluginConfigurationObjectType.EMBEDDING_PROVIDER => (EmbeddingProvider.TryParseEmbeddingProviderTable(i, luaObjectTable, configPluginId, out var configurationObject) && configurationObject != EmbeddingProvider.NONE, configurationObject), _ => (false, NoConfigurationObject.INSTANCE) diff --git a/app/MindWork AI Studio/Tools/PluginSystem/PluginConfigurationObjectType.cs b/app/MindWork AI Studio/Tools/PluginSystem/PluginConfigurationObjectType.cs index 1cb4f604..82931873 100644 --- a/app/MindWork AI Studio/Tools/PluginSystem/PluginConfigurationObjectType.cs +++ b/app/MindWork AI Studio/Tools/PluginSystem/PluginConfigurationObjectType.cs @@ -4,10 +4,11 @@ public enum PluginConfigurationObjectType { NONE, UNKNOWN, - + PROFILE, DATA_SOURCE, LLM_PROVIDER, CHAT_TEMPLATE, EMBEDDING_PROVIDER, + TRANSCRIPTION_PROVIDER, } \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/PluginSystem/PluginFactory.Loading.cs b/app/MindWork AI Studio/Tools/PluginSystem/PluginFactory.Loading.cs index 14a5ab8e..2efbde97 100644 --- a/app/MindWork AI Studio/Tools/PluginSystem/PluginFactory.Loading.cs +++ b/app/MindWork AI Studio/Tools/PluginSystem/PluginFactory.Loading.cs @@ -133,6 +133,10 @@ public static partial class PluginFactory // Check LLM providers: var wasConfigurationChanged = PluginConfigurationObject.CleanLeftOverConfigurationObjects(PluginConfigurationObjectType.LLM_PROVIDER, x => x.Providers, AVAILABLE_PLUGINS, configObjectList); + // Check transcription providers: + if(PluginConfigurationObject.CleanLeftOverConfigurationObjects(PluginConfigurationObjectType.TRANSCRIPTION_PROVIDER, x => x.TranscriptionProviders, AVAILABLE_PLUGINS, configObjectList)) + wasConfigurationChanged = true; + // Check embedding providers: if(PluginConfigurationObject.CleanLeftOverConfigurationObjects(PluginConfigurationObjectType.EMBEDDING_PROVIDER, x => x.EmbeddingProviders, AVAILABLE_PLUGINS, 
configObjectList)) wasConfigurationChanged = true; @@ -169,6 +173,10 @@ public static partial class PluginFactory if(ManagedConfiguration.IsConfigurationLeftOver(x => x.App, x => x.EnabledPreviewFeatures, AVAILABLE_PLUGINS)) wasConfigurationChanged = true; + // Check for the transcription provider: + if(ManagedConfiguration.IsConfigurationLeftOver(x => x.App, x => x.UseTranscriptionProvider, AVAILABLE_PLUGINS)) + wasConfigurationChanged = true; + if (wasConfigurationChanged) { await SETTINGS_MANAGER.StoreSettings(); diff --git a/app/MindWork AI Studio/wwwroot/changelog/v26.1.1.md b/app/MindWork AI Studio/wwwroot/changelog/v26.1.1.md index 9cfe8a68..1f241e20 100644 --- a/app/MindWork AI Studio/wwwroot/changelog/v26.1.1.md +++ b/app/MindWork AI Studio/wwwroot/changelog/v26.1.1.md @@ -1,8 +1,9 @@ # v26.1.1, build 231 (2026-01-xx xx:xx UTC) - Added the option to attach files, including images, to chat templates. You can also define templates with file attachments through a configuration plugin. These file attachments aren’t copied—they’re re-read every time. That means the AI will pick up any updates you make to those files. - Added the option to use source code files in chats and document analysis. This supports software development workflows. -- Added a preview feature that lets you record your own voice in preparation for the transcription feature. The feature remains in development and appears only when the preview feature is enabled. -- Added the option to configure transcription providers (for example, using Whisper models). As usual, there can be local as well as cloud models configured. This option is part of the transcription preview and remains hidden until the preview is activated or the feature gets released. +- Added a preview feature that lets you record your own voice for transcription. The feature remains in development and appears only when the preview feature is enabled. 
+- Added the option to configure transcription providers (for example, using Whisper models). As usual, there can be local as well as cloud models configured. This option is part of the transcription preview and remains hidden until the preview is activated or the feature gets released. Transcription providers can be configured through configuration plugins as well. +- Added an option to the app settings to select a provider for transcribing dictated text. If no provider is selected, dictation and transcription are disabled. - Added the option to configure embedding providers through a config plugin and distribute them within an organization. - Improved the app versioning. Starting in 2026, each version number includes the year, followed by the month. The last digit shows the release number for that month. For example, version `26.1.1` is the first release in January 2026. - Fixed a bug in the profile selection where the "Use no profile" entry could not be localized, causing English text to appear in languages such as German. This behavior has now been fixed.