From ce8948880c433c439bd144a2803a33edf9cab5f5 Mon Sep 17 00:00:00 2001 From: Thorsten Sommer Date: Fri, 9 Jan 2026 15:17:41 +0100 Subject: [PATCH] Add support for configurable transcription providers --- .../Settings/SettingsPanelTranscription.razor | 31 +++-- .../TranscriptionProviderDialog.razor.cs | 2 + .../Plugins/configuration/plugin.lua | 28 ++++- .../Settings/TranscriptionProvider.cs | 114 +++++++++++++++++- .../Tools/PluginSystem/PluginConfiguration.cs | 5 +- .../PluginSystem/PluginConfigurationObject.cs | 6 +- .../PluginConfigurationObjectType.cs | 3 +- .../PluginSystem/PluginFactory.Loading.cs | 6 +- .../wwwroot/changelog/v26.1.1.md | 2 +- 9 files changed, 172 insertions(+), 25 deletions(-) diff --git a/app/MindWork AI Studio/Components/Settings/SettingsPanelTranscription.razor b/app/MindWork AI Studio/Components/Settings/SettingsPanelTranscription.razor index fd707c5a..273bee0b 100644 --- a/app/MindWork AI Studio/Components/Settings/SettingsPanelTranscription.razor +++ b/app/MindWork AI Studio/Components/Settings/SettingsPanelTranscription.razor @@ -33,18 +33,27 @@ @context.Name @context.UsedLLMProvider.ToName() @GetTranscriptionProviderModelName(context) - + - - - - - - - - - - + + @if (context.IsEnterpriseConfiguration) + { + + + + } + else + { + + + + + + + + + + } diff --git a/app/MindWork AI Studio/Dialogs/TranscriptionProviderDialog.razor.cs b/app/MindWork AI Studio/Dialogs/TranscriptionProviderDialog.razor.cs index 01caa6af..b5b02e07 100644 --- a/app/MindWork AI Studio/Dialogs/TranscriptionProviderDialog.razor.cs +++ b/app/MindWork AI Studio/Dialogs/TranscriptionProviderDialog.razor.cs @@ -137,6 +137,8 @@ public partial class TranscriptionProviderDialog : MSGComponentBase, ISecretId IsSelfHosted = this.DataLLMProvider is LLMProviders.SELF_HOSTED, Hostname = cleanedHostname.EndsWith('/') ? cleanedHostname[..^1] : cleanedHostname, Host = this.DataHost, + IsEnterpriseConfiguration = false, + EnterpriseConfigurationPluginId = Guid.Empty, }; } diff --git a/app/MindWork AI Studio/Plugins/configuration/plugin.lua b/app/MindWork AI Studio/Plugins/configuration/plugin.lua index abecda7c..823669c0 100644 --- a/app/MindWork AI Studio/Plugins/configuration/plugin.lua +++ b/app/MindWork AI Studio/Plugins/configuration/plugin.lua @@ -53,11 +53,11 @@ CONFIG["LLM_PROVIDERS"][#CONFIG["LLM_PROVIDERS"]+1] = { ["Id"] = "00000000-0000-0000-0000-000000000000", ["InstanceName"] = "", ["UsedLLMProvider"] = "SELF_HOSTED", - + -- Allowed values for Host are: LM_STUDIO, LLAMACPP, OLLAMA, and VLLM ["Host"] = "OLLAMA", ["Hostname"] = "", - + -- Optional: Additional parameters for the API. -- Please refer to the documentation of the selected host for details. -- Might be something like ... \"temperature\": 0.5 ... for one parameter. @@ -70,6 +70,24 @@ CONFIG["LLM_PROVIDERS"][#CONFIG["LLM_PROVIDERS"]+1] = { } } +-- Transcription providers for voice-to-text functionality: +CONFIG["TRANSCRIPTION_PROVIDERS"] = {} + +-- An example of a transcription provider configuration: +-- CONFIG["TRANSCRIPTION_PROVIDERS"][#CONFIG["TRANSCRIPTION_PROVIDERS"]+1] = { +-- ["Id"] = "00000000-0000-0000-0000-000000000001", +-- ["Name"] = "", +-- ["UsedLLMProvider"] = "SELF_HOSTED", +-- +-- -- Allowed values for Host are: LM_STUDIO, LLAMACPP, OLLAMA, VLLM, and WHISPER_CPP +-- ["Host"] = "WHISPER_CPP", +-- ["Hostname"] = "", +-- ["Model"] = { +-- ["Id"] = "", +-- ["DisplayName"] = "", +-- } +-- } + CONFIG["SETTINGS"] = {} -- Configure the update check interval: @@ -101,6 +119,12 @@ CONFIG["SETTINGS"] = {} -- Please note: using an empty string ("") will lock the preselected profile selection, even though no valid preselected profile is found. -- CONFIG["SETTINGS"]["DataApp.PreselectedProfile"] = "00000000-0000-0000-0000-000000000000" +-- Configure the transcription provider for voice-to-text functionality. +-- It must be one of the transcription provider IDs defined in CONFIG["TRANSCRIPTION_PROVIDERS"]. +-- Without a selected transcription provider, dictation and transcription features will be disabled. +-- Please note: using an empty string ("") will lock the selection and disable dictation/transcription. +-- CONFIG["SETTINGS"]["DataApp.UseTranscriptionProvider"] = "00000000-0000-0000-0000-000000000000" + -- Example chat templates for this configuration: CONFIG["CHAT_TEMPLATES"] = {} diff --git a/app/MindWork AI Studio/Settings/TranscriptionProvider.cs b/app/MindWork AI Studio/Settings/TranscriptionProvider.cs index a7ce8957..02b20792 100644 --- a/app/MindWork AI Studio/Settings/TranscriptionProvider.cs +++ b/app/MindWork AI Studio/Settings/TranscriptionProvider.cs @@ -1,32 +1,134 @@ using System.Text.Json.Serialization; using AIStudio.Provider; +using AIStudio.Tools.PluginSystem; + +using Lua; using Host = AIStudio.Provider.SelfHosted.Host; namespace AIStudio.Settings; -public readonly record struct TranscriptionProvider( +public sealed record TranscriptionProvider( uint Num, string Id, string Name, LLMProviders UsedLLMProvider, Model Model, bool IsSelfHosted = false, + bool IsEnterpriseConfiguration = false, + Guid EnterpriseConfigurationPluginId = default, string Hostname = "http://localhost:1234", - Host Host = Host.NONE) : ISecretId + Host Host = Host.NONE) : ConfigurationBaseObject, ISecretId { + private static readonly ILogger LOGGER = Program.LOGGER_FACTORY.CreateLogger(); + + public static readonly TranscriptionProvider NONE = new(); + + public TranscriptionProvider() : this( + 0, + Guid.Empty.ToString(), + string.Empty, + LLMProviders.NONE, + default, + false, + false, + Guid.Empty) + { + } + public override string ToString() => this.Name; - + #region Implementation of ISecretId - + /// [JsonIgnore] public string SecretId => this.Id; - + /// [JsonIgnore] public string SecretName => this.Name; - + #endregion + + public static bool TryParseTranscriptionProviderTable(int idx, LuaTable table, Guid configPluginId, out ConfigurationBaseObject provider) + { + provider = NONE; + if (!table.TryGetValue("Id", out var idValue) || !idValue.TryRead(out var idText) || !Guid.TryParse(idText, out var id)) + { + LOGGER.LogWarning($"The configured transcription provider {idx} does not contain a valid ID. The ID must be a valid GUID."); + return false; + } + + if (!table.TryGetValue("Name", out var nameValue) || !nameValue.TryRead(out var name)) + { + LOGGER.LogWarning($"The configured transcription provider {idx} does not contain a valid name."); + return false; + } + + if (!table.TryGetValue("UsedLLMProvider", out var usedLLMProviderValue) || !usedLLMProviderValue.TryRead(out var usedLLMProviderText) || !Enum.TryParse(usedLLMProviderText, true, out var usedLLMProvider)) + { + LOGGER.LogWarning($"The configured transcription provider {idx} does not contain a valid LLM provider enum value."); + return false; + } + + if (!table.TryGetValue("Host", out var hostValue) || !hostValue.TryRead(out var hostText) || !Enum.TryParse(hostText, true, out var host)) + { + LOGGER.LogWarning($"The configured transcription provider {idx} does not contain a valid host enum value."); + return false; + } + + if (!table.TryGetValue("Hostname", out var hostnameValue) || !hostnameValue.TryRead(out var hostname)) + { + LOGGER.LogWarning($"The configured transcription provider {idx} does not contain a valid hostname."); + return false; + } + + if (!table.TryGetValue("Model", out var modelValue) || !modelValue.TryRead(out var modelTable)) + { + LOGGER.LogWarning($"The configured transcription provider {idx} does not contain a valid model table."); + return false; + } + + if (!TryReadModelTable(idx, modelTable, out var model)) + { + LOGGER.LogWarning($"The configured transcription provider {idx} does not contain a valid model configuration."); + return false; + } + + provider = new TranscriptionProvider + { + Num = 0, + Id = id.ToString(), + Name = name, + UsedLLMProvider = usedLLMProvider, + Model = model, + IsSelfHosted = usedLLMProvider is LLMProviders.SELF_HOSTED, + IsEnterpriseConfiguration = true, + EnterpriseConfigurationPluginId = configPluginId, + Hostname = hostname, + Host = host, + }; + + return true; + } + + private static bool TryReadModelTable(int idx, LuaTable table, out Model model) + { + model = default; + if (!table.TryGetValue("Id", out var idValue) || !idValue.TryRead(out var id)) + { + LOGGER.LogWarning($"The configured transcription provider {idx} does not contain a valid model ID."); + return false; + } + + if (!table.TryGetValue("DisplayName", out var displayNameValue) || !displayNameValue.TryRead(out var displayName)) + { + LOGGER.LogWarning($"The configured transcription provider {idx} does not contain a valid model display name."); + return false; + } + + model = new(id, displayName); + return true; + } } \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/PluginSystem/PluginConfiguration.cs b/app/MindWork AI Studio/Tools/PluginSystem/PluginConfiguration.cs index 4ef1b7a5..3b3cb5f1 100644 --- a/app/MindWork AI Studio/Tools/PluginSystem/PluginConfiguration.cs +++ b/app/MindWork AI Studio/Tools/PluginSystem/PluginConfiguration.cs @@ -69,7 +69,10 @@ public sealed class PluginConfiguration(bool isInternal, LuaState state, PluginT // Handle configured LLM providers: PluginConfigurationObject.TryParse(PluginConfigurationObjectType.LLM_PROVIDER, x => x.Providers, x => x.NextProviderNum, mainTable, this.Id, ref this.configObjects, dryRun); - + + // Handle configured transcription providers: + PluginConfigurationObject.TryParse(PluginConfigurationObjectType.TRANSCRIPTION_PROVIDER, x => x.TranscriptionProviders, x => x.NextTranscriptionNum, mainTable, this.Id, ref this.configObjects, dryRun); + // Handle configured chat templates: PluginConfigurationObject.TryParse(PluginConfigurationObjectType.CHAT_TEMPLATE, x => x.ChatTemplates, x => x.NextChatTemplateNum, mainTable, this.Id, ref this.configObjects, dryRun); diff --git a/app/MindWork AI Studio/Tools/PluginSystem/PluginConfigurationObject.cs b/app/MindWork AI Studio/Tools/PluginSystem/PluginConfigurationObject.cs index f05a724b..2f04fd46 100644 --- a/app/MindWork AI Studio/Tools/PluginSystem/PluginConfigurationObject.cs +++ b/app/MindWork AI Studio/Tools/PluginSystem/PluginConfigurationObject.cs @@ -68,8 +68,9 @@ public sealed record PluginConfigurationObject PluginConfigurationObjectType.CHAT_TEMPLATE => "CHAT_TEMPLATES", PluginConfigurationObjectType.DATA_SOURCE => "DATA_SOURCES", PluginConfigurationObjectType.EMBEDDING_PROVIDER => "EMBEDDING_PROVIDERS", + PluginConfigurationObjectType.TRANSCRIPTION_PROVIDER => "TRANSCRIPTION_PROVIDERS", PluginConfigurationObjectType.PROFILE => "PROFILES", - + _ => null, }; @@ -102,7 +103,8 @@ public sealed record PluginConfigurationObject PluginConfigurationObjectType.LLM_PROVIDER => (Settings.Provider.TryParseProviderTable(i, luaObjectTable, configPluginId, out var configurationObject) && configurationObject != Settings.Provider.NONE, configurationObject), PluginConfigurationObjectType.CHAT_TEMPLATE => (ChatTemplate.TryParseChatTemplateTable(i, luaObjectTable, configPluginId, out var configurationObject) && configurationObject != ChatTemplate.NO_CHAT_TEMPLATE, configurationObject), PluginConfigurationObjectType.PROFILE => (Profile.TryParseProfileTable(i, luaObjectTable, configPluginId, out var configurationObject) && configurationObject != Profile.NO_PROFILE, configurationObject), - + PluginConfigurationObjectType.TRANSCRIPTION_PROVIDER => (TranscriptionProvider.TryParseTranscriptionProviderTable(i, luaObjectTable, configPluginId, out var configurationObject) && configurationObject != TranscriptionProvider.NONE, configurationObject), + _ => (false, NoConfigurationObject.INSTANCE) }; diff --git a/app/MindWork AI Studio/Tools/PluginSystem/PluginConfigurationObjectType.cs b/app/MindWork AI Studio/Tools/PluginSystem/PluginConfigurationObjectType.cs index 1cb4f604..82931873 100644 --- a/app/MindWork AI Studio/Tools/PluginSystem/PluginConfigurationObjectType.cs +++ b/app/MindWork AI Studio/Tools/PluginSystem/PluginConfigurationObjectType.cs @@ -4,10 +4,11 @@ public enum PluginConfigurationObjectType { NONE, UNKNOWN, - + PROFILE, DATA_SOURCE, LLM_PROVIDER, CHAT_TEMPLATE, EMBEDDING_PROVIDER, + TRANSCRIPTION_PROVIDER, } \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/PluginSystem/PluginFactory.Loading.cs b/app/MindWork AI Studio/Tools/PluginSystem/PluginFactory.Loading.cs index 5c494b42..38ff3977 100644 --- a/app/MindWork AI Studio/Tools/PluginSystem/PluginFactory.Loading.cs +++ b/app/MindWork AI Studio/Tools/PluginSystem/PluginFactory.Loading.cs @@ -132,7 +132,11 @@ public static partial class PluginFactory // Check LLM providers: var wasConfigurationChanged = PluginConfigurationObject.CleanLeftOverConfigurationObjects(PluginConfigurationObjectType.LLM_PROVIDER, x => x.Providers, AVAILABLE_PLUGINS, configObjectList); - + + // Check transcription providers: + if(PluginConfigurationObject.CleanLeftOverConfigurationObjects(PluginConfigurationObjectType.TRANSCRIPTION_PROVIDER, x => x.TranscriptionProviders, AVAILABLE_PLUGINS, configObjectList)) + wasConfigurationChanged = true; + // Check chat templates: if(PluginConfigurationObject.CleanLeftOverConfigurationObjects(PluginConfigurationObjectType.CHAT_TEMPLATE, x => x.ChatTemplates, AVAILABLE_PLUGINS, configObjectList)) wasConfigurationChanged = true; diff --git a/app/MindWork AI Studio/wwwroot/changelog/v26.1.1.md b/app/MindWork AI Studio/wwwroot/changelog/v26.1.1.md index 48bee6f0..f7287390 100644 --- a/app/MindWork AI Studio/wwwroot/changelog/v26.1.1.md +++ b/app/MindWork AI Studio/wwwroot/changelog/v26.1.1.md @@ -2,7 +2,7 @@ - Added the option to attach files, including images, to chat templates. You can also define templates with file attachments through a configuration plugin. These file attachments aren’t copied—they’re re-read every time. That means the AI will pick up any updates you make to those files. - Added the option to use source code files in chats and document analysis. This supports software development workflows. - Added a preview feature that lets you record your own voice for transcription. The feature remains in development and appears only when the preview feature is enabled. -- Added the option to configure transcription providers (for example, using Whisper models). As usual, there can be local as well as cloud models configured. This option is part of the transcription preview and remains hidden until the preview is activated or the feature gets released. +- Added the option to configure transcription providers (for example, using Whisper models). As usual, there can be local as well as cloud models configured. This option is part of the transcription preview and remains hidden until the preview is activated or the feature gets released. Transcription providers can be configured through configuration plugins as well. - Added an option to the app settings to select a provider for transcribing dictated text. If no provider is selected, dictation and text transcription are disabled. - Improved the app versioning. Starting in 2026, each version number includes the year, followed by the month. The last digit shows the release number for that month. For example, version `26.1.1` is the first release in January 2026. - Fixed a bug in the profile selection where the "Use no profile" entry could not be localized, causing English text to appear in languages such as German. This behavior has now been fixed.