diff --git a/app/MindWork AI Studio/Assistants/I18N/allTexts.lua b/app/MindWork AI Studio/Assistants/I18N/allTexts.lua index c2429c15..7eba84e5 100644 --- a/app/MindWork AI Studio/Assistants/I18N/allTexts.lua +++ b/app/MindWork AI Studio/Assistants/I18N/allTexts.lua @@ -2242,6 +2242,54 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELPROVIDERS::T853225 -- Provider UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELPROVIDERS::T900237532"] = "Provider" +-- No transcription provider configured yet. +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T1079350363"] = "No transcription provider configured yet." + +-- Edit Transcription Provider +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T1317362918"] = "Edit Transcription Provider" + +-- Delete +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T1469573738"] = "Delete" + +-- Configure Transcription +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T1622062299"] = "Configure Transcription" + +-- Add transcription provider +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T1645238629"] = "Add transcription provider" + +-- Add Transcription Provider +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T2066315685"] = "Add Transcription Provider" + +-- Model +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T2189814010"] = "Model" + +-- Name +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T266367750"] = "Name" + +-- Edit +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T3267849393"] = "Edit" + +-- Delete Transcription Provider +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T370103955"] = "Delete Transcription Provider" + +-- Actions +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T3865031940"] = "Actions" + +-- Configured Transcription +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T4138223521"] = "Configured Transcription" + +-- Open Dashboard +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T78223861"] = "Open Dashboard" + +-- Are you sure you want to delete the transcription provider '{0}'? +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T789660305"] = "Are you sure you want to delete the transcription provider '{0}'?" + +-- With the support of transcription models, MindWork AI Studio can convert human speech into text. This is useful, for example, when you need to dictate text. You can choose from dedicated transcription models, but not multimodal LLMs (large language models) that can handle both speech and text. The configuration of multimodal models is done in the \"Configure providers\" section. +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T799338148"] = "With the support of transcription models, MindWork AI Studio can convert human speech into text. This is useful, for example, when you need to dictate text. You can choose from dedicated transcription models, but not multimodal LLMs (large language models) that can handle both speech and text. The configuration of multimodal models is done in the \\\"Configure providers\\\" section." 
+ +-- Provider +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T900237532"] = "Provider" + -- Copy {0} to the clipboard UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::TEXTINFOLINE::T2206391442"] = "Copy {0} to the clipboard" @@ -3106,9 +3154,6 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2189814010"] = "Mo -- (Optional) API Key UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2331453405"] = "(Optional) API Key" --- Currently, we cannot query the embedding models of self-hosted systems. Therefore, enter the model name manually. -UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2615586687"] = "Currently, we cannot query the embedding models of self-hosted systems. Therefore, enter the model name manually." - -- Add UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2646845972"] = "Add" @@ -3118,6 +3163,9 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2810182573"] = "No -- Instance Name UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2842060373"] = "Instance Name" +-- Currently, we cannot query the embedding models for the selected provider and/or host. Therefore, please enter the model name manually. +UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T290547799"] = "Currently, we cannot query the embedding models for the selected provider and/or host. Therefore, please enter the model name manually." + -- Model selection UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T416738168"] = "Model selection" @@ -3328,6 +3376,9 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::PROVIDERDIALOG::T3361153305"] = "Show Expert -- Show available models UI_TEXT_CONTENT["AISTUDIO::DIALOGS::PROVIDERDIALOG::T3763891899"] = "Show available models" +-- Currently, we cannot query the models for the selected provider and/or host. Therefore, please enter the model name manually. +UI_TEXT_CONTENT["AISTUDIO::DIALOGS::PROVIDERDIALOG::T4116737656"] = "Currently, we cannot query the models for the selected provider and/or host. Therefore, please enter the model name manually." + -- Model selection UI_TEXT_CONTENT["AISTUDIO::DIALOGS::PROVIDERDIALOG::T416738168"] = "Model selection" @@ -4504,6 +4555,60 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::SINGLEINPUTDIALOG::T4030229154"] = "Your Inp -- Cancel UI_TEXT_CONTENT["AISTUDIO::DIALOGS::SINGLEINPUTDIALOG::T900713019"] = "Cancel" +-- Failed to store the API key in the operating system. The message was: {0}. Please try again. +UI_TEXT_CONTENT["AISTUDIO::DIALOGS::TRANSCRIPTIONPROVIDERDIALOG::T1122745046"] = "Failed to store the API key in the operating system. The message was: {0}. Please try again." + +-- API Key +UI_TEXT_CONTENT["AISTUDIO::DIALOGS::TRANSCRIPTIONPROVIDERDIALOG::T1324664716"] = "API Key" + +-- Create account +UI_TEXT_CONTENT["AISTUDIO::DIALOGS::TRANSCRIPTIONPROVIDERDIALOG::T1356621346"] = "Create account" + +-- Currently, we cannot query the transcription models for the selected provider and/or host. Therefore, please enter the model name manually. +UI_TEXT_CONTENT["AISTUDIO::DIALOGS::TRANSCRIPTIONPROVIDERDIALOG::T1381635232"] = "Currently, we cannot query the transcription models for the selected provider and/or host. Therefore, please enter the model name manually." 
+ +-- Hostname +UI_TEXT_CONTENT["AISTUDIO::DIALOGS::TRANSCRIPTIONPROVIDERDIALOG::T1727440780"] = "Hostname" + +-- Load +UI_TEXT_CONTENT["AISTUDIO::DIALOGS::TRANSCRIPTIONPROVIDERDIALOG::T1756340745"] = "Load" + +-- Update +UI_TEXT_CONTENT["AISTUDIO::DIALOGS::TRANSCRIPTIONPROVIDERDIALOG::T1847791252"] = "Update" + +-- Failed to load the API key from the operating system. The message was: {0}. You might ignore this message and provide the API key again. +UI_TEXT_CONTENT["AISTUDIO::DIALOGS::TRANSCRIPTIONPROVIDERDIALOG::T1870831108"] = "Failed to load the API key from the operating system. The message was: {0}. You might ignore this message and provide the API key again." + +-- Model +UI_TEXT_CONTENT["AISTUDIO::DIALOGS::TRANSCRIPTIONPROVIDERDIALOG::T2189814010"] = "Model" + +-- (Optional) API Key +UI_TEXT_CONTENT["AISTUDIO::DIALOGS::TRANSCRIPTIONPROVIDERDIALOG::T2331453405"] = "(Optional) API Key" + +-- Add +UI_TEXT_CONTENT["AISTUDIO::DIALOGS::TRANSCRIPTIONPROVIDERDIALOG::T2646845972"] = "Add" + +-- No models loaded or available. +UI_TEXT_CONTENT["AISTUDIO::DIALOGS::TRANSCRIPTIONPROVIDERDIALOG::T2810182573"] = "No models loaded or available." + +-- Instance Name +UI_TEXT_CONTENT["AISTUDIO::DIALOGS::TRANSCRIPTIONPROVIDERDIALOG::T2842060373"] = "Instance Name" + +-- Please enter a transcription model name. +UI_TEXT_CONTENT["AISTUDIO::DIALOGS::TRANSCRIPTIONPROVIDERDIALOG::T3703662664"] = "Please enter a transcription model name." + +-- Model selection +UI_TEXT_CONTENT["AISTUDIO::DIALOGS::TRANSCRIPTIONPROVIDERDIALOG::T416738168"] = "Model selection" + +-- Host +UI_TEXT_CONTENT["AISTUDIO::DIALOGS::TRANSCRIPTIONPROVIDERDIALOG::T808120719"] = "Host" + +-- Provider +UI_TEXT_CONTENT["AISTUDIO::DIALOGS::TRANSCRIPTIONPROVIDERDIALOG::T900237532"] = "Provider" + +-- Cancel +UI_TEXT_CONTENT["AISTUDIO::DIALOGS::TRANSCRIPTIONPROVIDERDIALOG::T900713019"] = "Cancel" + -- Install now UI_TEXT_CONTENT["AISTUDIO::DIALOGS::UPDATEDIALOG::T2366359512"] = "Install now" @@ -5206,6 +5311,9 @@ UI_TEXT_CONTENT["AISTUDIO::PROVIDER::LLMPROVIDERSEXTENSIONS::T3424652889"] = "Un -- no model selected UI_TEXT_CONTENT["AISTUDIO::PROVIDER::MODEL::T2234274832"] = "no model selected" +-- Model as configured by whisper.cpp +UI_TEXT_CONTENT["AISTUDIO::PROVIDER::SELFHOSTED::PROVIDERSELFHOSTED::T3313940770"] = "Model as configured by whisper.cpp" + -- Use no chat template UI_TEXT_CONTENT["AISTUDIO::SETTINGS::CHATTEMPLATE::T4258819635"] = "Use no chat template" @@ -5374,15 +5482,15 @@ UI_TEXT_CONTENT["AISTUDIO::SETTINGS::DATAMODEL::PREVIEWFEATURESEXTENSIONS::T1848 -- Plugins: Preview of our plugin system where you can extend the functionality of the app UI_TEXT_CONTENT["AISTUDIO::SETTINGS::DATAMODEL::PREVIEWFEATURESEXTENSIONS::T2056842933"] = "Plugins: Preview of our plugin system where you can extend the functionality of the app" --- Speech to Text: Preview of our speech to text system where you can transcribe recordings and audio files into text -UI_TEXT_CONTENT["AISTUDIO::SETTINGS::DATAMODEL::PREVIEWFEATURESEXTENSIONS::T221133923"] = "Speech to Text: Preview of our speech to text system where you can transcribe recordings and audio files into text" - -- RAG: Preview of our RAG implementation where you can refer your files or integrate enterprise data within your company UI_TEXT_CONTENT["AISTUDIO::SETTINGS::DATAMODEL::PREVIEWFEATURESEXTENSIONS::T2708939138"] = "RAG: Preview of our RAG implementation where you can refer your files or integrate enterprise data within your company" -- Unknown preview feature 
UI_TEXT_CONTENT["AISTUDIO::SETTINGS::DATAMODEL::PREVIEWFEATURESEXTENSIONS::T2722827307"] = "Unknown preview feature" +-- Transcription: Preview of our speech to text system where you can transcribe recordings and audio files into text +UI_TEXT_CONTENT["AISTUDIO::SETTINGS::DATAMODEL::PREVIEWFEATURESEXTENSIONS::T714355911"] = "Transcription: Preview of our speech to text system where you can transcribe recordings and audio files into text" + -- Use no data sources, when sending an assistant result to a chat UI_TEXT_CONTENT["AISTUDIO::SETTINGS::DATAMODEL::SENDTOCHATDATASOURCEBEHAVIOREXTENSIONS::T1223925477"] = "Use no data sources, when sending an assistant result to a chat" diff --git a/app/MindWork AI Studio/Components/Settings/SettingsPanelTranscription.razor b/app/MindWork AI Studio/Components/Settings/SettingsPanelTranscription.razor new file mode 100644 index 00000000..e9f1575b --- /dev/null +++ b/app/MindWork AI Studio/Components/Settings/SettingsPanelTranscription.razor @@ -0,0 +1,64 @@ +@using AIStudio.Provider +@using AIStudio.Settings.DataModel +@inherits SettingsPanelBase + +@if (PreviewFeatures.PRE_SPEECH_TO_TEXT_2026.IsEnabled(this.SettingsManager)) +{ + + + + @T("Configured Transcription") + + + @T("With the support of transcription models, MindWork AI Studio can convert human speech into text. This is useful, for example, when you need to dictate text. You can choose from dedicated transcription models, but not multimodal LLMs (large language models) that can handle both speech and text. The configuration of multimodal models is done in the \"Configure providers\" section.") + + + + + + + + + + + + # + @T("Name") + @T("Provider") + @T("Model") + @T("Actions") + + + @context.Num + @context.Name + @context.UsedLLMProvider.ToName() + @GetTranscriptionProviderModelName(context) + + + + + + + + + + + + + + + + + + @if (this.SettingsManager.ConfigurationData.TranscriptionProviders.Count == 0) + { + + @T("No transcription provider configured yet.") + + } + + + @T("Add transcription provider") + + +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Components/Settings/SettingsPanelTranscription.razor.cs b/app/MindWork AI Studio/Components/Settings/SettingsPanelTranscription.razor.cs new file mode 100644 index 00000000..b9e699f7 --- /dev/null +++ b/app/MindWork AI Studio/Components/Settings/SettingsPanelTranscription.razor.cs @@ -0,0 +1,122 @@ +using AIStudio.Dialogs; +using AIStudio.Settings; + +using Microsoft.AspNetCore.Components; + +using DialogOptions = AIStudio.Dialogs.DialogOptions; + +namespace AIStudio.Components.Settings; + +public partial class SettingsPanelTranscription : SettingsPanelBase +{ + [Parameter] + public List> AvailableTranscriptionProviders { get; set; } = new(); + + [Parameter] + public EventCallback>> AvailableTranscriptionProvidersChanged { get; set; } + + private static string GetTranscriptionProviderModelName(TranscriptionProvider provider) + { + const int MAX_LENGTH = 36; + var modelName = provider.Model.ToString(); + return modelName.Length > MAX_LENGTH ? "[...] " + modelName[^Math.Min(MAX_LENGTH, modelName.Length)..] 
: modelName; + } + + #region Overrides of ComponentBase + + protected override async Task OnInitializedAsync() + { + await this.UpdateTranscriptionProviders(); + await base.OnInitializedAsync(); + } + + #endregion + + private async Task AddTranscriptionProvider() + { + var dialogParameters = new DialogParameters + { + { x => x.IsEditing, false }, + }; + + var dialogReference = await this.DialogService.ShowAsync(T("Add Transcription Provider"), dialogParameters, DialogOptions.FULLSCREEN); + var dialogResult = await dialogReference.Result; + if (dialogResult is null || dialogResult.Canceled) + return; + + var addedTranscription = (TranscriptionProvider)dialogResult.Data!; + addedTranscription = addedTranscription with { Num = this.SettingsManager.ConfigurationData.NextTranscriptionNum++ }; + + this.SettingsManager.ConfigurationData.TranscriptionProviders.Add(addedTranscription); + await this.UpdateTranscriptionProviders(); + + await this.SettingsManager.StoreSettings(); + await this.MessageBus.SendMessage(this, Event.CONFIGURATION_CHANGED); + } + + private async Task EditTranscriptionProvider(TranscriptionProvider transcriptionProvider) + { + var dialogParameters = new DialogParameters + { + { x => x.DataNum, transcriptionProvider.Num }, + { x => x.DataId, transcriptionProvider.Id }, + { x => x.DataName, transcriptionProvider.Name }, + { x => x.DataLLMProvider, transcriptionProvider.UsedLLMProvider }, + { x => x.DataModel, transcriptionProvider.Model }, + { x => x.DataHostname, transcriptionProvider.Hostname }, + { x => x.IsSelfHosted, transcriptionProvider.IsSelfHosted }, + { x => x.IsEditing, true }, + { x => x.DataHost, transcriptionProvider.Host }, + }; + + var dialogReference = await this.DialogService.ShowAsync(T("Edit Transcription Provider"), dialogParameters, DialogOptions.FULLSCREEN); + var dialogResult = await dialogReference.Result; + if (dialogResult is null || dialogResult.Canceled) + return; + + var editedTranscriptionProvider = (TranscriptionProvider)dialogResult.Data!; + + // Set the provider number if it's not set. This is important for providers + // added before we started saving the provider number. 
+ if(editedTranscriptionProvider.Num == 0) + editedTranscriptionProvider = editedTranscriptionProvider with { Num = this.SettingsManager.ConfigurationData.NextTranscriptionNum++ }; + + this.SettingsManager.ConfigurationData.TranscriptionProviders[this.SettingsManager.ConfigurationData.TranscriptionProviders.IndexOf(transcriptionProvider)] = editedTranscriptionProvider; + await this.UpdateTranscriptionProviders(); + + await this.SettingsManager.StoreSettings(); + await this.MessageBus.SendMessage(this, Event.CONFIGURATION_CHANGED); + } + + private async Task DeleteTranscriptionProvider(TranscriptionProvider provider) + { + var dialogParameters = new DialogParameters + { + { x => x.Message, string.Format(T("Are you sure you want to delete the transcription provider '{0}'?"), provider.Name) }, + }; + + var dialogReference = await this.DialogService.ShowAsync(T("Delete Transcription Provider"), dialogParameters, DialogOptions.FULLSCREEN); + var dialogResult = await dialogReference.Result; + if (dialogResult is null || dialogResult.Canceled) + return; + + var deleteSecretResponse = await this.RustService.DeleteAPIKey(provider); + if(deleteSecretResponse.Success) + { + this.SettingsManager.ConfigurationData.TranscriptionProviders.Remove(provider); + await this.SettingsManager.StoreSettings(); + } + + await this.UpdateTranscriptionProviders(); + await this.MessageBus.SendMessage(this, Event.CONFIGURATION_CHANGED); + } + + private async Task UpdateTranscriptionProviders() + { + this.AvailableTranscriptionProviders.Clear(); + foreach (var provider in this.SettingsManager.ConfigurationData.TranscriptionProviders) + this.AvailableTranscriptionProviders.Add(new (provider.Name, provider.Id)); + + await this.AvailableTranscriptionProvidersChanged.InvokeAsync(this.AvailableTranscriptionProviders); + } +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Dialogs/ProviderDialog.razor b/app/MindWork AI Studio/Dialogs/ProviderDialog.razor index 3b6fc209..dc92f441 100644 --- a/app/MindWork AI Studio/Dialogs/ProviderDialog.razor +++ b/app/MindWork AI Studio/Dialogs/ProviderDialog.razor @@ -44,9 +44,12 @@ @foreach (Host host in Enum.GetValues(typeof(Host))) { - - @host.Name() - + @if (host.IsChatSupported()) + { + + @host.Name() + + } } } diff --git a/app/MindWork AI Studio/Dialogs/TranscriptionProviderDialog.razor b/app/MindWork AI Studio/Dialogs/TranscriptionProviderDialog.razor new file mode 100644 index 00000000..5c862365 --- /dev/null +++ b/app/MindWork AI Studio/Dialogs/TranscriptionProviderDialog.razor @@ -0,0 +1,140 @@ +@using AIStudio.Provider +@using AIStudio.Provider.SelfHosted +@inherits MSGComponentBase + + + + + + @* ReSharper disable once CSharpWarnings::CS8974 *@ + + @foreach (LLMProviders provider in Enum.GetValues(typeof(LLMProviders))) + { + if (provider.ProvideTranscriptionAPI() || provider is LLMProviders.NONE) + { + + @provider.ToName() + + } + } + + + @T("Create account") + + + + @if (this.DataLLMProvider.IsAPIKeyNeeded(this.DataHost)) + { + + } + + @if (this.DataLLMProvider.IsHostnameNeeded()) + { + + } + + @if (this.DataLLMProvider.IsHostNeeded()) + { + + @foreach (Host host in Enum.GetValues(typeof(Host))) + { + if (host.IsTranscriptionSupported()) + { + + @host.Name() + + } + } + + } + + + + @if (this.DataLLMProvider.IsTranscriptionModelProvidedManually(this.DataHost)) + { + + } + else + { + + @T("Load") + + @if(this.availableModels.Count is 0) + { + + @T("No models loaded or available.") + + } + else + { + + @foreach (var model in this.availableModels) + { + + @model + 
+ } + + } + } + + + + @* ReSharper disable once CSharpWarnings::CS8974 *@ + + + + + + + + @T("Cancel") + + + @if(this.IsEditing) + { + @T("Update") + } + else + { + @T("Add") + } + + + \ No newline at end of file diff --git a/app/MindWork AI Studio/Dialogs/TranscriptionProviderDialog.razor.cs b/app/MindWork AI Studio/Dialogs/TranscriptionProviderDialog.razor.cs new file mode 100644 index 00000000..01caa6af --- /dev/null +++ b/app/MindWork AI Studio/Dialogs/TranscriptionProviderDialog.razor.cs @@ -0,0 +1,272 @@ +using AIStudio.Components; +using AIStudio.Provider; +using AIStudio.Settings; +using AIStudio.Tools.Services; +using AIStudio.Tools.Validation; + +using Microsoft.AspNetCore.Components; + +using Host = AIStudio.Provider.SelfHosted.Host; + +namespace AIStudio.Dialogs; + +public partial class TranscriptionProviderDialog : MSGComponentBase, ISecretId +{ + [CascadingParameter] + private IMudDialogInstance MudDialog { get; set; } = null!; + + /// + /// The transcription provider's number in the list. + /// + [Parameter] + public uint DataNum { get; set; } + + /// + /// The transcription provider's ID. + /// + [Parameter] + public string DataId { get; set; } = Guid.NewGuid().ToString(); + + /// + /// The user chosen name. + /// + [Parameter] + public string DataName { get; set; } = string.Empty; + + /// + /// The chosen hostname for self-hosted providers. + /// + [Parameter] + public string DataHostname { get; set; } = string.Empty; + + /// + /// The host to use, e.g., llama.cpp. + /// + [Parameter] + public Host DataHost { get; set; } = Host.NONE; + + /// + /// Is this provider self-hosted? + /// + [Parameter] + public bool IsSelfHosted { get; set; } + + /// + /// The provider to use. + /// + [Parameter] + public LLMProviders DataLLMProvider { get; set; } = LLMProviders.NONE; + + /// + /// The transcription model to use. + /// + [Parameter] + public Model DataModel { get; set; } + + /// + /// Should the dialog be in editing mode? + /// + [Parameter] + public bool IsEditing { get; init; } + + [Inject] + private RustService RustService { get; init; } = null!; + + private static readonly Dictionary SPELLCHECK_ATTRIBUTES = new(); + + /// + /// The list of used instance names. We need this to check for uniqueness. 
+ /// + private List UsedInstanceNames { get; set; } = []; + + private bool dataIsValid; + private string[] dataIssues = []; + private string dataAPIKey = string.Empty; + private string dataManuallyModel = string.Empty; + private string dataAPIKeyStorageIssue = string.Empty; + private string dataEditingPreviousInstanceName = string.Empty; + + // We get the form reference from Blazor code to validate it manually: + private MudForm form = null!; + + private readonly List availableModels = new(); + private readonly Encryption encryption = Program.ENCRYPTION; + private readonly ProviderValidation providerValidation; + + public TranscriptionProviderDialog() + { + this.providerValidation = new() + { + GetProvider = () => this.DataLLMProvider, + GetAPIKeyStorageIssue = () => this.dataAPIKeyStorageIssue, + GetPreviousInstanceName = () => this.dataEditingPreviousInstanceName, + GetUsedInstanceNames = () => this.UsedInstanceNames, + GetHost = () => this.DataHost, + }; + } + + private TranscriptionProvider CreateTranscriptionProviderSettings() + { + var cleanedHostname = this.DataHostname.Trim(); + Model model = default; + if(this.DataLLMProvider is LLMProviders.SELF_HOSTED) + { + switch (this.DataHost) + { + case Host.OLLAMA: + model = new Model(this.dataManuallyModel, null); + break; + + case Host.VLLM: + case Host.LM_STUDIO: + case Host.WHISPER_CPP: + model = this.DataModel; + break; + } + } + else + model = this.DataModel; + + return new() + { + Num = this.DataNum, + Id = this.DataId, + Name = this.DataName, + UsedLLMProvider = this.DataLLMProvider, + Model = model, + IsSelfHosted = this.DataLLMProvider is LLMProviders.SELF_HOSTED, + Hostname = cleanedHostname.EndsWith('/') ? cleanedHostname[..^1] : cleanedHostname, + Host = this.DataHost, + }; + } + + #region Overrides of ComponentBase + + protected override async Task OnInitializedAsync() + { + // Configure the spellchecking for the instance name input: + this.SettingsManager.InjectSpellchecking(SPELLCHECK_ATTRIBUTES); + + // Load the used instance names: + this.UsedInstanceNames = this.SettingsManager.ConfigurationData.TranscriptionProviders.Select(x => x.Name.ToLowerInvariant()).ToList(); + + // When editing, we need to load the data: + if(this.IsEditing) + { + this.dataEditingPreviousInstanceName = this.DataName.ToLowerInvariant(); + + // When using self-hosted models, we must copy the model name: + if (this.DataLLMProvider is LLMProviders.SELF_HOSTED) + this.dataManuallyModel = this.DataModel.Id; + + // + // We cannot load the API key for self-hosted providers: + // + if (this.DataLLMProvider is LLMProviders.SELF_HOSTED && this.DataHost is not Host.OLLAMA) + { + await this.ReloadModels(); + await base.OnInitializedAsync(); + return; + } + + // Load the API key: + var requestedSecret = await this.RustService.GetAPIKey(this, isTrying: this.DataLLMProvider is LLMProviders.SELF_HOSTED); + if (requestedSecret.Success) + this.dataAPIKey = await requestedSecret.Secret.Decrypt(this.encryption); + else + { + this.dataAPIKey = string.Empty; + if (this.DataLLMProvider is not LLMProviders.SELF_HOSTED) + { + this.dataAPIKeyStorageIssue = string.Format(T("Failed to load the API key from the operating system. The message was: {0}. 
You might ignore this message and provide the API key again."), requestedSecret.Issue); + await this.form.Validate(); + } + } + + await this.ReloadModels(); + } + + await base.OnInitializedAsync(); + } + + protected override async Task OnAfterRenderAsync(bool firstRender) + { + // Reset the validation when not editing and on the first render. + // We don't want to show validation errors when the user opens the dialog. + if(!this.IsEditing && firstRender) + this.form.ResetValidation(); + + await base.OnAfterRenderAsync(firstRender); + } + + #endregion + + #region Implementation of ISecretId + + public string SecretId => this.DataId; + + public string SecretName => this.DataName; + + #endregion + + private async Task Store() + { + await this.form.Validate(); + this.dataAPIKeyStorageIssue = string.Empty; + + // When the data is not valid, we don't store it: + if (!this.dataIsValid) + return; + + // Use the data model to store the provider. + // We just return this data to the parent component: + var addedProviderSettings = this.CreateTranscriptionProviderSettings(); + if (!string.IsNullOrWhiteSpace(this.dataAPIKey)) + { + // Store the API key in the OS secure storage: + var storeResponse = await this.RustService.SetAPIKey(this, this.dataAPIKey); + if (!storeResponse.Success) + { + this.dataAPIKeyStorageIssue = string.Format(T("Failed to store the API key in the operating system. The message was: {0}. Please try again."), storeResponse.Issue); + await this.form.Validate(); + return; + } + } + + this.MudDialog.Close(DialogResult.Ok(addedProviderSettings)); + } + + private string? ValidateManuallyModel(string manuallyModel) + { + if (this.DataLLMProvider is LLMProviders.SELF_HOSTED && string.IsNullOrWhiteSpace(manuallyModel)) + return T("Please enter a transcription model name."); + + return null; + } + + private void Cancel() => this.MudDialog.Cancel(); + + private async Task ReloadModels() + { + var currentTranscriptionProviderSettings = this.CreateTranscriptionProviderSettings(); + var provider = currentTranscriptionProviderSettings.CreateProvider(); + if(provider is NoProvider) + return; + + var models = await provider.GetTranscriptionModels(this.dataAPIKey); + + // Order descending by ID means that the newest models probably come first: + var orderedModels = models.OrderByDescending(n => n.Id); + + this.availableModels.Clear(); + this.availableModels.AddRange(orderedModels); + } + + private string APIKeyText => this.DataLLMProvider switch + { + LLMProviders.SELF_HOSTED => T("(Optional) API Key"), + _ => T("API Key"), + }; + + private bool IsNoneProvider => this.DataLLMProvider is LLMProviders.NONE; +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Pages/Settings.razor b/app/MindWork AI Studio/Pages/Settings.razor index 7931453f..70201807 100644 --- a/app/MindWork AI Studio/Pages/Settings.razor +++ b/app/MindWork AI Studio/Pages/Settings.razor @@ -15,6 +15,11 @@ } + @if (PreviewFeatures.PRE_SPEECH_TO_TEXT_2026.IsEnabled(this.SettingsManager)) + { + + } + @if (PreviewFeatures.PRE_RAG_2024.IsEnabled(this.SettingsManager)) diff --git a/app/MindWork AI Studio/Pages/Settings.razor.cs b/app/MindWork AI Studio/Pages/Settings.razor.cs index 89c7338b..561ebb46 100644 --- a/app/MindWork AI Studio/Pages/Settings.razor.cs +++ b/app/MindWork AI Studio/Pages/Settings.razor.cs @@ -9,6 +9,7 @@ public partial class Settings : MSGComponentBase { private List> availableLLMProviders = new(); private List> availableEmbeddingProviders = new(); + private List> availableTranscriptionProviders = 
new(); #region Overrides of ComponentBase diff --git a/app/MindWork AI Studio/Provider/AlibabaCloud/ProviderAlibabaCloud.cs b/app/MindWork AI Studio/Provider/AlibabaCloud/ProviderAlibabaCloud.cs index dacfeea5..6b648372 100644 --- a/app/MindWork AI Studio/Provider/AlibabaCloud/ProviderAlibabaCloud.cs +++ b/app/MindWork AI Studio/Provider/AlibabaCloud/ProviderAlibabaCloud.cs @@ -131,8 +131,17 @@ public sealed class ProviderAlibabaCloud() : BaseProvider(LLMProviders.ALIBABA_C return this.LoadModels(["text-embedding-"], token, apiKeyProvisional).ContinueWith(t => t.Result.Concat(additionalModels).OrderBy(x => x.Id).AsEnumerable(), token); } - - + + #region Overrides of BaseProvider + + /// + public override Task> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default) + { + return Task.FromResult(Enumerable.Empty()); + } + + #endregion + #endregion private async Task> LoadModels(string[] prefixes, CancellationToken token, string? apiKeyProvisional = null) diff --git a/app/MindWork AI Studio/Provider/Anthropic/ProviderAnthropic.cs b/app/MindWork AI Studio/Provider/Anthropic/ProviderAnthropic.cs index 7f02781b..42268936 100644 --- a/app/MindWork AI Studio/Provider/Anthropic/ProviderAnthropic.cs +++ b/app/MindWork AI Studio/Provider/Anthropic/ProviderAnthropic.cs @@ -136,6 +136,12 @@ public sealed class ProviderAnthropic() : BaseProvider(LLMProviders.ANTHROPIC, " return Task.FromResult(Enumerable.Empty()); } + /// + public override Task> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default) + { + return Task.FromResult(Enumerable.Empty()); + } + #endregion private async Task> LoadModels(CancellationToken token, string? apiKeyProvisional = null) diff --git a/app/MindWork AI Studio/Provider/BaseProvider.cs b/app/MindWork AI Studio/Provider/BaseProvider.cs index 62464910..c5594087 100644 --- a/app/MindWork AI Studio/Provider/BaseProvider.cs +++ b/app/MindWork AI Studio/Provider/BaseProvider.cs @@ -97,6 +97,9 @@ public abstract class BaseProvider : IProvider, ISecretId /// public abstract Task> GetEmbeddingModels(string? apiKeyProvisional = null, CancellationToken token = default); + + /// + public abstract Task> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default); #endregion diff --git a/app/MindWork AI Studio/Provider/DeepSeek/ProviderDeepSeek.cs b/app/MindWork AI Studio/Provider/DeepSeek/ProviderDeepSeek.cs index 4b597601..b2715f47 100644 --- a/app/MindWork AI Studio/Provider/DeepSeek/ProviderDeepSeek.cs +++ b/app/MindWork AI Studio/Provider/DeepSeek/ProviderDeepSeek.cs @@ -99,6 +99,11 @@ public sealed class ProviderDeepSeek() : BaseProvider(LLMProviders.DEEP_SEEK, "h return Task.FromResult(Enumerable.Empty()); } + /// + public override Task> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default) + { + return Task.FromResult(Enumerable.Empty()); + } #endregion diff --git a/app/MindWork AI Studio/Provider/Fireworks/ProviderFireworks.cs b/app/MindWork AI Studio/Provider/Fireworks/ProviderFireworks.cs index 5e056674..9450134d 100644 --- a/app/MindWork AI Studio/Provider/Fireworks/ProviderFireworks.cs +++ b/app/MindWork AI Studio/Provider/Fireworks/ProviderFireworks.cs @@ -100,5 +100,16 @@ public class ProviderFireworks() : BaseProvider(LLMProviders.FIREWORKS, "https:/ return Task.FromResult(Enumerable.Empty()); } + /// + public override Task> GetTranscriptionModels(string? 
apiKeyProvisional = null, CancellationToken token = default) + { + return Task.FromResult>( + new List + { + new("whisper-v3", "Whisper v3"), + new("whisper-v3-turbo", "Whisper v3 Turbo"), + }); + } + #endregion } \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/GWDG/ProviderGWDG.cs b/app/MindWork AI Studio/Provider/GWDG/ProviderGWDG.cs index deecd6d5..da322942 100644 --- a/app/MindWork AI Studio/Provider/GWDG/ProviderGWDG.cs +++ b/app/MindWork AI Studio/Provider/GWDG/ProviderGWDG.cs @@ -101,6 +101,17 @@ public sealed class ProviderGWDG() : BaseProvider(LLMProviders.GWDG, "https://ch return models.Where(model => model.Id.StartsWith("e5-", StringComparison.InvariantCultureIgnoreCase)); } + /// + public override Task> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default) + { + // Source: https://docs.hpc.gwdg.de/services/saia/index.html#voice-to-text + return Task.FromResult>( + new List + { + new("whisper-large-v2", "Whisper v2 Large"), + }); + } + #endregion private async Task> LoadModels(CancellationToken token, string? apiKeyProvisional = null) diff --git a/app/MindWork AI Studio/Provider/Google/ProviderGoogle.cs b/app/MindWork AI Studio/Provider/Google/ProviderGoogle.cs index a53368f9..fce1a451 100644 --- a/app/MindWork AI Studio/Provider/Google/ProviderGoogle.cs +++ b/app/MindWork AI Studio/Provider/Google/ProviderGoogle.cs @@ -112,6 +112,11 @@ public class ProviderGoogle() : BaseProvider(LLMProviders.GOOGLE, "https://gener .Select(n => new Provider.Model(n.Name.Replace("models/", string.Empty), n.DisplayName)); } + /// + public override Task> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default) + { + return Task.FromResult(Enumerable.Empty()); + } #endregion diff --git a/app/MindWork AI Studio/Provider/Groq/ProviderGroq.cs b/app/MindWork AI Studio/Provider/Groq/ProviderGroq.cs index 60d449b0..b6e9137a 100644 --- a/app/MindWork AI Studio/Provider/Groq/ProviderGroq.cs +++ b/app/MindWork AI Studio/Provider/Groq/ProviderGroq.cs @@ -100,6 +100,12 @@ public class ProviderGroq() : BaseProvider(LLMProviders.GROQ, "https://api.groq. return Task.FromResult(Enumerable.Empty()); } + /// + public override Task> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default) + { + return Task.FromResult(Enumerable.Empty()); + } + #endregion private async Task> LoadModels(CancellationToken token, string? apiKeyProvisional = null) diff --git a/app/MindWork AI Studio/Provider/Helmholtz/ProviderHelmholtz.cs b/app/MindWork AI Studio/Provider/Helmholtz/ProviderHelmholtz.cs index 07263d39..213bf075 100644 --- a/app/MindWork AI Studio/Provider/Helmholtz/ProviderHelmholtz.cs +++ b/app/MindWork AI Studio/Provider/Helmholtz/ProviderHelmholtz.cs @@ -105,6 +105,12 @@ public sealed class ProviderHelmholtz() : BaseProvider(LLMProviders.HELMHOLTZ, " model.Id.Contains("gritlm", StringComparison.InvariantCultureIgnoreCase)); } + /// + public override Task> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default) + { + return Task.FromResult(Enumerable.Empty()); + } + #endregion private async Task> LoadModels(CancellationToken token, string? 
apiKeyProvisional = null) diff --git a/app/MindWork AI Studio/Provider/HuggingFace/ProviderHuggingFace.cs b/app/MindWork AI Studio/Provider/HuggingFace/ProviderHuggingFace.cs index cfd2346c..794b4f42 100644 --- a/app/MindWork AI Studio/Provider/HuggingFace/ProviderHuggingFace.cs +++ b/app/MindWork AI Studio/Provider/HuggingFace/ProviderHuggingFace.cs @@ -104,5 +104,11 @@ public sealed class ProviderHuggingFace : BaseProvider return Task.FromResult(Enumerable.Empty()); } + /// + public override Task> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default) + { + return Task.FromResult(Enumerable.Empty()); + } + #endregion } \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/IProvider.cs b/app/MindWork AI Studio/Provider/IProvider.cs index 41d9b37d..4ae6dc6c 100644 --- a/app/MindWork AI Studio/Provider/IProvider.cs +++ b/app/MindWork AI Studio/Provider/IProvider.cs @@ -74,4 +74,11 @@ public interface IProvider /// The list of embedding models. public Task> GetEmbeddingModels(string? apiKeyProvisional = null, CancellationToken token = default); + /// + /// Load all possible transcription models that can be used with this provider. + /// + /// The provisional API key to use. Useful when the user is adding a new provider. When null, the stored API key is used. + /// >The cancellation token. + /// >The list of transcription models. + public Task> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default); } \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/LLMProvidersExtensions.cs b/app/MindWork AI Studio/Provider/LLMProvidersExtensions.cs index 6060644d..ea548923 100644 --- a/app/MindWork AI Studio/Provider/LLMProvidersExtensions.cs +++ b/app/MindWork AI Studio/Provider/LLMProvidersExtensions.cs @@ -141,6 +141,43 @@ public static class LLMProvidersExtensions _ => false, }; + + public static bool ProvideTranscriptionAPI(this LLMProviders llmProvider) => llmProvider switch + { + // + // Providers that support transcription: + // + LLMProviders.OPEN_AI => true, + LLMProviders.MISTRAL => true, + LLMProviders.FIREWORKS => true, + LLMProviders.GWDG => true, + + // + // Providers that support transcription but provide no OpenAI-compatible API yet: + // + LLMProviders.ALIBABA_CLOUD => false, + LLMProviders.GOOGLE => false, + + // + // Providers that do not support transcription: + // + LLMProviders.OPEN_ROUTER => false, + LLMProviders.GROQ => false, + LLMProviders.ANTHROPIC => false, + LLMProviders.X => false, + LLMProviders.DEEP_SEEK => false, + LLMProviders.HUGGINGFACE => false, + LLMProviders.PERPLEXITY => false, + + LLMProviders.HELMHOLTZ => false, + + // + // Self-hosted providers are treated as a special case anyway. + // + LLMProviders.SELF_HOSTED => true, + + _ => false, + }; /// /// Creates a new provider instance based on the provider value. @@ -162,6 +199,16 @@ public static class LLMProvidersExtensions return embeddingProviderSettings.UsedLLMProvider.CreateProvider(embeddingProviderSettings.Name, embeddingProviderSettings.Host, embeddingProviderSettings.Hostname, embeddingProviderSettings.Model, HFInferenceProvider.NONE); } + /// + /// Creates a new provider instance based on the speech provider value. + /// + /// The speech provider settings. + /// The provider instance. 
+ public static IProvider CreateProvider(this TranscriptionProvider transcriptionProviderSettings) + { + return transcriptionProviderSettings.UsedLLMProvider.CreateProvider(transcriptionProviderSettings.Name, transcriptionProviderSettings.Host, transcriptionProviderSettings.Hostname, transcriptionProviderSettings.Model, HFInferenceProvider.NONE); + } + private static IProvider CreateProvider(this LLMProviders provider, string instanceName, Host host, string hostname, Model model, HFInferenceProvider inferenceProvider, string expertProviderApiParameter = "") { try @@ -274,6 +321,11 @@ public static class LLMProvidersExtensions LLMProviders.SELF_HOSTED => host is not Host.LM_STUDIO, _ => false, }; + + public static bool IsTranscriptionModelProvidedManually(this LLMProviders provider, Host host) => provider switch + { + _ => false, + }; public static bool IsHostNeeded(this LLMProviders provider) => provider switch { @@ -345,6 +397,7 @@ public static class LLMProvidersExtensions case Host.OLLAMA: case Host.LM_STUDIO: case Host.VLLM: + case Host.WHISPER_CPP: return true; } } diff --git a/app/MindWork AI Studio/Provider/Mistral/ProviderMistral.cs b/app/MindWork AI Studio/Provider/Mistral/ProviderMistral.cs index 522757ea..598f7016 100644 --- a/app/MindWork AI Studio/Provider/Mistral/ProviderMistral.cs +++ b/app/MindWork AI Studio/Provider/Mistral/ProviderMistral.cs @@ -113,6 +113,17 @@ public sealed class ProviderMistral() : BaseProvider(LLMProviders.MISTRAL, "http return Task.FromResult(Enumerable.Empty()); } + /// + public override Task> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default) + { + // Source: https://docs.mistral.ai/capabilities/audio_transcription + return Task.FromResult>( + new List + { + new("voxtral-mini-latest", "Voxtral Mini Latest"), + }); + } + #endregion private async Task LoadModelList(string? apiKeyProvisional, CancellationToken token) diff --git a/app/MindWork AI Studio/Provider/NoProvider.cs b/app/MindWork AI Studio/Provider/NoProvider.cs index 73d75d61..4f92e5c9 100644 --- a/app/MindWork AI Studio/Provider/NoProvider.cs +++ b/app/MindWork AI Studio/Provider/NoProvider.cs @@ -23,6 +23,8 @@ public class NoProvider : IProvider public Task> GetImageModels(string? apiKeyProvisional = null, CancellationToken token = default) => Task.FromResult>([]); public Task> GetEmbeddingModels(string? apiKeyProvisional = null, CancellationToken token = default) => Task.FromResult>([]); + + public Task> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default) => Task.FromResult>([]); public async IAsyncEnumerable StreamChatCompletion(Model chatModel, ChatThread chatChatThread, SettingsManager settingsManager, [EnumeratorCancellation] CancellationToken token = default) { diff --git a/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs b/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs index b5a11e60..d06d6e15 100644 --- a/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs +++ b/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs @@ -241,6 +241,14 @@ public sealed class ProviderOpenAI() : BaseProvider(LLMProviders.OPEN_AI, "https return this.LoadModels(["text-embedding-"], token, apiKeyProvisional); } + /// + public override async Task> GetTranscriptionModels(string? 
apiKeyProvisional = null, CancellationToken token = default) + { + var models = await this.LoadModels(["whisper-", "gpt-"], token, apiKeyProvisional); + return models.Where(model => model.Id.StartsWith("whisper-", StringComparison.InvariantCultureIgnoreCase) || + model.Id.Contains("-transcribe", StringComparison.InvariantCultureIgnoreCase)); + } + #endregion private async Task> LoadModels(string[] prefixes, CancellationToken token, string? apiKeyProvisional = null) diff --git a/app/MindWork AI Studio/Provider/OpenRouter/ProviderOpenRouter.cs b/app/MindWork AI Studio/Provider/OpenRouter/ProviderOpenRouter.cs index 9f2d3648..d6945799 100644 --- a/app/MindWork AI Studio/Provider/OpenRouter/ProviderOpenRouter.cs +++ b/app/MindWork AI Studio/Provider/OpenRouter/ProviderOpenRouter.cs @@ -106,6 +106,12 @@ public sealed class ProviderOpenRouter() : BaseProvider(LLMProviders.OPEN_ROUTER { return this.LoadEmbeddingModels(token, apiKeyProvisional); } + + /// + public override Task> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default) + { + return Task.FromResult(Enumerable.Empty()); + } #endregion diff --git a/app/MindWork AI Studio/Provider/Perplexity/ProviderPerplexity.cs b/app/MindWork AI Studio/Provider/Perplexity/ProviderPerplexity.cs index 27101716..0616f2d9 100644 --- a/app/MindWork AI Studio/Provider/Perplexity/ProviderPerplexity.cs +++ b/app/MindWork AI Studio/Provider/Perplexity/ProviderPerplexity.cs @@ -107,6 +107,12 @@ public sealed class ProviderPerplexity() : BaseProvider(LLMProviders.PERPLEXITY, return Task.FromResult(Enumerable.Empty()); } + /// + public override Task> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default) + { + return Task.FromResult(Enumerable.Empty()); + } + #endregion private Task> LoadModels() => Task.FromResult>(KNOWN_MODELS); diff --git a/app/MindWork AI Studio/Provider/SelfHosted/Host.cs b/app/MindWork AI Studio/Provider/SelfHosted/Host.cs index ddb5738f..d0dde806 100644 --- a/app/MindWork AI Studio/Provider/SelfHosted/Host.cs +++ b/app/MindWork AI Studio/Provider/SelfHosted/Host.cs @@ -6,6 +6,7 @@ public enum Host LM_STUDIO, LLAMA_CPP, + WHISPER_CPP, OLLAMA, VLLM, } \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/SelfHosted/HostExtensions.cs b/app/MindWork AI Studio/Provider/SelfHosted/HostExtensions.cs index 642681ae..6c475273 100644 --- a/app/MindWork AI Studio/Provider/SelfHosted/HostExtensions.cs +++ b/app/MindWork AI Studio/Provider/SelfHosted/HostExtensions.cs @@ -8,6 +8,7 @@ public static class HostExtensions Host.LM_STUDIO => "LM Studio", Host.LLAMA_CPP => "llama.cpp", + Host.WHISPER_CPP => "whisper.cpp", Host.OLLAMA => "ollama", Host.VLLM => "vLLM", @@ -24,6 +25,27 @@ public static class HostExtensions _ => "chat/completions", }; + public static string TranscriptionURL(this Host host) => host switch + { + _ => "audio/transcriptions", + }; + + public static bool IsChatSupported(this Host host) + { + switch (host) + { + case Host.WHISPER_CPP: + return false; + + default: + case Host.OLLAMA: + case Host.VLLM: + case Host.LM_STUDIO: + case Host.LLAMA_CPP: + return true; + } + } + public static bool IsEmbeddingSupported(this Host host) { switch (host) @@ -38,4 +60,20 @@ public static class HostExtensions return false; } } + + public static bool IsTranscriptionSupported(this Host host) + { + switch (host) + { + case Host.OLLAMA: + case Host.VLLM: + case Host.WHISPER_CPP: + return true; + + default: + case Host.LM_STUDIO: + case Host.LLAMA_CPP: + return 
false; + } + } } \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs b/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs index 6f43d9ce..a61a3b26 100644 --- a/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs +++ b/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs @@ -6,12 +6,15 @@ using System.Text.Json; using AIStudio.Chat; using AIStudio.Provider.OpenAI; using AIStudio.Settings; +using AIStudio.Tools.PluginSystem; namespace AIStudio.Provider.SelfHosted; public sealed class ProviderSelfHosted(Host host, string hostname) : BaseProvider(LLMProviders.SELF_HOSTED, $"{hostname}{host.BaseURL()}", LOGGER) { private static readonly ILogger LOGGER = Program.LOGGER_FACTORY.CreateLogger(); + + private static string TB(string fallbackEN) => I18N.I.T(fallbackEN, typeof(ProviderSelfHosted).Namespace, nameof(ProviderSelfHosted)); #region Implementation of IProvider @@ -138,6 +141,35 @@ public sealed class ProviderSelfHosted(Host host, string hostname) : BaseProvide } } + /// + public override Task> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default) + { + try + { + switch (host) + { + case Host.WHISPER_CPP: + return Task.FromResult>( + new List + { + new("loaded-model", TB("Model as configured by whisper.cpp")), + }); + + case Host.OLLAMA: + case Host.VLLM: + return this.LoadModels([], [], token, apiKeyProvisional); + + default: + return Task.FromResult(Enumerable.Empty()); + } + } + catch (Exception e) + { + LOGGER.LogError(e, "Failed to load transcription models from self-hosted provider."); + return Task.FromResult(Enumerable.Empty()); + } + } + #endregion private async Task> LoadModels(string[] ignorePhrases, string[] filterPhrases, CancellationToken token, string? apiKeyProvisional = null) diff --git a/app/MindWork AI Studio/Provider/X/ProviderX.cs b/app/MindWork AI Studio/Provider/X/ProviderX.cs index db3f3a29..92aad1eb 100644 --- a/app/MindWork AI Studio/Provider/X/ProviderX.cs +++ b/app/MindWork AI Studio/Provider/X/ProviderX.cs @@ -101,6 +101,12 @@ public sealed class ProviderX() : BaseProvider(LLMProviders.X, "https://api.x.ai return Task.FromResult>([]); } + /// + public override Task> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default) + { + return Task.FromResult(Enumerable.Empty()); + } + #endregion private async Task> LoadModels(string[] prefixes, CancellationToken token, string? apiKeyProvisional = null) diff --git a/app/MindWork AI Studio/Settings/DataModel/Data.cs b/app/MindWork AI Studio/Settings/DataModel/Data.cs index c07ab3d6..622d737e 100644 --- a/app/MindWork AI Studio/Settings/DataModel/Data.cs +++ b/app/MindWork AI Studio/Settings/DataModel/Data.cs @@ -25,6 +25,11 @@ public sealed class Data /// A collection of embedding providers configured. /// public List EmbeddingProviders { get; init; } = []; + + /// + /// A collection of speech providers configured. + /// + public List TranscriptionProviders { get; init; } = []; /// /// A collection of data sources configured. @@ -52,9 +57,14 @@ public sealed class Data public uint NextProviderNum { get; set; } = 1; /// - /// The next embedding number to use. + /// The next embedding provider number to use. /// public uint NextEmbeddingNum { get; set; } = 1; + + /// + /// The next transcription provider number to use. + /// + public uint NextTranscriptionNum { get; set; } = 1; /// /// The next data source number to use. 
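With the `TranscriptionProviders` collection and the `NextTranscriptionNum` counter added to the settings data model above, a caller can walk the configured entries and resolve their selectable models through the `CreateProvider` and `GetTranscriptionModels` members introduced earlier in this diff. The following is a minimal sketch of that flow, not code from this changeset; the enclosing method, the console output, and the `settingsManager` parameter are illustrative assumptions.

// Sketch only: list the transcription models of every configured provider.
// Assumes the usual AIStudio.Provider / AIStudio.Settings usings are in scope.
private static async Task ListTranscriptionModelsAsync(SettingsManager settingsManager, CancellationToken token = default)
{
    foreach (var settings in settingsManager.ConfigurationData.TranscriptionProviders)
    {
        // CreateProvider(this TranscriptionProvider ...) maps the settings record
        // onto an IProvider implementation (cloud or self-hosted).
        var provider = settings.CreateProvider();
        if (provider is NoProvider)
            continue;

        // For whisper.cpp this yields the single "loaded-model" placeholder;
        // for ollama/vLLM the self-hosted model list is queried; cloud providers
        // return their curated or filtered transcription model lists.
        var models = await provider.GetTranscriptionModels(token: token);
        foreach (var model in models)
            Console.WriteLine($"{settings.Name}: {model.Id}");
    }
}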
diff --git a/app/MindWork AI Studio/Settings/DataModel/PreviewFeaturesExtensions.cs b/app/MindWork AI Studio/Settings/DataModel/PreviewFeaturesExtensions.cs
index 0433119c..00399f0b 100644
--- a/app/MindWork AI Studio/Settings/DataModel/PreviewFeaturesExtensions.cs
+++ b/app/MindWork AI Studio/Settings/DataModel/PreviewFeaturesExtensions.cs
@@ -14,7 +14,7 @@ public static class PreviewFeaturesExtensions
         PreviewFeatures.PRE_PLUGINS_2025 => TB("Plugins: Preview of our plugin system where you can extend the functionality of the app"),
         PreviewFeatures.PRE_READ_PDF_2025 => TB("Read PDF: Preview of our PDF reading system where you can read and extract text from PDF files"),
         PreviewFeatures.PRE_DOCUMENT_ANALYSIS_2025 => TB("Document Analysis: Preview of our document analysis system where you can analyze and extract information from documents"),
-        PreviewFeatures.PRE_SPEECH_TO_TEXT_2026 => TB("Speech to Text: Preview of our speech to text system where you can transcribe recordings and audio files into text"),
+        PreviewFeatures.PRE_SPEECH_TO_TEXT_2026 => TB("Transcription: Preview of our speech to text system where you can transcribe recordings and audio files into text"),
 
         _ => TB("Unknown preview feature")
     };
diff --git a/app/MindWork AI Studio/Settings/TranscriptionProvider.cs b/app/MindWork AI Studio/Settings/TranscriptionProvider.cs
new file mode 100644
index 00000000..a7ce8957
--- /dev/null
+++ b/app/MindWork AI Studio/Settings/TranscriptionProvider.cs
@@ -0,0 +1,32 @@
+using System.Text.Json.Serialization;
+
+using AIStudio.Provider;
+
+using Host = AIStudio.Provider.SelfHosted.Host;
+
+namespace AIStudio.Settings;
+
+public readonly record struct TranscriptionProvider(
+    uint Num,
+    string Id,
+    string Name,
+    LLMProviders UsedLLMProvider,
+    Model Model,
+    bool IsSelfHosted = false,
+    string Hostname = "http://localhost:1234",
+    Host Host = Host.NONE) : ISecretId
+{
+    public override string ToString() => this.Name;
+
+    #region Implementation of ISecretId
+
+    /// <inheritdoc />
+    [JsonIgnore]
+    public string SecretId => this.Id;
+
+    /// <inheritdoc />
+    [JsonIgnore]
+    public string SecretName => this.Name;
+
+    #endregion
+}
\ No newline at end of file
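For orientation, a hand-built instance of this record for a self-hosted whisper.cpp endpoint could look as follows. In the app the record is produced by `TranscriptionProviderDialog.CreateTranscriptionProviderSettings()` and managed through `SettingsPanelTranscription`; the concrete name and port below are illustrative assumptions only.

// Sketch only: a TranscriptionProvider entry for a local whisper.cpp server.
var whisperCpp = new TranscriptionProvider(
    Num: 1,                                     // assigned from NextTranscriptionNum in the real flow
    Id: Guid.NewGuid().ToString(),
    Name: "Local whisper.cpp",                  // illustrative instance name
    UsedLLMProvider: LLMProviders.SELF_HOSTED,
    Model: new Model("loaded-model", "Model as configured by whisper.cpp"),
    IsSelfHosted: true,
    Hostname: "http://localhost:8080",          // assumed port for the whisper.cpp server
    Host: Host.WHISPER_CPP);

// Because the record implements ISecretId (SecretId = Id, SecretName = Name),
// an optional API key for this entry can be stored and retrieved through the
// RustService secret storage under the record's Id.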