Added transcription provider configuration

This commit is contained in:
Thorsten Sommer 2026-01-09 09:50:01 +01:00
parent e9fe1e14b9
commit 49b2a0f785
Signed by: tsommer
GPG Key ID: 371BBA77A02C0108
32 changed files with 1008 additions and 13 deletions

View File

@ -2242,6 +2242,54 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELPROVIDERS::T853225
-- Provider
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELPROVIDERS::T900237532"] = "Provider"
-- No transcription provider configured yet.
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T1079350363"] = "No transcription provider configured yet."
-- Edit Transcription Provider
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T1317362918"] = "Edit Transcription Provider"
-- Delete
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T1469573738"] = "Delete"
-- Configure Transcription
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T1622062299"] = "Configure Transcription"
-- Add transcription provider
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T1645238629"] = "Add transcription provider"
-- Add Transcription Provider
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T2066315685"] = "Add Transcription Provider"
-- Model
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T2189814010"] = "Model"
-- Name
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T266367750"] = "Name"
-- Edit
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T3267849393"] = "Edit"
-- Delete Transcription Provider
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T370103955"] = "Delete Transcription Provider"
-- Actions
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T3865031940"] = "Actions"
-- Configured Transcription
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T4138223521"] = "Configured Transcription"
-- Open Dashboard
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T78223861"] = "Open Dashboard"
-- Are you sure you want to delete the transcription provider '{0}'?
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T789660305"] = "Are you sure you want to delete the transcription provider '{0}'?"
-- With the support of transcription models, MindWork AI Studio can convert human speech into text. This is useful, for example, when you need to dictate text. You can choose from dedicated transcription models, but not multimodal LLMs (large language models) that can handle both speech and text. The configuration of multimodal models is done in the \"Configure providers\" section.
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T799338148"] = "With the support of transcription models, MindWork AI Studio can convert human speech into text. This is useful, for example, when you need to dictate text. You can choose from dedicated transcription models, but not multimodal LLMs (large language models) that can handle both speech and text. The configuration of multimodal models is done in the \\\"Configure providers\\\" section."
-- Provider
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::SETTINGS::SETTINGSPANELTRANSCRIPTION::T900237532"] = "Provider"
-- Copy {0} to the clipboard
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::TEXTINFOLINE::T2206391442"] = "Copy {0} to the clipboard"
@ -3106,9 +3154,6 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2189814010"] = "Mo
-- (Optional) API Key
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2331453405"] = "(Optional) API Key"
-- Currently, we cannot query the embedding models of self-hosted systems. Therefore, enter the model name manually.
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2615586687"] = "Currently, we cannot query the embedding models of self-hosted systems. Therefore, enter the model name manually."
-- Add
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2646845972"] = "Add"
@ -3118,6 +3163,9 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2810182573"] = "No
-- Instance Name
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T2842060373"] = "Instance Name"
-- Currently, we cannot query the embedding models for the selected provider and/or host. Therefore, please enter the model name manually.
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T290547799"] = "Currently, we cannot query the embedding models for the selected provider and/or host. Therefore, please enter the model name manually."
-- Model selection
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGPROVIDERDIALOG::T416738168"] = "Model selection"
@ -3328,6 +3376,9 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::PROVIDERDIALOG::T3361153305"] = "Show Expert
-- Show available models
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::PROVIDERDIALOG::T3763891899"] = "Show available models"
-- Currently, we cannot query the models for the selected provider and/or host. Therefore, please enter the model name manually.
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::PROVIDERDIALOG::T4116737656"] = "Currently, we cannot query the models for the selected provider and/or host. Therefore, please enter the model name manually."
-- Model selection
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::PROVIDERDIALOG::T416738168"] = "Model selection"
@ -4504,6 +4555,60 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::SINGLEINPUTDIALOG::T4030229154"] = "Your Inp
-- Cancel
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::SINGLEINPUTDIALOG::T900713019"] = "Cancel"
-- Failed to store the API key in the operating system. The message was: {0}. Please try again.
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::TRANSCRIPTIONPROVIDERDIALOG::T1122745046"] = "Failed to store the API key in the operating system. The message was: {0}. Please try again."
-- API Key
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::TRANSCRIPTIONPROVIDERDIALOG::T1324664716"] = "API Key"
-- Create account
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::TRANSCRIPTIONPROVIDERDIALOG::T1356621346"] = "Create account"
-- Currently, we cannot query the transcription models for the selected provider and/or host. Therefore, please enter the model name manually.
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::TRANSCRIPTIONPROVIDERDIALOG::T1381635232"] = "Currently, we cannot query the transcription models for the selected provider and/or host. Therefore, please enter the model name manually."
-- Hostname
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::TRANSCRIPTIONPROVIDERDIALOG::T1727440780"] = "Hostname"
-- Load
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::TRANSCRIPTIONPROVIDERDIALOG::T1756340745"] = "Load"
-- Update
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::TRANSCRIPTIONPROVIDERDIALOG::T1847791252"] = "Update"
-- Failed to load the API key from the operating system. The message was: {0}. You might ignore this message and provide the API key again.
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::TRANSCRIPTIONPROVIDERDIALOG::T1870831108"] = "Failed to load the API key from the operating system. The message was: {0}. You might ignore this message and provide the API key again."
-- Model
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::TRANSCRIPTIONPROVIDERDIALOG::T2189814010"] = "Model"
-- (Optional) API Key
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::TRANSCRIPTIONPROVIDERDIALOG::T2331453405"] = "(Optional) API Key"
-- Add
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::TRANSCRIPTIONPROVIDERDIALOG::T2646845972"] = "Add"
-- No models loaded or available.
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::TRANSCRIPTIONPROVIDERDIALOG::T2810182573"] = "No models loaded or available."
-- Instance Name
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::TRANSCRIPTIONPROVIDERDIALOG::T2842060373"] = "Instance Name"
-- Please enter a transcription model name.
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::TRANSCRIPTIONPROVIDERDIALOG::T3703662664"] = "Please enter a transcription model name."
-- Model selection
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::TRANSCRIPTIONPROVIDERDIALOG::T416738168"] = "Model selection"
-- Host
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::TRANSCRIPTIONPROVIDERDIALOG::T808120719"] = "Host"
-- Provider
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::TRANSCRIPTIONPROVIDERDIALOG::T900237532"] = "Provider"
-- Cancel
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::TRANSCRIPTIONPROVIDERDIALOG::T900713019"] = "Cancel"
-- Install now
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::UPDATEDIALOG::T2366359512"] = "Install now"
@ -5206,6 +5311,9 @@ UI_TEXT_CONTENT["AISTUDIO::PROVIDER::LLMPROVIDERSEXTENSIONS::T3424652889"] = "Un
-- no model selected
UI_TEXT_CONTENT["AISTUDIO::PROVIDER::MODEL::T2234274832"] = "no model selected"
-- Model as configured by whisper.cpp
UI_TEXT_CONTENT["AISTUDIO::PROVIDER::SELFHOSTED::PROVIDERSELFHOSTED::T3313940770"] = "Model as configured by whisper.cpp"
-- Use no chat template
UI_TEXT_CONTENT["AISTUDIO::SETTINGS::CHATTEMPLATE::T4258819635"] = "Use no chat template"
@ -5374,15 +5482,15 @@ UI_TEXT_CONTENT["AISTUDIO::SETTINGS::DATAMODEL::PREVIEWFEATURESEXTENSIONS::T1848
-- Plugins: Preview of our plugin system where you can extend the functionality of the app
UI_TEXT_CONTENT["AISTUDIO::SETTINGS::DATAMODEL::PREVIEWFEATURESEXTENSIONS::T2056842933"] = "Plugins: Preview of our plugin system where you can extend the functionality of the app"
-- Speech to Text: Preview of our speech to text system where you can transcribe recordings and audio files into text
UI_TEXT_CONTENT["AISTUDIO::SETTINGS::DATAMODEL::PREVIEWFEATURESEXTENSIONS::T221133923"] = "Speech to Text: Preview of our speech to text system where you can transcribe recordings and audio files into text"
-- RAG: Preview of our RAG implementation where you can refer your files or integrate enterprise data within your company
UI_TEXT_CONTENT["AISTUDIO::SETTINGS::DATAMODEL::PREVIEWFEATURESEXTENSIONS::T2708939138"] = "RAG: Preview of our RAG implementation where you can refer your files or integrate enterprise data within your company"
-- Unknown preview feature
UI_TEXT_CONTENT["AISTUDIO::SETTINGS::DATAMODEL::PREVIEWFEATURESEXTENSIONS::T2722827307"] = "Unknown preview feature"
-- Transcription: Preview of our speech to text system where you can transcribe recordings and audio files into text
UI_TEXT_CONTENT["AISTUDIO::SETTINGS::DATAMODEL::PREVIEWFEATURESEXTENSIONS::T714355911"] = "Transcription: Preview of our speech to text system where you can transcribe recordings and audio files into text"
-- Use no data sources, when sending an assistant result to a chat
UI_TEXT_CONTENT["AISTUDIO::SETTINGS::DATAMODEL::SENDTOCHATDATASOURCEBEHAVIOREXTENSIONS::T1223925477"] = "Use no data sources, when sending an assistant result to a chat"

View File

@ -0,0 +1,64 @@
@using AIStudio.Provider
@using AIStudio.Settings.DataModel
@inherits SettingsPanelBase
@if (PreviewFeatures.PRE_SPEECH_TO_TEXT_2026.IsEnabled(this.SettingsManager))
{
<ExpansionPanel HeaderIcon="@Icons.Material.Filled.VoiceChat" HeaderText="@T("Configure Transcription")">
<PreviewPrototype ApplyInnerScrollingFix="true"/>
<MudText Typo="Typo.h4" Class="mb-3">
@T("Configured Transcription")
</MudText>
<MudJustifiedText Typo="Typo.body1" Class="mb-3">
@T("With the support of transcription models, MindWork AI Studio can convert human speech into text. This is useful, for example, when you need to dictate text. You can choose from dedicated transcription models, but not multimodal LLMs (large language models) that can handle both speech and text. The configuration of multimodal models is done in the \"Configure providers\" section.")
</MudJustifiedText>
<MudTable Items="@this.SettingsManager.ConfigurationData.TranscriptionProviders" Hover="@true" Class="border-dashed border rounded-lg">
<ColGroup>
<col style="width: 3em;"/>
<col style="width: 12em;"/>
<col style="width: 12em;"/>
<col/>
<col style="width: 16em;"/>
</ColGroup>
<HeaderContent>
<MudTh>#</MudTh>
<MudTh>@T("Name")</MudTh>
<MudTh>@T("Provider")</MudTh>
<MudTh>@T("Model")</MudTh>
<MudTh>@T("Actions")</MudTh>
</HeaderContent>
<RowTemplate>
<MudTd>@context.Num</MudTd>
<MudTd>@context.Name</MudTd>
<MudTd>@context.UsedLLMProvider.ToName()</MudTd>
<MudTd>@GetTranscriptionProviderModelName(context)</MudTd>
<MudTd>
<MudStack Row="true" Class="mb-2 mt-2" Wrap="Wrap.Wrap">
<MudTooltip Text="@T("Open Dashboard")">
<MudIconButton Color="Color.Info" Icon="@Icons.Material.Filled.OpenInBrowser" Href="@context.UsedLLMProvider.GetDashboardURL()" Target="_blank" Disabled="@(!context.UsedLLMProvider.HasDashboard())"/>
</MudTooltip>
<MudTooltip Text="@T("Edit")">
<MudIconButton Color="Color.Info" Icon="@Icons.Material.Filled.Edit" OnClick="@(() => this.EditTranscriptionProvider(context))"/>
</MudTooltip>
<MudTooltip Text="@T("Delete")">
<MudIconButton Color="Color.Error" Icon="@Icons.Material.Filled.Delete" OnClick="@(() => this.DeleteTranscriptionProvider(context))"/>
</MudTooltip>
</MudStack>
</MudTd>
</RowTemplate>
</MudTable>
@if (this.SettingsManager.ConfigurationData.TranscriptionProviders.Count == 0)
{
<MudText Typo="Typo.h6" Class="mt-3">
@T("No transcription provider configured yet.")
</MudText>
}
<MudButton Variant="Variant.Filled" Color="@Color.Primary" StartIcon="@Icons.Material.Filled.AddRoad" Class="mt-3 mb-6" OnClick="@this.AddTranscriptionProvider">
@T("Add transcription provider")
</MudButton>
</ExpansionPanel>
}

View File

@ -0,0 +1,122 @@
using AIStudio.Dialogs;
using AIStudio.Settings;
using Microsoft.AspNetCore.Components;
using DialogOptions = AIStudio.Dialogs.DialogOptions;
namespace AIStudio.Components.Settings;
public partial class SettingsPanelTranscription : SettingsPanelBase
{
[Parameter]
public List<ConfigurationSelectData<string>> AvailableTranscriptionProviders { get; set; } = new();
[Parameter]
public EventCallback<List<ConfigurationSelectData<string>>> AvailableTranscriptionProvidersChanged { get; set; }
private static string GetTranscriptionProviderModelName(TranscriptionProvider provider)
{
const int MAX_LENGTH = 36;
var modelName = provider.Model.ToString();
return modelName.Length > MAX_LENGTH ? "[...] " + modelName[^Math.Min(MAX_LENGTH, modelName.Length)..] : modelName;
}
#region Overrides of ComponentBase
protected override async Task OnInitializedAsync()
{
await this.UpdateTranscriptionProviders();
await base.OnInitializedAsync();
}
#endregion
private async Task AddTranscriptionProvider()
{
var dialogParameters = new DialogParameters<TranscriptionProviderDialog>
{
{ x => x.IsEditing, false },
};
var dialogReference = await this.DialogService.ShowAsync<TranscriptionProviderDialog>(T("Add Transcription Provider"), dialogParameters, DialogOptions.FULLSCREEN);
var dialogResult = await dialogReference.Result;
if (dialogResult is null || dialogResult.Canceled)
return;
var addedTranscription = (TranscriptionProvider)dialogResult.Data!;
addedTranscription = addedTranscription with { Num = this.SettingsManager.ConfigurationData.NextTranscriptionNum++ };
this.SettingsManager.ConfigurationData.TranscriptionProviders.Add(addedTranscription);
await this.UpdateTranscriptionProviders();
await this.SettingsManager.StoreSettings();
await this.MessageBus.SendMessage<bool>(this, Event.CONFIGURATION_CHANGED);
}
private async Task EditTranscriptionProvider(TranscriptionProvider transcriptionProvider)
{
var dialogParameters = new DialogParameters<TranscriptionProviderDialog>
{
{ x => x.DataNum, transcriptionProvider.Num },
{ x => x.DataId, transcriptionProvider.Id },
{ x => x.DataName, transcriptionProvider.Name },
{ x => x.DataLLMProvider, transcriptionProvider.UsedLLMProvider },
{ x => x.DataModel, transcriptionProvider.Model },
{ x => x.DataHostname, transcriptionProvider.Hostname },
{ x => x.IsSelfHosted, transcriptionProvider.IsSelfHosted },
{ x => x.IsEditing, true },
{ x => x.DataHost, transcriptionProvider.Host },
};
var dialogReference = await this.DialogService.ShowAsync<TranscriptionProviderDialog>(T("Edit Transcription Provider"), dialogParameters, DialogOptions.FULLSCREEN);
var dialogResult = await dialogReference.Result;
if (dialogResult is null || dialogResult.Canceled)
return;
var editedTranscriptionProvider = (TranscriptionProvider)dialogResult.Data!;
// Set the provider number if it's not set. This is important for providers
// added before we started saving the provider number.
if(editedTranscriptionProvider.Num == 0)
editedTranscriptionProvider = editedTranscriptionProvider with { Num = this.SettingsManager.ConfigurationData.NextTranscriptionNum++ };
this.SettingsManager.ConfigurationData.TranscriptionProviders[this.SettingsManager.ConfigurationData.TranscriptionProviders.IndexOf(transcriptionProvider)] = editedTranscriptionProvider;
await this.UpdateTranscriptionProviders();
await this.SettingsManager.StoreSettings();
await this.MessageBus.SendMessage<bool>(this, Event.CONFIGURATION_CHANGED);
}
private async Task DeleteTranscriptionProvider(TranscriptionProvider provider)
{
var dialogParameters = new DialogParameters<ConfirmDialog>
{
{ x => x.Message, string.Format(T("Are you sure you want to delete the transcription provider '{0}'?"), provider.Name) },
};
var dialogReference = await this.DialogService.ShowAsync<ConfirmDialog>(T("Delete Transcription Provider"), dialogParameters, DialogOptions.FULLSCREEN);
var dialogResult = await dialogReference.Result;
if (dialogResult is null || dialogResult.Canceled)
return;
var deleteSecretResponse = await this.RustService.DeleteAPIKey(provider);
if(deleteSecretResponse.Success)
{
this.SettingsManager.ConfigurationData.TranscriptionProviders.Remove(provider);
await this.SettingsManager.StoreSettings();
}
await this.UpdateTranscriptionProviders();
await this.MessageBus.SendMessage<bool>(this, Event.CONFIGURATION_CHANGED);
}
private async Task UpdateTranscriptionProviders()
{
this.AvailableTranscriptionProviders.Clear();
foreach (var provider in this.SettingsManager.ConfigurationData.TranscriptionProviders)
this.AvailableTranscriptionProviders.Add(new (provider.Name, provider.Id));
await this.AvailableTranscriptionProvidersChanged.InvokeAsync(this.AvailableTranscriptionProviders);
}
}

View File

@ -44,9 +44,12 @@
<MudSelect @bind-Value="@this.DataHost" Label="@T("Host")" Class="mb-3" OpenIcon="@Icons.Material.Filled.ExpandMore" AdornmentColor="Color.Info" Adornment="Adornment.Start" Validation="@this.providerValidation.ValidatingHost">
@foreach (Host host in Enum.GetValues(typeof(Host)))
{
<MudSelectItem Value="@host">
@host.Name()
</MudSelectItem>
@if (host.IsChatSupported())
{
<MudSelectItem Value="@host">
@host.Name()
</MudSelectItem>
}
}
</MudSelect>
}

View File

@ -0,0 +1,140 @@
@using AIStudio.Provider
@using AIStudio.Provider.SelfHosted
@inherits MSGComponentBase
<MudDialog>
<DialogContent>
<MudForm @ref="@this.form" @bind-IsValid="@this.dataIsValid" @bind-Errors="@this.dataIssues">
<MudStack Row="@true" AlignItems="AlignItems.Center">
@* ReSharper disable once CSharpWarnings::CS8974 *@
<MudSelect @bind-Value="@this.DataLLMProvider" Label="@T("Provider")" Class="mb-3" OpenIcon="@Icons.Material.Filled.AccountBalance" AdornmentColor="Color.Info" Adornment="Adornment.Start" Validation="@this.providerValidation.ValidatingProvider">
@foreach (LLMProviders provider in Enum.GetValues(typeof(LLMProviders)))
{
if (provider.ProvideTranscriptionAPI() || provider is LLMProviders.NONE)
{
<MudSelectItem Value="@provider">
@provider.ToName()
</MudSelectItem>
}
}
</MudSelect>
<MudButton Disabled="@(!this.DataLLMProvider.ShowRegisterButton())" Variant="Variant.Filled" Size="Size.Small" StartIcon="@Icons.Material.Filled.OpenInBrowser" Href="@this.DataLLMProvider.GetCreationURL()" Target="_blank">
@T("Create account")
</MudButton>
</MudStack>
@if (this.DataLLMProvider.IsAPIKeyNeeded(this.DataHost))
{
<SecretInputField @bind-Secret="@this.dataAPIKey" Label="@this.APIKeyText" Validation="@this.providerValidation.ValidatingAPIKey"/>
}
@if (this.DataLLMProvider.IsHostnameNeeded())
{
<MudTextField
T="string"
@bind-Text="@this.DataHostname"
Label="@T("Hostname")"
Class="mb-3"
Adornment="Adornment.Start"
AdornmentIcon="@Icons.Material.Filled.Dns"
AdornmentColor="Color.Info"
Validation="@this.providerValidation.ValidatingHostname"
UserAttributes="@SPELLCHECK_ATTRIBUTES"/>
}
@if (this.DataLLMProvider.IsHostNeeded())
{
<MudSelect @bind-Value="@this.DataHost" Label="@T("Host")" Class="mb-3" OpenIcon="@Icons.Material.Filled.ExpandMore" AdornmentColor="Color.Info" Adornment="Adornment.Start" Validation="@this.providerValidation.ValidatingHost">
@foreach (Host host in Enum.GetValues(typeof(Host)))
{
if (host.IsTranscriptionSupported())
{
<MudSelectItem Value="@host">
@host.Name()
</MudSelectItem>
}
}
</MudSelect>
}
<MudField FullWidth="true" Label="@T("Model selection")" Variant="Variant.Outlined" Class="mb-3">
<MudStack Row="@true" AlignItems="AlignItems.Center" StretchItems="StretchItems.End">
@if (this.DataLLMProvider.IsTranscriptionModelProvidedManually(this.DataHost))
{
<MudTextField
T="string"
@bind-Text="@this.dataManuallyModel"
Label="@T("Model")"
Class="mb-3"
Adornment="Adornment.Start"
AdornmentIcon="@Icons.Material.Filled.Dns"
AdornmentColor="Color.Info"
Validation="@this.ValidateManuallyModel"
UserAttributes="@SPELLCHECK_ATTRIBUTES"
HelperText="@T("Currently, we cannot query the transcription models for the selected provider and/or host. Therefore, please enter the model name manually.")"
/>
}
else
{
<MudButton Disabled="@(!this.DataLLMProvider.CanLoadModels(this.DataHost, this.dataAPIKey))" Variant="Variant.Filled" Size="Size.Small" StartIcon="@Icons.Material.Filled.Refresh" OnClick="@this.ReloadModels">
@T("Load")
</MudButton>
@if(this.availableModels.Count is 0)
{
<MudText Typo="Typo.body1">
@T("No models loaded or available.")
</MudText>
}
else
{
<MudSelect Disabled="@this.IsNoneProvider" @bind-Value="@this.DataModel" Label="@T("Model")"
OpenIcon="@Icons.Material.Filled.FaceRetouchingNatural"
AdornmentColor="Color.Info" Adornment="Adornment.Start"
Validation="@this.providerValidation.ValidatingModel">
@foreach (var model in this.availableModels)
{
<MudSelectItem Value="@model">
@model
</MudSelectItem>
}
</MudSelect>
}
}
</MudStack>
</MudField>
@* ReSharper disable once CSharpWarnings::CS8974 *@
<MudTextField
T="string"
@bind-Text="@this.DataName"
Label="@T("Instance Name")"
Class="mb-3"
MaxLength="40"
Counter="40"
Immediate="@true"
Adornment="Adornment.Start"
AdornmentIcon="@Icons.Material.Filled.Lightbulb"
AdornmentColor="Color.Info"
Validation="@this.providerValidation.ValidatingInstanceName"
UserAttributes="@SPELLCHECK_ATTRIBUTES"
/>
</MudForm>
<Issues IssuesData="@this.dataIssues"/>
</DialogContent>
<DialogActions>
<MudButton OnClick="@this.Cancel" Variant="Variant.Filled">
@T("Cancel")
</MudButton>
<MudButton OnClick="@this.Store" Variant="Variant.Filled" Color="Color.Primary">
@if(this.IsEditing)
{
@T("Update")
}
else
{
@T("Add")
}
</MudButton>
</DialogActions>
</MudDialog>

View File

@ -0,0 +1,272 @@
using AIStudio.Components;
using AIStudio.Provider;
using AIStudio.Settings;
using AIStudio.Tools.Services;
using AIStudio.Tools.Validation;
using Microsoft.AspNetCore.Components;
using Host = AIStudio.Provider.SelfHosted.Host;
namespace AIStudio.Dialogs;
public partial class TranscriptionProviderDialog : MSGComponentBase, ISecretId
{
[CascadingParameter]
private IMudDialogInstance MudDialog { get; set; } = null!;
/// <summary>
/// The transcription provider's number in the list.
/// </summary>
[Parameter]
public uint DataNum { get; set; }
/// <summary>
/// The transcription provider's ID.
/// </summary>
[Parameter]
public string DataId { get; set; } = Guid.NewGuid().ToString();
/// <summary>
/// The user chosen name.
/// </summary>
[Parameter]
public string DataName { get; set; } = string.Empty;
/// <summary>
/// The chosen hostname for self-hosted providers.
/// </summary>
[Parameter]
public string DataHostname { get; set; } = string.Empty;
/// <summary>
/// The host to use, e.g., llama.cpp.
/// </summary>
[Parameter]
public Host DataHost { get; set; } = Host.NONE;
/// <summary>
/// Is this provider self-hosted?
/// </summary>
[Parameter]
public bool IsSelfHosted { get; set; }
/// <summary>
/// The provider to use.
/// </summary>
[Parameter]
public LLMProviders DataLLMProvider { get; set; } = LLMProviders.NONE;
/// <summary>
/// The transcription model to use.
/// </summary>
[Parameter]
public Model DataModel { get; set; }
/// <summary>
/// Should the dialog be in editing mode?
/// </summary>
[Parameter]
public bool IsEditing { get; init; }
[Inject]
private RustService RustService { get; init; } = null!;
private static readonly Dictionary<string, object?> SPELLCHECK_ATTRIBUTES = new();
/// <summary>
/// The list of used instance names. We need this to check for uniqueness.
/// </summary>
private List<string> UsedInstanceNames { get; set; } = [];
private bool dataIsValid;
private string[] dataIssues = [];
private string dataAPIKey = string.Empty;
private string dataManuallyModel = string.Empty;
private string dataAPIKeyStorageIssue = string.Empty;
private string dataEditingPreviousInstanceName = string.Empty;
// We get the form reference from Blazor code to validate it manually:
private MudForm form = null!;
private readonly List<Model> availableModels = new();
private readonly Encryption encryption = Program.ENCRYPTION;
private readonly ProviderValidation providerValidation;
public TranscriptionProviderDialog()
{
this.providerValidation = new()
{
GetProvider = () => this.DataLLMProvider,
GetAPIKeyStorageIssue = () => this.dataAPIKeyStorageIssue,
GetPreviousInstanceName = () => this.dataEditingPreviousInstanceName,
GetUsedInstanceNames = () => this.UsedInstanceNames,
GetHost = () => this.DataHost,
};
}
private TranscriptionProvider CreateTranscriptionProviderSettings()
{
var cleanedHostname = this.DataHostname.Trim();
Model model = default;
if(this.DataLLMProvider is LLMProviders.SELF_HOSTED)
{
switch (this.DataHost)
{
case Host.OLLAMA:
model = new Model(this.dataManuallyModel, null);
break;
case Host.VLLM:
case Host.LM_STUDIO:
case Host.WHISPER_CPP:
model = this.DataModel;
break;
}
}
else
model = this.DataModel;
return new()
{
Num = this.DataNum,
Id = this.DataId,
Name = this.DataName,
UsedLLMProvider = this.DataLLMProvider,
Model = model,
IsSelfHosted = this.DataLLMProvider is LLMProviders.SELF_HOSTED,
Hostname = cleanedHostname.EndsWith('/') ? cleanedHostname[..^1] : cleanedHostname,
Host = this.DataHost,
};
}
#region Overrides of ComponentBase
protected override async Task OnInitializedAsync()
{
// Configure the spellchecking for the instance name input:
this.SettingsManager.InjectSpellchecking(SPELLCHECK_ATTRIBUTES);
// Load the used instance names:
this.UsedInstanceNames = this.SettingsManager.ConfigurationData.TranscriptionProviders.Select(x => x.Name.ToLowerInvariant()).ToList();
// When editing, we need to load the data:
if(this.IsEditing)
{
this.dataEditingPreviousInstanceName = this.DataName.ToLowerInvariant();
// When using self-hosted models, we must copy the model name:
if (this.DataLLMProvider is LLMProviders.SELF_HOSTED)
this.dataManuallyModel = this.DataModel.Id;
//
// We cannot load the API key for self-hosted providers:
//
if (this.DataLLMProvider is LLMProviders.SELF_HOSTED && this.DataHost is not Host.OLLAMA)
{
await this.ReloadModels();
await base.OnInitializedAsync();
return;
}
// Load the API key:
var requestedSecret = await this.RustService.GetAPIKey(this, isTrying: this.DataLLMProvider is LLMProviders.SELF_HOSTED);
if (requestedSecret.Success)
this.dataAPIKey = await requestedSecret.Secret.Decrypt(this.encryption);
else
{
this.dataAPIKey = string.Empty;
if (this.DataLLMProvider is not LLMProviders.SELF_HOSTED)
{
this.dataAPIKeyStorageIssue = string.Format(T("Failed to load the API key from the operating system. The message was: {0}. You might ignore this message and provide the API key again."), requestedSecret.Issue);
await this.form.Validate();
}
}
await this.ReloadModels();
}
await base.OnInitializedAsync();
}
protected override async Task OnAfterRenderAsync(bool firstRender)
{
// Reset the validation when not editing and on the first render.
// We don't want to show validation errors when the user opens the dialog.
if(!this.IsEditing && firstRender)
this.form.ResetValidation();
await base.OnAfterRenderAsync(firstRender);
}
#endregion
#region Implementation of ISecretId
public string SecretId => this.DataId;
public string SecretName => this.DataName;
#endregion
private async Task Store()
{
await this.form.Validate();
this.dataAPIKeyStorageIssue = string.Empty;
// When the data is not valid, we don't store it:
if (!this.dataIsValid)
return;
// Use the data model to store the provider.
// We just return this data to the parent component:
var addedProviderSettings = this.CreateTranscriptionProviderSettings();
if (!string.IsNullOrWhiteSpace(this.dataAPIKey))
{
// Store the API key in the OS secure storage:
var storeResponse = await this.RustService.SetAPIKey(this, this.dataAPIKey);
if (!storeResponse.Success)
{
this.dataAPIKeyStorageIssue = string.Format(T("Failed to store the API key in the operating system. The message was: {0}. Please try again."), storeResponse.Issue);
await this.form.Validate();
return;
}
}
this.MudDialog.Close(DialogResult.Ok(addedProviderSettings));
}
private string? ValidateManuallyModel(string manuallyModel)
{
if (this.DataLLMProvider is LLMProviders.SELF_HOSTED && string.IsNullOrWhiteSpace(manuallyModel))
return T("Please enter a transcription model name.");
return null;
}
private void Cancel() => this.MudDialog.Cancel();
private async Task ReloadModels()
{
var currentTranscriptionProviderSettings = this.CreateTranscriptionProviderSettings();
var provider = currentTranscriptionProviderSettings.CreateProvider();
if(provider is NoProvider)
return;
var models = await provider.GetTranscriptionModels(this.dataAPIKey);
// Order descending by ID means that the newest models probably come first:
var orderedModels = models.OrderByDescending(n => n.Id);
this.availableModels.Clear();
this.availableModels.AddRange(orderedModels);
}
private string APIKeyText => this.DataLLMProvider switch
{
LLMProviders.SELF_HOSTED => T("(Optional) API Key"),
_ => T("API Key"),
};
private bool IsNoneProvider => this.DataLLMProvider is LLMProviders.NONE;
}

View File

@ -15,6 +15,11 @@
<SettingsPanelEmbeddings AvailableLLMProvidersFunc="@(() => this.availableLLMProviders)" @bind-AvailableEmbeddingProviders="@this.availableEmbeddingProviders"/>
}
@if (PreviewFeatures.PRE_SPEECH_TO_TEXT_2026.IsEnabled(this.SettingsManager))
{
<SettingsPanelTranscription AvailableLLMProvidersFunc="@(() => this.availableLLMProviders)" @bind-AvailableTranscriptionProviders="@this.availableTranscriptionProviders"/>
}
<SettingsPanelApp AvailableLLMProvidersFunc="@(() => this.availableLLMProviders)"/>
@if (PreviewFeatures.PRE_RAG_2024.IsEnabled(this.SettingsManager))

View File

@ -9,6 +9,7 @@ public partial class Settings : MSGComponentBase
{
private List<ConfigurationSelectData<string>> availableLLMProviders = new();
private List<ConfigurationSelectData<string>> availableEmbeddingProviders = new();
private List<ConfigurationSelectData<string>> availableTranscriptionProviders = new();
#region Overrides of ComponentBase

View File

@ -131,8 +131,17 @@ public sealed class ProviderAlibabaCloud() : BaseProvider(LLMProviders.ALIBABA_C
return this.LoadModels(["text-embedding-"], token, apiKeyProvisional).ContinueWith(t => t.Result.Concat(additionalModels).OrderBy(x => x.Id).AsEnumerable(), token);
}
#region Overrides of BaseProvider
/// <inheritdoc />
public override Task<IEnumerable<Model>> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default)
{
return Task.FromResult(Enumerable.Empty<Model>());
}
#endregion
#endregion
private async Task<IEnumerable<Model>> LoadModels(string[] prefixes, CancellationToken token, string? apiKeyProvisional = null)

View File

@ -136,6 +136,12 @@ public sealed class ProviderAnthropic() : BaseProvider(LLMProviders.ANTHROPIC, "
return Task.FromResult(Enumerable.Empty<Model>());
}
/// <inheritdoc />
public override Task<IEnumerable<Model>> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default)
{
return Task.FromResult(Enumerable.Empty<Model>());
}
#endregion
private async Task<IEnumerable<Model>> LoadModels(CancellationToken token, string? apiKeyProvisional = null)

View File

@ -97,6 +97,9 @@ public abstract class BaseProvider : IProvider, ISecretId
/// <inheritdoc />
public abstract Task<IEnumerable<Model>> GetEmbeddingModels(string? apiKeyProvisional = null, CancellationToken token = default);
/// <inheritdoc />
public abstract Task<IEnumerable<Model>> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default);
#endregion

View File

@ -99,6 +99,11 @@ public sealed class ProviderDeepSeek() : BaseProvider(LLMProviders.DEEP_SEEK, "h
return Task.FromResult(Enumerable.Empty<Model>());
}
/// <inheritdoc />
public override Task<IEnumerable<Model>> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default)
{
return Task.FromResult(Enumerable.Empty<Model>());
}
#endregion

View File

@ -100,5 +100,16 @@ public class ProviderFireworks() : BaseProvider(LLMProviders.FIREWORKS, "https:/
return Task.FromResult(Enumerable.Empty<Model>());
}
/// <inheritdoc />
public override Task<IEnumerable<Model>> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default)
{
return Task.FromResult<IEnumerable<Model>>(
new List<Model>
{
new("whisper-v3", "Whisper v3"),
new("whisper-v3-turbo", "Whisper v3 Turbo"),
});
}
#endregion
}

View File

@ -101,6 +101,17 @@ public sealed class ProviderGWDG() : BaseProvider(LLMProviders.GWDG, "https://ch
return models.Where(model => model.Id.StartsWith("e5-", StringComparison.InvariantCultureIgnoreCase));
}
/// <inheritdoc />
public override Task<IEnumerable<Model>> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default)
{
// Source: https://docs.hpc.gwdg.de/services/saia/index.html#voice-to-text
return Task.FromResult<IEnumerable<Model>>(
new List<Model>
{
new("whisper-large-v2", "Whisper v2 Large"),
});
}
#endregion
private async Task<IEnumerable<Model>> LoadModels(CancellationToken token, string? apiKeyProvisional = null)

View File

@ -112,6 +112,11 @@ public class ProviderGoogle() : BaseProvider(LLMProviders.GOOGLE, "https://gener
.Select(n => new Provider.Model(n.Name.Replace("models/", string.Empty), n.DisplayName));
}
/// <inheritdoc />
public override Task<IEnumerable<Provider.Model>> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default)
{
return Task.FromResult(Enumerable.Empty<Provider.Model>());
}
#endregion

View File

@ -100,6 +100,12 @@ public class ProviderGroq() : BaseProvider(LLMProviders.GROQ, "https://api.groq.
return Task.FromResult(Enumerable.Empty<Model>());
}
/// <inheritdoc />
public override Task<IEnumerable<Model>> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default)
{
return Task.FromResult(Enumerable.Empty<Model>());
}
#endregion
private async Task<IEnumerable<Model>> LoadModels(CancellationToken token, string? apiKeyProvisional = null)

View File

@ -105,6 +105,12 @@ public sealed class ProviderHelmholtz() : BaseProvider(LLMProviders.HELMHOLTZ, "
model.Id.Contains("gritlm", StringComparison.InvariantCultureIgnoreCase));
}
/// <inheritdoc />
public override Task<IEnumerable<Model>> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default)
{
return Task.FromResult(Enumerable.Empty<Model>());
}
#endregion
private async Task<IEnumerable<Model>> LoadModels(CancellationToken token, string? apiKeyProvisional = null)

View File

@ -104,5 +104,11 @@ public sealed class ProviderHuggingFace : BaseProvider
return Task.FromResult(Enumerable.Empty<Model>());
}
/// <inheritdoc />
public override Task<IEnumerable<Model>> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default)
{
return Task.FromResult(Enumerable.Empty<Model>());
}
#endregion
}

View File

@ -74,4 +74,11 @@ public interface IProvider
/// <returns>The list of embedding models.</returns>
public Task<IEnumerable<Model>> GetEmbeddingModels(string? apiKeyProvisional = null, CancellationToken token = default);
/// <summary>
/// Load all possible transcription models that can be used with this provider.
/// </summary>
/// <param name="apiKeyProvisional">The provisional API key to use. Useful when the user is adding a new provider. When null, the stored API key is used.</param>
/// <param name="token">>The cancellation token.</param>
/// <returns>>The list of transcription models.</returns>
public Task<IEnumerable<Model>> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default);
}

View File

@ -141,6 +141,43 @@ public static class LLMProvidersExtensions
_ => false,
};
public static bool ProvideTranscriptionAPI(this LLMProviders llmProvider) => llmProvider switch
{
//
// Providers that support transcription:
//
LLMProviders.OPEN_AI => true,
LLMProviders.MISTRAL => true,
LLMProviders.FIREWORKS => true,
LLMProviders.GWDG => true,
//
// Providers that support transcription but provide no OpenAI-compatible API yet:
//
LLMProviders.ALIBABA_CLOUD => false,
LLMProviders.GOOGLE => false,
//
// Providers that do not support transcription:
//
LLMProviders.OPEN_ROUTER => false,
LLMProviders.GROQ => false,
LLMProviders.ANTHROPIC => false,
LLMProviders.X => false,
LLMProviders.DEEP_SEEK => false,
LLMProviders.HUGGINGFACE => false,
LLMProviders.PERPLEXITY => false,
LLMProviders.HELMHOLTZ => false,
//
// Self-hosted providers are treated as a special case anyway.
//
LLMProviders.SELF_HOSTED => true,
_ => false,
};
/// <summary>
/// Creates a new provider instance based on the provider value.
@ -162,6 +199,16 @@ public static class LLMProvidersExtensions
return embeddingProviderSettings.UsedLLMProvider.CreateProvider(embeddingProviderSettings.Name, embeddingProviderSettings.Host, embeddingProviderSettings.Hostname, embeddingProviderSettings.Model, HFInferenceProvider.NONE);
}
/// <summary>
/// Creates a new provider instance based on the speech provider value.
/// </summary>
/// <param name="transcriptionProviderSettings">The speech provider settings.</param>
/// <returns>The provider instance.</returns>
public static IProvider CreateProvider(this TranscriptionProvider transcriptionProviderSettings)
{
return transcriptionProviderSettings.UsedLLMProvider.CreateProvider(transcriptionProviderSettings.Name, transcriptionProviderSettings.Host, transcriptionProviderSettings.Hostname, transcriptionProviderSettings.Model, HFInferenceProvider.NONE);
}
private static IProvider CreateProvider(this LLMProviders provider, string instanceName, Host host, string hostname, Model model, HFInferenceProvider inferenceProvider, string expertProviderApiParameter = "")
{
try
@ -274,6 +321,11 @@ public static class LLMProvidersExtensions
LLMProviders.SELF_HOSTED => host is not Host.LM_STUDIO,
_ => false,
};
public static bool IsTranscriptionModelProvidedManually(this LLMProviders provider, Host host) => provider switch
{
_ => false,
};
public static bool IsHostNeeded(this LLMProviders provider) => provider switch
{
@ -345,6 +397,7 @@ public static class LLMProvidersExtensions
case Host.OLLAMA:
case Host.LM_STUDIO:
case Host.VLLM:
case Host.WHISPER_CPP:
return true;
}
}

View File

@ -113,6 +113,17 @@ public sealed class ProviderMistral() : BaseProvider(LLMProviders.MISTRAL, "http
return Task.FromResult(Enumerable.Empty<Provider.Model>());
}
/// <inheritdoc />
public override Task<IEnumerable<Provider.Model>> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default)
{
// Source: https://docs.mistral.ai/capabilities/audio_transcription
return Task.FromResult<IEnumerable<Provider.Model>>(
new List<Provider.Model>
{
new("voxtral-mini-latest", "Voxtral Mini Latest"),
});
}
#endregion
private async Task<ModelsResponse> LoadModelList(string? apiKeyProvisional, CancellationToken token)

View File

@ -23,6 +23,8 @@ public class NoProvider : IProvider
public Task<IEnumerable<Model>> GetImageModels(string? apiKeyProvisional = null, CancellationToken token = default) => Task.FromResult<IEnumerable<Model>>([]);
public Task<IEnumerable<Model>> GetEmbeddingModels(string? apiKeyProvisional = null, CancellationToken token = default) => Task.FromResult<IEnumerable<Model>>([]);
public Task<IEnumerable<Model>> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default) => Task.FromResult<IEnumerable<Model>>([]);
public async IAsyncEnumerable<ContentStreamChunk> StreamChatCompletion(Model chatModel, ChatThread chatChatThread, SettingsManager settingsManager, [EnumeratorCancellation] CancellationToken token = default)
{

View File

@ -241,6 +241,14 @@ public sealed class ProviderOpenAI() : BaseProvider(LLMProviders.OPEN_AI, "https
return this.LoadModels(["text-embedding-"], token, apiKeyProvisional);
}
/// <inheritdoc />
public override async Task<IEnumerable<Model>> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default)
{
var models = await this.LoadModels(["whisper-", "gpt-"], token, apiKeyProvisional);
return models.Where(model => model.Id.StartsWith("whisper-", StringComparison.InvariantCultureIgnoreCase) ||
model.Id.Contains("-transcribe", StringComparison.InvariantCultureIgnoreCase));
}
#endregion
private async Task<IEnumerable<Model>> LoadModels(string[] prefixes, CancellationToken token, string? apiKeyProvisional = null)

View File

@ -106,6 +106,12 @@ public sealed class ProviderOpenRouter() : BaseProvider(LLMProviders.OPEN_ROUTER
{
return this.LoadEmbeddingModels(token, apiKeyProvisional);
}
/// <inheritdoc />
public override Task<IEnumerable<Model>> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default)
{
return Task.FromResult(Enumerable.Empty<Model>());
}
#endregion

View File

@ -107,6 +107,12 @@ public sealed class ProviderPerplexity() : BaseProvider(LLMProviders.PERPLEXITY,
return Task.FromResult(Enumerable.Empty<Model>());
}
/// <inheritdoc />
public override Task<IEnumerable<Model>> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default)
{
return Task.FromResult(Enumerable.Empty<Model>());
}
#endregion
private Task<IEnumerable<Model>> LoadModels() => Task.FromResult<IEnumerable<Model>>(KNOWN_MODELS);

View File

@ -6,6 +6,7 @@ public enum Host
LM_STUDIO,
LLAMA_CPP,
WHISPER_CPP,
OLLAMA,
VLLM,
}

View File

@ -8,6 +8,7 @@ public static class HostExtensions
Host.LM_STUDIO => "LM Studio",
Host.LLAMA_CPP => "llama.cpp",
Host.WHISPER_CPP => "whisper.cpp",
Host.OLLAMA => "ollama",
Host.VLLM => "vLLM",
@ -24,6 +25,27 @@ public static class HostExtensions
_ => "chat/completions",
};
public static string TranscriptionURL(this Host host) => host switch
{
_ => "audio/transcriptions",
};
public static bool IsChatSupported(this Host host)
{
switch (host)
{
case Host.WHISPER_CPP:
return false;
default:
case Host.OLLAMA:
case Host.VLLM:
case Host.LM_STUDIO:
case Host.LLAMA_CPP:
return true;
}
}
public static bool IsEmbeddingSupported(this Host host)
{
switch (host)
@ -38,4 +60,20 @@ public static class HostExtensions
return false;
}
}
public static bool IsTranscriptionSupported(this Host host)
{
switch (host)
{
case Host.OLLAMA:
case Host.VLLM:
case Host.WHISPER_CPP:
return true;
default:
case Host.LM_STUDIO:
case Host.LLAMA_CPP:
return false;
}
}
}

View File

@ -6,12 +6,15 @@ using System.Text.Json;
using AIStudio.Chat;
using AIStudio.Provider.OpenAI;
using AIStudio.Settings;
using AIStudio.Tools.PluginSystem;
namespace AIStudio.Provider.SelfHosted;
public sealed class ProviderSelfHosted(Host host, string hostname) : BaseProvider(LLMProviders.SELF_HOSTED, $"{hostname}{host.BaseURL()}", LOGGER)
{
private static readonly ILogger<ProviderSelfHosted> LOGGER = Program.LOGGER_FACTORY.CreateLogger<ProviderSelfHosted>();
private static string TB(string fallbackEN) => I18N.I.T(fallbackEN, typeof(ProviderSelfHosted).Namespace, nameof(ProviderSelfHosted));
#region Implementation of IProvider
@ -138,6 +141,35 @@ public sealed class ProviderSelfHosted(Host host, string hostname) : BaseProvide
}
}
/// <inheritdoc />
public override Task<IEnumerable<Provider.Model>> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default)
{
try
{
switch (host)
{
case Host.WHISPER_CPP:
return Task.FromResult<IEnumerable<Provider.Model>>(
new List<Provider.Model>
{
new("loaded-model", TB("Model as configured by whisper.cpp")),
});
case Host.OLLAMA:
case Host.VLLM:
return this.LoadModels([], [], token, apiKeyProvisional);
default:
return Task.FromResult(Enumerable.Empty<Provider.Model>());
}
}
catch (Exception e)
{
LOGGER.LogError(e, "Failed to load transcription models from self-hosted provider.");
return Task.FromResult(Enumerable.Empty<Provider.Model>());
}
}
#endregion
private async Task<IEnumerable<Provider.Model>> LoadModels(string[] ignorePhrases, string[] filterPhrases, CancellationToken token, string? apiKeyProvisional = null)

View File

@ -101,6 +101,12 @@ public sealed class ProviderX() : BaseProvider(LLMProviders.X, "https://api.x.ai
return Task.FromResult<IEnumerable<Model>>([]);
}
/// <inheritdoc />
public override Task<IEnumerable<Model>> GetTranscriptionModels(string? apiKeyProvisional = null, CancellationToken token = default)
{
return Task.FromResult(Enumerable.Empty<Model>());
}
#endregion
private async Task<IEnumerable<Model>> LoadModels(string[] prefixes, CancellationToken token, string? apiKeyProvisional = null)

View File

@ -25,6 +25,11 @@ public sealed class Data
/// A collection of embedding providers configured.
/// </summary>
public List<EmbeddingProvider> EmbeddingProviders { get; init; } = [];
/// <summary>
/// A collection of speech providers configured.
/// </summary>
public List<TranscriptionProvider> TranscriptionProviders { get; init; } = [];
/// <summary>
/// A collection of data sources configured.
@ -52,9 +57,14 @@ public sealed class Data
public uint NextProviderNum { get; set; } = 1;
/// <summary>
/// The next embedding number to use.
/// The next embedding provider number to use.
/// </summary>
public uint NextEmbeddingNum { get; set; } = 1;
/// <summary>
/// The next transcription provider number to use.
/// </summary>
public uint NextTranscriptionNum { get; set; } = 1;
/// <summary>
/// The next data source number to use.

View File

@ -14,7 +14,7 @@ public static class PreviewFeaturesExtensions
PreviewFeatures.PRE_PLUGINS_2025 => TB("Plugins: Preview of our plugin system where you can extend the functionality of the app"),
PreviewFeatures.PRE_READ_PDF_2025 => TB("Read PDF: Preview of our PDF reading system where you can read and extract text from PDF files"),
PreviewFeatures.PRE_DOCUMENT_ANALYSIS_2025 => TB("Document Analysis: Preview of our document analysis system where you can analyze and extract information from documents"),
PreviewFeatures.PRE_SPEECH_TO_TEXT_2026 => TB("Speech to Text: Preview of our speech to text system where you can transcribe recordings and audio files into text"),
PreviewFeatures.PRE_SPEECH_TO_TEXT_2026 => TB("Transcription: Preview of our speech to text system where you can transcribe recordings and audio files into text"),
_ => TB("Unknown preview feature")
};

View File

@ -0,0 +1,32 @@
using System.Text.Json.Serialization;
using AIStudio.Provider;
using Host = AIStudio.Provider.SelfHosted.Host;
namespace AIStudio.Settings;
public readonly record struct TranscriptionProvider(
uint Num,
string Id,
string Name,
LLMProviders UsedLLMProvider,
Model Model,
bool IsSelfHosted = false,
string Hostname = "http://localhost:1234",
Host Host = Host.NONE) : ISecretId
{
public override string ToString() => this.Name;
#region Implementation of ISecretId
/// <inheritdoc />
[JsonIgnore]
public string SecretId => this.Id;
/// <inheritdoc />
[JsonIgnore]
public string SecretName => this.Name;
#endregion
}