Implemented the transcription API

This commit is contained in:
Thorsten Sommer 2026-01-09 19:02:57 +01:00
parent 31694339b1
commit f75c5a21b8
Signed by: tsommer
GPG Key ID: 371BBA77A02C0108
19 changed files with 162 additions and 1 deletions

View File

@ -80,6 +80,12 @@ public sealed class ProviderAlibabaCloud() : BaseProvider(LLMProviders.ALIBABA_C
yield break;
}
#pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
/// <inheritdoc />
public override Task<string> TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default)
{
return Task.FromResult(string.Empty);
}
/// <inheritdoc />
public override Task<IEnumerable<Model>> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default)

View File

@ -107,6 +107,12 @@ public sealed class ProviderAnthropic() : BaseProvider(LLMProviders.ANTHROPIC, "
yield break;
}
#pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
/// <inheritdoc />
public override Task<string> TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default)
{
return Task.FromResult(string.Empty);
}
/// <inheritdoc />
public override Task<IEnumerable<Model>> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default)

View File

@ -1,4 +1,5 @@
using System.Net;
using System.Net.Http.Headers;
using System.Runtime.CompilerServices;
using System.Text.Json;
using System.Text.Json.Serialization;
@ -6,10 +7,15 @@ using System.Text.Json.Serialization;
using AIStudio.Chat;
using AIStudio.Provider.Anthropic;
using AIStudio.Provider.OpenAI;
using AIStudio.Provider.SelfHosted;
using AIStudio.Settings;
using AIStudio.Tools.MIME;
using AIStudio.Tools.PluginSystem;
using AIStudio.Tools.Rust;
using AIStudio.Tools.Services;
using Host = AIStudio.Provider.SelfHosted.Host;
namespace AIStudio.Provider;
/// <summary>
@ -89,6 +95,9 @@ public abstract class BaseProvider : IProvider, ISecretId
/// <inheritdoc />
public abstract IAsyncEnumerable<ImageURL> StreamImageCompletion(Model imageModel, string promptPositive, string promptNegative = FilterOperator.String.Empty, ImageURL referenceImageURL = default, CancellationToken token = default);
/// <inheritdoc />
public abstract Task<string> TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default);
/// <inheritdoc />
public abstract Task<IEnumerable<Model>> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default);
@ -536,6 +545,48 @@ public abstract class BaseProvider : IProvider, ISecretId
streamReader.Dispose();
}
protected async Task<string> PerformStandardTranscriptionRequest(RequestedSecret requestedSecret, Model transcriptionModel, string audioFilePath, Host host = Host.NONE, CancellationToken token = default)
{
try
{
using var form = new MultipartFormDataContent();
var mimeType = Builder.FromFilename(audioFilePath);
await using var fileStream = File.OpenRead(audioFilePath);
using var fileContent = new StreamContent(fileStream);
fileContent.Headers.ContentType = new MediaTypeHeaderValue(mimeType);
form.Add(fileContent, "file", Path.GetFileName(audioFilePath));
form.Add(new StringContent(transcriptionModel.Id), "model");
using var request = new HttpRequestMessage(HttpMethod.Post, host.TranscriptionURL());
request.Content = form;
if(requestedSecret.Success)
request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", await requestedSecret.Secret.Decrypt(ENCRYPTION));
using var response = await this.httpClient.SendAsync(request, token);
var responseBody = response.Content.ReadAsStringAsync(token).Result;
if (!response.IsSuccessStatusCode)
return string.Empty;
var transcriptionResponse = JsonSerializer.Deserialize<TranscriptionResponse>(responseBody, JSON_SERIALIZER_OPTIONS);
if(transcriptionResponse is null)
{
this.logger.LogError("Was not able to deserialize the transcription response.");
return string.Empty;
}
return transcriptionResponse.Text;
}
catch (Exception e)
{
this.logger.LogError("Failed to perform transcription request: '{Message}'.", e.Message);
return string.Empty;
}
}
/// <summary>
/// Parse and convert API parameters from a provided JSON string into a dictionary,
/// optionally merging additional parameters and removing specific keys.

View File

@ -80,6 +80,12 @@ public sealed class ProviderDeepSeek() : BaseProvider(LLMProviders.DEEP_SEEK, "h
yield break;
}
#pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
/// <inheritdoc />
public override Task<string> TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default)
{
return Task.FromResult(string.Empty);
}
/// <inheritdoc />
public override Task<IEnumerable<Model>> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default)

View File

@ -81,6 +81,13 @@ public class ProviderFireworks() : BaseProvider(LLMProviders.FIREWORKS, "https:/
yield break;
}
#pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
/// <inheritdoc />
public override async Task<string> TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default)
{
var requestedSecret = await RUST_SERVICE.GetAPIKey(this, isTrying: true);
return await this.PerformStandardTranscriptionRequest(requestedSecret, transcriptionModel, audioFilePath, token: token);
}
/// <inheritdoc />
public override Task<IEnumerable<Model>> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default)

View File

@ -80,6 +80,13 @@ public sealed class ProviderGWDG() : BaseProvider(LLMProviders.GWDG, "https://ch
yield break;
}
#pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
/// <inheritdoc />
public override async Task<string> TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default)
{
var requestedSecret = await RUST_SERVICE.GetAPIKey(this, isTrying: true);
return await this.PerformStandardTranscriptionRequest(requestedSecret, transcriptionModel, audioFilePath, token: token);
}
/// <inheritdoc />
public override async Task<IEnumerable<Model>> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default)

View File

@ -82,6 +82,12 @@ public class ProviderGoogle() : BaseProvider(LLMProviders.GOOGLE, "https://gener
}
#pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
/// <inheritdoc />
public override Task<string> TranscribeAudioAsync(Provider.Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default)
{
return Task.FromResult(string.Empty);
}
/// <inheritdoc />
public override async Task<IEnumerable<Provider.Model>> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default)
{

View File

@ -81,6 +81,12 @@ public class ProviderGroq() : BaseProvider(LLMProviders.GROQ, "https://api.groq.
yield break;
}
#pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
/// <inheritdoc />
public override Task<string> TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default)
{
return Task.FromResult(string.Empty);
}
/// <inheritdoc />
public override Task<IEnumerable<Model>> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default)

View File

@ -80,6 +80,12 @@ public sealed class ProviderHelmholtz() : BaseProvider(LLMProviders.HELMHOLTZ, "
yield break;
}
#pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
/// <inheritdoc />
public override Task<string> TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default)
{
return Task.FromResult(string.Empty);
}
/// <inheritdoc />
public override async Task<IEnumerable<Model>> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default)

View File

@ -85,6 +85,12 @@ public sealed class ProviderHuggingFace : BaseProvider
yield break;
}
#pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
/// <inheritdoc />
public override Task<string> TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default)
{
return Task.FromResult(string.Empty);
}
/// <inheritdoc />
public override Task<IEnumerable<Model>> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default)

View File

@ -50,6 +50,16 @@ public interface IProvider
/// <returns>The image completion stream.</returns>
public IAsyncEnumerable<ImageURL> StreamImageCompletion(Model imageModel, string promptPositive, string promptNegative = FilterOperator.String.Empty, ImageURL referenceImageURL = default, CancellationToken token = default);
/// <summary>
/// Transcribe an audio file.
/// </summary>
/// <param name="transcriptionModel">The model to use for transcription.</param>
/// <param name="audioFilePath">The audio file path.</param>
/// <param name="settingsManager">The settings manager instance to use.</param>
/// <param name="token">The cancellation token.</param>
/// <returns>>The transcription result.</returns>
public Task<string> TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default);
/// <summary>
/// Load all possible text models that can be used with this provider.
/// </summary>

View File

@ -81,6 +81,13 @@ public sealed class ProviderMistral() : BaseProvider(LLMProviders.MISTRAL, "http
yield break;
}
#pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
/// <inheritdoc />
public override async Task<string> TranscribeAudioAsync(Provider.Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default)
{
var requestedSecret = await RUST_SERVICE.GetAPIKey(this, isTrying: true);
return await this.PerformStandardTranscriptionRequest(requestedSecret, transcriptionModel, audioFilePath, token: token);
}
/// <inheritdoc />
public override async Task<IEnumerable<Provider.Model>> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default)

View File

@ -38,6 +38,8 @@ public class NoProvider : IProvider
yield break;
}
public Task<string> TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default) => Task.FromResult(string.Empty);
public IReadOnlyCollection<Capability> GetModelCapabilities(Model model) => [ Capability.NONE ];
#endregion

View File

@ -217,6 +217,13 @@ public sealed class ProviderOpenAI() : BaseProvider(LLMProviders.OPEN_AI, "https
}
#pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
/// <inheritdoc />
public override async Task<string> TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default)
{
var requestedSecret = await RUST_SERVICE.GetAPIKey(this, isTrying: true);
return await this.PerformStandardTranscriptionRequest(requestedSecret, transcriptionModel, audioFilePath, token: token);
}
/// <inheritdoc />
public override async Task<IEnumerable<Model>> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default)

View File

@ -88,6 +88,12 @@ public sealed class ProviderOpenRouter() : BaseProvider(LLMProviders.OPEN_ROUTER
yield break;
}
#pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
/// <inheritdoc />
public override Task<string> TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default)
{
return Task.FromResult(string.Empty);
}
/// <inheritdoc />
public override Task<IEnumerable<Model>> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default)

View File

@ -88,6 +88,12 @@ public sealed class ProviderPerplexity() : BaseProvider(LLMProviders.PERPLEXITY,
yield break;
}
#pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
/// <inheritdoc />
public override Task<string> TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default)
{
return Task.FromResult(string.Empty);
}
/// <inheritdoc />
public override Task<IEnumerable<Model>> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default)

View File

@ -88,6 +88,13 @@ public sealed class ProviderSelfHosted(Host host, string hostname) : BaseProvide
}
#pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
/// <inheritdoc />
public override async Task<string> TranscribeAudioAsync(Provider.Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default)
{
var requestedSecret = await RUST_SERVICE.GetAPIKey(this, isTrying: true);
return await this.PerformStandardTranscriptionRequest(requestedSecret, transcriptionModel, audioFilePath, host, token);
}
public override async Task<IEnumerable<Provider.Model>> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default)
{
try

View File

@ -0,0 +1,3 @@
namespace AIStudio.Provider;
public sealed record TranscriptionResponse(string Text);

View File

@ -81,7 +81,13 @@ public sealed class ProviderX() : BaseProvider(LLMProviders.X, "https://api.x.ai
yield break;
}
#pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
/// <inheritdoc />
public override Task<string> TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default)
{
return Task.FromResult(string.Empty);
}
/// <inheritdoc />
public override async Task<IEnumerable<Model>> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default)
{