diff --git a/app/MindWork AI Studio/Provider/AlibabaCloud/ProviderAlibabaCloud.cs b/app/MindWork AI Studio/Provider/AlibabaCloud/ProviderAlibabaCloud.cs
index 6b648372..9d2e0792 100644
--- a/app/MindWork AI Studio/Provider/AlibabaCloud/ProviderAlibabaCloud.cs
+++ b/app/MindWork AI Studio/Provider/AlibabaCloud/ProviderAlibabaCloud.cs
@@ -80,6 +80,12 @@ public sealed class ProviderAlibabaCloud() : BaseProvider(LLMProviders.ALIBABA_C
yield break;
}
#pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
+
+ /// <inheritdoc />
+ public override Task TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default)
+ {
+ return Task.FromResult(string.Empty); // No request is made; every argument is ignored and the completed task carries an empty string.
+ }
///
public override Task> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default)
diff --git a/app/MindWork AI Studio/Provider/Anthropic/ProviderAnthropic.cs b/app/MindWork AI Studio/Provider/Anthropic/ProviderAnthropic.cs
index 42268936..2b45cc44 100644
--- a/app/MindWork AI Studio/Provider/Anthropic/ProviderAnthropic.cs
+++ b/app/MindWork AI Studio/Provider/Anthropic/ProviderAnthropic.cs
@@ -107,6 +107,12 @@ public sealed class ProviderAnthropic() : BaseProvider(LLMProviders.ANTHROPIC, "
yield break;
}
#pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
+
+ /// <inheritdoc />
+ public override Task TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default)
+ {
+ return Task.FromResult(string.Empty); // No request is made; every argument is ignored and the completed task carries an empty string.
+ }
///
public override Task> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default)
diff --git a/app/MindWork AI Studio/Provider/BaseProvider.cs b/app/MindWork AI Studio/Provider/BaseProvider.cs
index c5594087..9801e16b 100644
--- a/app/MindWork AI Studio/Provider/BaseProvider.cs
+++ b/app/MindWork AI Studio/Provider/BaseProvider.cs
@@ -1,4 +1,5 @@
using System.Net;
+using System.Net.Http.Headers;
using System.Runtime.CompilerServices;
using System.Text.Json;
using System.Text.Json.Serialization;
@@ -6,10 +7,15 @@ using System.Text.Json.Serialization;
using AIStudio.Chat;
using AIStudio.Provider.Anthropic;
using AIStudio.Provider.OpenAI;
+using AIStudio.Provider.SelfHosted;
using AIStudio.Settings;
+using AIStudio.Tools.MIME;
using AIStudio.Tools.PluginSystem;
+using AIStudio.Tools.Rust;
using AIStudio.Tools.Services;
+using Host = AIStudio.Provider.SelfHosted.Host;
+
namespace AIStudio.Provider;
///
@@ -89,6 +95,9 @@ public abstract class BaseProvider : IProvider, ISecretId
///
public abstract IAsyncEnumerable StreamImageCompletion(Model imageModel, string promptPositive, string promptNegative = FilterOperator.String.Empty, ImageURL referenceImageURL = default, CancellationToken token = default);
+ /// <inheritdoc />
+ public abstract Task TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default);
+
///
public abstract Task> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default);
@@ -536,6 +545,48 @@ public abstract class BaseProvider : IProvider, ISecretId
streamReader.Dispose();
}
+    /// <summary>Uploads an audio file as multipart form data (fields "file" and "model") to the host's transcription URL and returns the text of the reply.</summary>
+    /// <returns>The transcribed text, or an empty string when the request throws, the status is not successful, or the reply cannot be deserialized.</returns>
+    protected async Task<string> PerformStandardTranscriptionRequest(RequestedSecret requestedSecret, Model transcriptionModel, string audioFilePath, Host host = Host.NONE, CancellationToken token = default)
+    {
+        try
+        {
+            using var form = new MultipartFormDataContent();
+            var mimeType = Builder.FromFilename(audioFilePath);
+            await using var fileStream = File.OpenRead(audioFilePath);
+            using var fileContent = new StreamContent(fileStream);
+            fileContent.Headers.ContentType = new MediaTypeHeaderValue(mimeType);
+            form.Add(fileContent, "file", Path.GetFileName(audioFilePath));
+            form.Add(new StringContent(transcriptionModel.Id), "model");
+
+            using var request = new HttpRequestMessage(HttpMethod.Post, host.TranscriptionURL());
+            request.Content = form;
+            if(requestedSecret.Success) // When no secret was obtained, the request is sent without an Authorization header.
+                request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", await requestedSecret.Secret.Decrypt(ENCRYPTION));
+
+            using var response = await this.httpClient.SendAsync(request, token);
+            var responseBody = await response.Content.ReadAsStringAsync(token); // Awaited rather than read via .Result, so no thread is blocked on I/O.
+            if (!response.IsSuccessStatusCode)
+            {
+                this.logger.LogError("The transcription request failed with status code {StatusCode}.", response.StatusCode);
+                return string.Empty;
+            }
+
+            var transcriptionResponse = JsonSerializer.Deserialize<TranscriptionResponse>(responseBody, JSON_SERIALIZER_OPTIONS);
+            if(transcriptionResponse is null)
+            {
+                this.logger.LogError("Was not able to deserialize the transcription response.");
+                return string.Empty;
+            }
+            return transcriptionResponse.Text;
+        }
+        catch (Exception e)
+        {
+            this.logger.LogError(e, "Failed to perform transcription request: '{Message}'.", e.Message);
+            return string.Empty;
+        }
+    }
+
///
/// Parse and convert API parameters from a provided JSON string into a dictionary,
/// optionally merging additional parameters and removing specific keys.
diff --git a/app/MindWork AI Studio/Provider/DeepSeek/ProviderDeepSeek.cs b/app/MindWork AI Studio/Provider/DeepSeek/ProviderDeepSeek.cs
index b2715f47..39ecd21e 100644
--- a/app/MindWork AI Studio/Provider/DeepSeek/ProviderDeepSeek.cs
+++ b/app/MindWork AI Studio/Provider/DeepSeek/ProviderDeepSeek.cs
@@ -80,6 +80,12 @@ public sealed class ProviderDeepSeek() : BaseProvider(LLMProviders.DEEP_SEEK, "h
yield break;
}
#pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
+
+ /// <inheritdoc />
+ public override Task TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default)
+ {
+ return Task.FromResult(string.Empty); // No request is made; every argument is ignored and the completed task carries an empty string.
+ }
///
public override Task> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default)
diff --git a/app/MindWork AI Studio/Provider/Fireworks/ProviderFireworks.cs b/app/MindWork AI Studio/Provider/Fireworks/ProviderFireworks.cs
index 9450134d..a3f27a07 100644
--- a/app/MindWork AI Studio/Provider/Fireworks/ProviderFireworks.cs
+++ b/app/MindWork AI Studio/Provider/Fireworks/ProviderFireworks.cs
@@ -81,6 +81,13 @@ public class ProviderFireworks() : BaseProvider(LLMProviders.FIREWORKS, "https:/
yield break;
}
#pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
+
+ /// <inheritdoc />
+ public override async Task TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default)
+ {
+ var requestedSecret = await RUST_SERVICE.GetAPIKey(this, isTrying: true); // The lookup result is passed on without being checked here.
+ return await this.PerformStandardTranscriptionRequest(requestedSecret, transcriptionModel, audioFilePath, token: token); // settingsManager is not used; host keeps the helper's default.
+ }
///
public override Task> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default)
diff --git a/app/MindWork AI Studio/Provider/GWDG/ProviderGWDG.cs b/app/MindWork AI Studio/Provider/GWDG/ProviderGWDG.cs
index da322942..16686b31 100644
--- a/app/MindWork AI Studio/Provider/GWDG/ProviderGWDG.cs
+++ b/app/MindWork AI Studio/Provider/GWDG/ProviderGWDG.cs
@@ -80,6 +80,13 @@ public sealed class ProviderGWDG() : BaseProvider(LLMProviders.GWDG, "https://ch
yield break;
}
#pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
+
+ /// <inheritdoc />
+ public override async Task TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default)
+ {
+ var requestedSecret = await RUST_SERVICE.GetAPIKey(this, isTrying: true); // The lookup result is passed on without being checked here.
+ return await this.PerformStandardTranscriptionRequest(requestedSecret, transcriptionModel, audioFilePath, token: token); // settingsManager is not used; host keeps the helper's default.
+ }
///
public override async Task> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default)
diff --git a/app/MindWork AI Studio/Provider/Google/ProviderGoogle.cs b/app/MindWork AI Studio/Provider/Google/ProviderGoogle.cs
index fce1a451..176bbeb5 100644
--- a/app/MindWork AI Studio/Provider/Google/ProviderGoogle.cs
+++ b/app/MindWork AI Studio/Provider/Google/ProviderGoogle.cs
@@ -82,6 +82,12 @@ public class ProviderGoogle() : BaseProvider(LLMProviders.GOOGLE, "https://gener
}
#pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
+ /// <inheritdoc />
+ public override Task TranscribeAudioAsync(Provider.Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default)
+ {
+ return Task.FromResult(string.Empty); // No request is made; every argument is ignored and the completed task carries an empty string.
+ }
+
///
public override async Task> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default)
{
diff --git a/app/MindWork AI Studio/Provider/Groq/ProviderGroq.cs b/app/MindWork AI Studio/Provider/Groq/ProviderGroq.cs
index b6e9137a..0bbc616f 100644
--- a/app/MindWork AI Studio/Provider/Groq/ProviderGroq.cs
+++ b/app/MindWork AI Studio/Provider/Groq/ProviderGroq.cs
@@ -81,6 +81,12 @@ public class ProviderGroq() : BaseProvider(LLMProviders.GROQ, "https://api.groq.
yield break;
}
#pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
+
+ /// <inheritdoc />
+ public override Task TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default)
+ {
+ return Task.FromResult(string.Empty); // No request is made; every argument is ignored and the completed task carries an empty string.
+ }
///
public override Task> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default)
diff --git a/app/MindWork AI Studio/Provider/Helmholtz/ProviderHelmholtz.cs b/app/MindWork AI Studio/Provider/Helmholtz/ProviderHelmholtz.cs
index 213bf075..cc52cd20 100644
--- a/app/MindWork AI Studio/Provider/Helmholtz/ProviderHelmholtz.cs
+++ b/app/MindWork AI Studio/Provider/Helmholtz/ProviderHelmholtz.cs
@@ -80,6 +80,12 @@ public sealed class ProviderHelmholtz() : BaseProvider(LLMProviders.HELMHOLTZ, "
yield break;
}
#pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
+
+ /// <inheritdoc />
+ public override Task TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default)
+ {
+ return Task.FromResult(string.Empty); // No request is made; every argument is ignored and the completed task carries an empty string.
+ }
///
public override async Task> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default)
diff --git a/app/MindWork AI Studio/Provider/HuggingFace/ProviderHuggingFace.cs b/app/MindWork AI Studio/Provider/HuggingFace/ProviderHuggingFace.cs
index 794b4f42..a9778988 100644
--- a/app/MindWork AI Studio/Provider/HuggingFace/ProviderHuggingFace.cs
+++ b/app/MindWork AI Studio/Provider/HuggingFace/ProviderHuggingFace.cs
@@ -85,6 +85,12 @@ public sealed class ProviderHuggingFace : BaseProvider
yield break;
}
#pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
+
+ /// <inheritdoc />
+ public override Task TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default)
+ {
+ return Task.FromResult(string.Empty); // No request is made; every argument is ignored and the completed task carries an empty string.
+ }
///
public override Task> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default)
diff --git a/app/MindWork AI Studio/Provider/IProvider.cs b/app/MindWork AI Studio/Provider/IProvider.cs
index 4ae6dc6c..5c390074 100644
--- a/app/MindWork AI Studio/Provider/IProvider.cs
+++ b/app/MindWork AI Studio/Provider/IProvider.cs
@@ -50,6 +50,16 @@ public interface IProvider
/// The image completion stream.
public IAsyncEnumerable StreamImageCompletion(Model imageModel, string promptPositive, string promptNegative = FilterOperator.String.Empty, ImageURL referenceImageURL = default, CancellationToken token = default);
+ /// <summary>
+ /// Transcribe an audio file.
+ /// </summary>
+ /// <param name="transcriptionModel">The model to use for transcription.</param>
+ /// <param name="audioFilePath">The audio file path.</param>
+ /// <param name="settingsManager">The settings manager instance to use.</param>
+ /// <param name="token">The cancellation token.</param>
+ /// <returns>The transcription result.</returns>
+ public Task TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default);
+
///
/// Load all possible text models that can be used with this provider.
///
diff --git a/app/MindWork AI Studio/Provider/Mistral/ProviderMistral.cs b/app/MindWork AI Studio/Provider/Mistral/ProviderMistral.cs
index 598f7016..522b9e4d 100644
--- a/app/MindWork AI Studio/Provider/Mistral/ProviderMistral.cs
+++ b/app/MindWork AI Studio/Provider/Mistral/ProviderMistral.cs
@@ -81,6 +81,13 @@ public sealed class ProviderMistral() : BaseProvider(LLMProviders.MISTRAL, "http
yield break;
}
#pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
+
+ /// <inheritdoc />
+ public override async Task TranscribeAudioAsync(Provider.Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default)
+ {
+ var requestedSecret = await RUST_SERVICE.GetAPIKey(this, isTrying: true); // The lookup result is passed on without being checked here.
+ return await this.PerformStandardTranscriptionRequest(requestedSecret, transcriptionModel, audioFilePath, token: token); // settingsManager is not used; host keeps the helper's default.
+ }
///
public override async Task> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default)
diff --git a/app/MindWork AI Studio/Provider/NoProvider.cs b/app/MindWork AI Studio/Provider/NoProvider.cs
index 4f92e5c9..a650ac34 100644
--- a/app/MindWork AI Studio/Provider/NoProvider.cs
+++ b/app/MindWork AI Studio/Provider/NoProvider.cs
@@ -38,6 +38,8 @@ public class NoProvider : IProvider
yield break;
}
+ public Task TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default) => Task.FromResult(string.Empty); // Ignores every argument; the completed task always carries an empty string.
+
public IReadOnlyCollection GetModelCapabilities(Model model) => [ Capability.NONE ];
#endregion
diff --git a/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs b/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs
index d06d6e15..76521cd4 100644
--- a/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs
+++ b/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs
@@ -217,6 +217,13 @@ public sealed class ProviderOpenAI() : BaseProvider(LLMProviders.OPEN_AI, "https
}
#pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
+
+ /// <inheritdoc />
+ public override async Task TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default)
+ {
+ var requestedSecret = await RUST_SERVICE.GetAPIKey(this, isTrying: true); // The lookup result is passed on without being checked here.
+ return await this.PerformStandardTranscriptionRequest(requestedSecret, transcriptionModel, audioFilePath, token: token); // settingsManager is not used; host keeps the helper's default.
+ }
///
public override async Task> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default)
diff --git a/app/MindWork AI Studio/Provider/OpenRouter/ProviderOpenRouter.cs b/app/MindWork AI Studio/Provider/OpenRouter/ProviderOpenRouter.cs
index d6945799..79d02de6 100644
--- a/app/MindWork AI Studio/Provider/OpenRouter/ProviderOpenRouter.cs
+++ b/app/MindWork AI Studio/Provider/OpenRouter/ProviderOpenRouter.cs
@@ -88,6 +88,12 @@ public sealed class ProviderOpenRouter() : BaseProvider(LLMProviders.OPEN_ROUTER
yield break;
}
#pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
+
+ /// <inheritdoc />
+ public override Task TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default)
+ {
+ return Task.FromResult(string.Empty); // No request is made; every argument is ignored and the completed task carries an empty string.
+ }
///
public override Task> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default)
diff --git a/app/MindWork AI Studio/Provider/Perplexity/ProviderPerplexity.cs b/app/MindWork AI Studio/Provider/Perplexity/ProviderPerplexity.cs
index 0616f2d9..38c6f9b7 100644
--- a/app/MindWork AI Studio/Provider/Perplexity/ProviderPerplexity.cs
+++ b/app/MindWork AI Studio/Provider/Perplexity/ProviderPerplexity.cs
@@ -88,6 +88,12 @@ public sealed class ProviderPerplexity() : BaseProvider(LLMProviders.PERPLEXITY,
yield break;
}
#pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
+
+ /// <inheritdoc />
+ public override Task TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default)
+ {
+ return Task.FromResult(string.Empty); // No request is made; every argument is ignored and the completed task carries an empty string.
+ }
///
public override Task> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default)
diff --git a/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs b/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs
index a61a3b26..8b472c09 100644
--- a/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs
+++ b/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs
@@ -88,6 +88,13 @@ public sealed class ProviderSelfHosted(Host host, string hostname) : BaseProvide
}
#pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
+ /// <inheritdoc />
+ public override async Task TranscribeAudioAsync(Provider.Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default)
+ {
+ var requestedSecret = await RUST_SERVICE.GetAPIKey(this, isTrying: true); // The lookup result is passed on without being checked here.
+ return await this.PerformStandardTranscriptionRequest(requestedSecret, transcriptionModel, audioFilePath, host, token); // settingsManager is not used; this provider's own host value is forwarded instead of the helper's default.
+ }
+
public override async Task> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default)
{
try
diff --git a/app/MindWork AI Studio/Provider/TranscriptionResponse.cs b/app/MindWork AI Studio/Provider/TranscriptionResponse.cs
new file mode 100644
index 00000000..7ba1f587
--- /dev/null
+++ b/app/MindWork AI Studio/Provider/TranscriptionResponse.cs
@@ -0,0 +1,3 @@
+namespace AIStudio.Provider;
+
+public sealed record TranscriptionResponse(string Text); // Carries the transcribed text; presumably the JSON shape of a transcription reply - confirm against the provider API.
diff --git a/app/MindWork AI Studio/Provider/X/ProviderX.cs b/app/MindWork AI Studio/Provider/X/ProviderX.cs
index 92aad1eb..373a3b58 100644
--- a/app/MindWork AI Studio/Provider/X/ProviderX.cs
+++ b/app/MindWork AI Studio/Provider/X/ProviderX.cs
@@ -81,7 +81,13 @@ public sealed class ProviderX() : BaseProvider(LLMProviders.X, "https://api.x.ai
yield break;
}
#pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
-
+
+ /// <inheritdoc />
+ public override Task TranscribeAudioAsync(Model transcriptionModel, string audioFilePath, SettingsManager settingsManager, CancellationToken token = default)
+ {
+ return Task.FromResult(string.Empty); // No request is made; every argument is ignored and the completed task carries an empty string.
+ }
+
///
public override async Task> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default)
{