diff --git a/app/MindWork AI Studio.sln.DotSettings b/app/MindWork AI Studio.sln.DotSettings index faaedb6b..86bd9eb3 100644 --- a/app/MindWork AI Studio.sln.DotSettings +++ b/app/MindWork AI Studio.sln.DotSettings @@ -6,6 +6,7 @@ GWDG HF IERI + IMIME LLM LM MSG @@ -18,10 +19,12 @@ URL I18N True + True True True True True + True True True True \ No newline at end of file diff --git a/app/MindWork AI Studio/Assistants/I18N/allTexts.lua b/app/MindWork AI Studio/Assistants/I18N/allTexts.lua index 5bbe39b8..c2429c15 100644 --- a/app/MindWork AI Studio/Assistants/I18N/allTexts.lua +++ b/app/MindWork AI Studio/Assistants/I18N/allTexts.lua @@ -2323,6 +2323,12 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VISION::T428040679"] = "Content creation" -- Useful assistants UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VISION::T586430036"] = "Useful assistants" +-- Stop recording and start transcription +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T224155287"] = "Stop recording and start transcription" + +-- Start recording your voice for a transcription +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T2372624045"] = "Start recording your voice for a transcription" + -- Are you sure you want to delete the chat '{0}' in the workspace '{1}'? UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::WORKSPACES::T1016188706"] = "Are you sure you want to delete the chat '{0}' in the workspace '{1}'?" 
@@ -5368,6 +5374,9 @@ UI_TEXT_CONTENT["AISTUDIO::SETTINGS::DATAMODEL::PREVIEWFEATURESEXTENSIONS::T1848 -- Plugins: Preview of our plugin system where you can extend the functionality of the app UI_TEXT_CONTENT["AISTUDIO::SETTINGS::DATAMODEL::PREVIEWFEATURESEXTENSIONS::T2056842933"] = "Plugins: Preview of our plugin system where you can extend the functionality of the app" +-- Speech to Text: Preview of our speech to text system where you can transcribe recordings and audio files into text +UI_TEXT_CONTENT["AISTUDIO::SETTINGS::DATAMODEL::PREVIEWFEATURESEXTENSIONS::T221133923"] = "Speech to Text: Preview of our speech to text system where you can transcribe recordings and audio files into text" + -- RAG: Preview of our RAG implementation where you can refer your files or integrate enterprise data within your company UI_TEXT_CONTENT["AISTUDIO::SETTINGS::DATAMODEL::PREVIEWFEATURESEXTENSIONS::T2708939138"] = "RAG: Preview of our RAG implementation where you can refer your files or integrate enterprise data within your company" diff --git a/app/MindWork AI Studio/Chat/IImageSourceExtensions.cs b/app/MindWork AI Studio/Chat/IImageSourceExtensions.cs index 41706047..c6461643 100644 --- a/app/MindWork AI Studio/Chat/IImageSourceExtensions.cs +++ b/app/MindWork AI Studio/Chat/IImageSourceExtensions.cs @@ -1,3 +1,4 @@ +using AIStudio.Tools.MIME; using AIStudio.Tools.PluginSystem; namespace AIStudio.Chat; @@ -6,7 +7,7 @@ public static class IImageSourceExtensions { private static string TB(string fallbackEN) => I18N.I.T(fallbackEN, typeof(IImageSourceExtensions).Namespace, nameof(IImageSourceExtensions)); - public static string DetermineMimeType(this IImageSource image) + public static MIMEType DetermineMimeType(this IImageSource image) { switch (image.SourceType) { @@ -18,13 +19,11 @@ public static class IImageSourceExtensions { var mimeEnd = base64Data.IndexOf(';'); if (mimeEnd > 5) - { - return base64Data[5..mimeEnd]; - } + return 
Builder.FromTextRepresentation(base64Data[5..mimeEnd]); } // Fallback: - return "application/octet-stream"; + return Builder.Create().UseApplication().UseSubtype(ApplicationSubtype.OCTET_STREAM).Build(); } case ContentImageSource.URL: @@ -32,38 +31,36 @@ public static class IImageSourceExtensions // Try to detect the mime type from the URL extension: var uri = new Uri(image.Source); var extension = Path.GetExtension(uri.AbsolutePath).ToLowerInvariant(); - return extension switch - { - ".png" => "image/png", - ".jpg" or ".jpeg" => "image/jpeg", - ".gif" => "image/gif", - ".bmp" => "image/bmp", - ".webp" => "image/webp", - - _ => "application/octet-stream" - }; + return DeriveMIMETypeFromExtension(extension); } case ContentImageSource.LOCAL_PATH: { var extension = Path.GetExtension(image.Source).ToLowerInvariant(); - return extension switch - { - ".png" => "image/png", - ".jpg" or ".jpeg" => "image/jpeg", - ".gif" => "image/gif", - ".bmp" => "image/bmp", - ".webp" => "image/webp", - - _ => "application/octet-stream" - }; + return DeriveMIMETypeFromExtension(extension); } default: - return "application/octet-stream"; + return Builder.Create().UseApplication().UseSubtype(ApplicationSubtype.OCTET_STREAM).Build(); } } - + + private static MIMEType DeriveMIMETypeFromExtension(string extension) + { + var imageBuilder = Builder.Create().UseImage(); + return extension switch + { + ".png" => imageBuilder.UseSubtype(ImageSubtype.PNG).Build(), + ".jpg" or ".jpeg" => imageBuilder.UseSubtype(ImageSubtype.JPEG).Build(), + ".gif" => imageBuilder.UseSubtype(ImageSubtype.GIF).Build(), + ".webp" => imageBuilder.UseSubtype(ImageSubtype.WEBP).Build(), + ".tiff" or ".tif" => imageBuilder.UseSubtype(ImageSubtype.TIFF).Build(), + ".heic" or ".heif" => imageBuilder.UseSubtype(ImageSubtype.HEIC).Build(), + + _ => Builder.Create().UseApplication().UseSubtype(ApplicationSubtype.OCTET_STREAM).Build() + }; + } + /// /// Read the image content as a base64 string. 
using AIStudio.Tools.MIME;
using AIStudio.Tools.Services;

using Microsoft.AspNetCore.Components;

namespace AIStudio.Components;

/// <summary>
/// Code-behind of the voice recorder toggle. Starting a recording calls the JavaScript
/// function <c>audioRecorder.start</c>; the JavaScript side streams audio chunks back through
/// <see cref="OnAudioChunkReceived"/>, which appends them to a file inside the
/// <c>audioRecordings</c> folder of the data directory. Stopping calls <c>audioRecorder.stop</c>
/// and renames the file so that its extension matches the recorded MIME type.
/// </summary>
public partial class VoiceRecorder : MSGComponentBase
{
    [Inject]
    private ILogger<VoiceRecorder> Logger { get; init; } = null!;

    [Inject]
    private IJSRuntime JsRuntime { get; init; } = null!;

    [Inject]
    private RustService RustService { get; init; } = null!;

    private uint numReceivedChunks;
    private bool isRecording;
    private FileStream? currentRecordingStream;
    private string? currentRecordingPath;
    private string? currentRecordingMimeType;
    private DotNetObjectReference<VoiceRecorder>? dotNetReference;

    private string Tooltip => this.isRecording ? T("Stop recording and start transcription") : T("Start recording your voice for a transcription");

    /// <summary>
    /// Handles the toggle: <c>true</c> starts a new recording, <c>false</c> stops the current one.
    /// </summary>
    /// <param name="toggled">The new toggle state.</param>
    private async Task OnRecordingToggled(bool toggled)
    {
        if (toggled)
            await this.StartRecording();
        else
            await this.StopRecording();
    }

    /// <summary>
    /// Opens the recording file and asks the JavaScript side to start recording.
    /// </summary>
    private async Task StartRecording()
    {
        var mimeTypes = GetPreferredMimeTypes(
            Builder.Create().UseAudio().UseSubtype(AudioSubtype.OGG).Build(),
            Builder.Create().UseAudio().UseSubtype(AudioSubtype.AAC).Build(),
            Builder.Create().UseAudio().UseSubtype(AudioSubtype.MP3).Build(),
            Builder.Create().UseAudio().UseSubtype(AudioSubtype.AIFF).Build(),
            Builder.Create().UseAudio().UseSubtype(AudioSubtype.WAV).Build(),
            Builder.Create().UseAudio().UseSubtype(AudioSubtype.FLAC).Build()
        );

        this.Logger.LogInformation("Starting audio recording with preferred MIME types: '{PreferredMimeTypes}'.", string.Join(", ", mimeTypes));

        try
        {
            // Create a DotNetObjectReference to pass to JavaScript:
            this.dotNetReference = DotNetObjectReference.Create(this);

            // Initialize the file stream for writing chunks:
            await this.InitializeRecordingStream();

            var mimeTypeStrings = mimeTypes.ToStringArray();
            var actualMimeType = await this.JsRuntime.InvokeAsync<string>("audioRecorder.start", this.dotNetReference, mimeTypeStrings);

            // Store the MIME type for later use:
            this.currentRecordingMimeType = actualMimeType;

            this.Logger.LogInformation("Audio recording started with MIME type: '{ActualMimeType}'.", actualMimeType);
            this.isRecording = true;
        }
        catch
        {
            // Starting failed: do not leave an open stream, an empty file,
            // or a live .NET reference behind. The exception still propagates.
            await this.DiscardRecording();
            throw;
        }
    }

    /// <summary>
    /// Asks the JavaScript side to stop recording and finalizes the recording file.
    /// </summary>
    private async Task StopRecording()
    {
        try
        {
            var result = await this.JsRuntime.InvokeAsync<AudioRecordingResult>("audioRecorder.stop");
            if (result.ChangedMimeType)
            {
                this.Logger.LogWarning("The recorded audio MIME type was changed to '{ResultMimeType}'.", result.MimeType);

                // Use the reported MIME type so that the file extension matches the content:
                if (!string.IsNullOrWhiteSpace(result.MimeType))
                    this.currentRecordingMimeType = result.MimeType;
            }
        }
        finally
        {
            // Close and finalize the recording stream, even when stopping failed:
            await this.FinalizeRecordingStream();

            this.isRecording = false;
            this.StateHasChanged();
        }
    }

    /// <summary>
    /// Returns the given MIME types, or a default list when none are given.
    /// </summary>
    /// <param name="mimeTypes">The preferred MIME types in the caller's order.</param>
    /// <returns>The given array, or the default list (WebM, Ogg, MP4, MPEG) when it is empty.</returns>
    private static MIMEType[] GetPreferredMimeTypes(params MIMEType[] mimeTypes)
    {
        if (mimeTypes.Length is not 0)
            return mimeTypes;

        // Default list if no parameters provided. Every entry gets its own builder,
        // because the built MIME type keeps a reference to the builder that made it:
        return
        [
            Builder.Create().UseAudio().UseSubtype(AudioSubtype.WEBM).Build(),
            Builder.Create().UseAudio().UseSubtype(AudioSubtype.OGG).Build(),
            Builder.Create().UseAudio().UseSubtype(AudioSubtype.MP4).Build(),
            Builder.Create().UseAudio().UseSubtype(AudioSubtype.MPEG).Build(),
        ];
    }

    /// <summary>
    /// Creates the recording directory if needed and opens a new file stream for the chunks.
    /// The file gets the temporary extension <c>.audio</c> until the recording is finalized.
    /// </summary>
    private async Task InitializeRecordingStream()
    {
        this.numReceivedChunks = 0;
        var dataDirectory = await this.RustService.GetDataDirectory();
        var recordingDirectory = Path.Combine(dataDirectory, "audioRecordings");
        if (!Directory.Exists(recordingDirectory))
            Directory.CreateDirectory(recordingDirectory);

        var fileName = $"recording_{DateTime.UtcNow:yyyyMMdd_HHmmss}.audio";
        this.currentRecordingPath = Path.Combine(recordingDirectory, fileName);
        this.currentRecordingStream = new FileStream(this.currentRecordingPath, FileMode.Create, FileAccess.Write, FileShare.None, bufferSize: 8192, useAsync: true);

        this.Logger.LogInformation("Initialized audio recording stream: '{RecordingPath}'.", this.currentRecordingPath);
    }

    /// <summary>
    /// Called from JavaScript for every recorded audio chunk; appends the chunk to the recording file.
    /// Write errors are logged and not rethrown.
    /// </summary>
    /// <param name="chunkBytes">The raw bytes of the chunk.</param>
    [JSInvokable]
    public async Task OnAudioChunkReceived(byte[] chunkBytes)
    {
        if (this.currentRecordingStream is null)
        {
            this.Logger.LogWarning("Received audio chunk but no recording stream is active.");
            return;
        }

        try
        {
            this.numReceivedChunks++;
            await this.currentRecordingStream.WriteAsync(chunkBytes);
            await this.currentRecordingStream.FlushAsync();

            this.Logger.LogDebug("Wrote {ByteCount} bytes to recording stream.", chunkBytes.Length);
        }
        catch (Exception ex)
        {
            this.Logger.LogError(ex, "Error writing audio chunk to stream.");
        }
    }

    /// <summary>
    /// Closes the recording stream, renames the file according to the recorded MIME type,
    /// and releases the .NET reference handed to JavaScript.
    /// </summary>
    private async Task FinalizeRecordingStream()
    {
        // Detach the stream first, so that chunks arriving from now on are rejected
        // instead of being written to a stream that is about to be closed:
        var stream = this.currentRecordingStream;
        this.currentRecordingStream = null;

        try
        {
            if (stream is not null)
            {
                await using (stream)
                    await stream.FlushAsync();

                // Rename the file with the correct extension based on MIME type:
                if (this.currentRecordingPath is not null && this.currentRecordingMimeType is not null)
                {
                    var extension = GetFileExtension(this.currentRecordingMimeType);
                    var newPath = Path.ChangeExtension(this.currentRecordingPath, extension);

                    if (File.Exists(this.currentRecordingPath))
                    {
                        File.Move(this.currentRecordingPath, newPath, overwrite: true);
                        this.Logger.LogInformation("Finalized audio recording over {NumChunks} streamed audio chunks to the file '{RecordingPath}'.", this.numReceivedChunks, newPath);
                    }
                }
            }
        }
        finally
        {
            this.currentRecordingPath = null;
            this.currentRecordingMimeType = null;

            // Dispose the .NET reference:
            this.dotNetReference?.Dispose();
            this.dotNetReference = null;
        }
    }

    /// <summary>
    /// Cleans up after a recording that could not be started: closes the stream,
    /// removes the (still empty) recording file, and releases the .NET reference.
    /// </summary>
    private async Task DiscardRecording()
    {
        var stream = this.currentRecordingStream;
        var path = this.currentRecordingPath;

        this.currentRecordingStream = null;
        this.currentRecordingPath = null;
        this.currentRecordingMimeType = null;

        try
        {
            if (stream is not null)
                await stream.DisposeAsync();

            if (path is not null && File.Exists(path))
                File.Delete(path);
        }
        catch (Exception ex)
        {
            // Best effort only; the original start failure is the error that matters to the caller.
            this.Logger.LogWarning(ex, "Failed to clean up the audio recording after a failed start.");
        }

        this.dotNetReference?.Dispose();
        this.dotNetReference = null;
    }

    /// <summary>
    /// Maps a MIME type to a file extension. Parameters such as <c>;codecs=opus</c> are ignored.
    /// </summary>
    /// <param name="mimeType">The MIME type, optionally with parameters.</param>
    /// <returns>The extension including the leading dot; <c>.audio</c> for unknown types.</returns>
    private static string GetFileExtension(string mimeType)
    {
        var baseMimeType = mimeType.Split(';')[0].Trim().ToLowerInvariant();
        return baseMimeType switch
        {
            "audio/webm" => ".webm",
            "audio/ogg" => ".ogg",
            "audio/mp4" => ".m4a",
            "audio/mpeg" => ".mp3",
            "audio/mp3" => ".mp3",
            "audio/wav" => ".wav",
            "audio/x-wav" => ".wav",
            "audio/aac" => ".aac",
            "audio/flac" => ".flac",
            "audio/aiff" => ".aiff",
            _ => ".audio" // Fallback
        };
    }

    /// <summary>
    /// Result returned by the JavaScript function <c>audioRecorder.stop</c>.
    /// </summary>
    private sealed class AudioRecordingResult
    {
        public string MimeType { get; init; } = string.Empty;

        public bool ChangedMimeType { get; init; }
    }

    #region Overrides of MSGComponentBase

    protected override void DisposeResources()
    {
        // Clean up recording resources if still active:
        if (this.currentRecordingStream is not null)
        {
            this.currentRecordingStream.Dispose();
            this.currentRecordingStream = null;
        }

        this.dotNetReference?.Dispose();
        this.dotNetReference = null;
        base.DisposeResources();
    }

    #endregion
}
@@ -41,6 +51,14 @@ } } + + + + + + + + } } diff --git a/app/MindWork AI Studio/Layout/MainLayout.razor.cs b/app/MindWork AI Studio/Layout/MainLayout.razor.cs index 064313cf..fc89f248 100644 --- a/app/MindWork AI Studio/Layout/MainLayout.razor.cs +++ b/app/MindWork AI Studio/Layout/MainLayout.razor.cs @@ -341,7 +341,7 @@ public partial class MainLayout : LayoutComponentBase, IMessageBusReceiver, ILan await this.MessageBus.SendMessage(this, Event.COLOR_THEME_CHANGED); this.StateHasChanged(); } - + #region Implementation of IDisposable public void Dispose() diff --git a/app/MindWork AI Studio/Plugins/languages/de-de-43065dbc-78d0-45b7-92be-f14c2926e2dc/plugin.lua b/app/MindWork AI Studio/Plugins/languages/de-de-43065dbc-78d0-45b7-92be-f14c2926e2dc/plugin.lua index 3e95cd2a..e2ad8227 100644 --- a/app/MindWork AI Studio/Plugins/languages/de-de-43065dbc-78d0-45b7-92be-f14c2926e2dc/plugin.lua +++ b/app/MindWork AI Studio/Plugins/languages/de-de-43065dbc-78d0-45b7-92be-f14c2926e2dc/plugin.lua @@ -2325,6 +2325,12 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VISION::T428040679"] = "Erstellung von In -- Useful assistants UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VISION::T586430036"] = "Nützliche Assistenten" +-- Stop recording and start transcription +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T224155287"] = "Aufnahme beenden und Transkription starten" + +-- Start recording your voice for a transcription +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T2372624045"] = "Beginnen Sie mit der Aufnahme Ihrer Stimme für eine Transkription" + -- Are you sure you want to delete the chat '{0}' in the workspace '{1}'? UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::WORKSPACES::T1016188706"] = "Möchten Sie den Chat „{0}“ im Arbeitsbereich „{1}“ wirklich löschen?" 
@@ -5370,6 +5376,9 @@ UI_TEXT_CONTENT["AISTUDIO::SETTINGS::DATAMODEL::PREVIEWFEATURESEXTENSIONS::T1848 -- Plugins: Preview of our plugin system where you can extend the functionality of the app UI_TEXT_CONTENT["AISTUDIO::SETTINGS::DATAMODEL::PREVIEWFEATURESEXTENSIONS::T2056842933"] = "Plugins: Vorschau auf unser Pluginsystems, mit dem Sie die Funktionalität der App erweitern können" +-- Speech to Text: Preview of our speech to text system where you can transcribe recordings and audio files into text +UI_TEXT_CONTENT["AISTUDIO::SETTINGS::DATAMODEL::PREVIEWFEATURESEXTENSIONS::T221133923"] = "Sprache zu Text: Vorschau unseres Sprache-zu-Text-Systems, mit dem Sie Aufnahmen und Audiodateien in Text transkribieren können." + -- RAG: Preview of our RAG implementation where you can refer your files or integrate enterprise data within your company UI_TEXT_CONTENT["AISTUDIO::SETTINGS::DATAMODEL::PREVIEWFEATURESEXTENSIONS::T2708939138"] = "RAG: Vorschau auf unsere RAG-Implementierung, mit der Sie auf ihre Dateien zugreifen oder Unternehmensdaten in ihrem Unternehmen integrieren können" diff --git a/app/MindWork AI Studio/Plugins/languages/en-us-97dfb1ba-50c4-4440-8dfa-6575daf543c8/plugin.lua b/app/MindWork AI Studio/Plugins/languages/en-us-97dfb1ba-50c4-4440-8dfa-6575daf543c8/plugin.lua index 9ea4cc1b..1057ad86 100644 --- a/app/MindWork AI Studio/Plugins/languages/en-us-97dfb1ba-50c4-4440-8dfa-6575daf543c8/plugin.lua +++ b/app/MindWork AI Studio/Plugins/languages/en-us-97dfb1ba-50c4-4440-8dfa-6575daf543c8/plugin.lua @@ -2325,6 +2325,12 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VISION::T428040679"] = "Content creation" -- Useful assistants UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VISION::T586430036"] = "Useful assistants" +-- Stop recording and start transcription +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T224155287"] = "Stop recording and start transcription" + +-- Start recording your voice for a transcription 
+UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T2372624045"] = "Start recording your voice for a transcription" + -- Are you sure you want to delete the chat '{0}' in the workspace '{1}'? UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::WORKSPACES::T1016188706"] = "Are you sure you want to delete the chat '{0}' in the workspace '{1}'?" @@ -5370,6 +5376,9 @@ UI_TEXT_CONTENT["AISTUDIO::SETTINGS::DATAMODEL::PREVIEWFEATURESEXTENSIONS::T1848 -- Plugins: Preview of our plugin system where you can extend the functionality of the app UI_TEXT_CONTENT["AISTUDIO::SETTINGS::DATAMODEL::PREVIEWFEATURESEXTENSIONS::T2056842933"] = "Plugins: Preview of our plugin system where you can extend the functionality of the app" +-- Speech to Text: Preview of our speech to text system where you can transcribe recordings and audio files into text +UI_TEXT_CONTENT["AISTUDIO::SETTINGS::DATAMODEL::PREVIEWFEATURESEXTENSIONS::T221133923"] = "Speech to Text: Preview of our speech to text system where you can transcribe recordings and audio files into text" + -- RAG: Preview of our RAG implementation where you can refer your files or integrate enterprise data within your company UI_TEXT_CONTENT["AISTUDIO::SETTINGS::DATAMODEL::PREVIEWFEATURESEXTENSIONS::T2708939138"] = "RAG: Preview of our RAG implementation where you can refer your files or integrate enterprise data within your company" diff --git a/app/MindWork AI Studio/Program.cs b/app/MindWork AI Studio/Program.cs index cc185180..fa7927b1 100644 --- a/app/MindWork AI Studio/Program.cs +++ b/app/MindWork AI Studio/Program.cs @@ -83,7 +83,6 @@ internal sealed class Program } var builder = WebApplication.CreateBuilder(); - builder.WebHost.ConfigureKestrel(kestrelServerOptions => { kestrelServerOptions.ConfigureEndpointDefaults(listenOptions => diff --git a/app/MindWork AI Studio/Settings/DataModel/PreviewFeatures.cs b/app/MindWork AI Studio/Settings/DataModel/PreviewFeatures.cs index 49aad8d0..d74898dd 100644 --- a/app/MindWork AI 
Studio/Settings/DataModel/PreviewFeatures.cs +++ b/app/MindWork AI Studio/Settings/DataModel/PreviewFeatures.cs @@ -12,4 +12,5 @@ public enum PreviewFeatures PRE_PLUGINS_2025, PRE_READ_PDF_2025, PRE_DOCUMENT_ANALYSIS_2025, + PRE_SPEECH_TO_TEXT_2026, } \ No newline at end of file diff --git a/app/MindWork AI Studio/Settings/DataModel/PreviewFeaturesExtensions.cs b/app/MindWork AI Studio/Settings/DataModel/PreviewFeaturesExtensions.cs index e80495b2..0433119c 100644 --- a/app/MindWork AI Studio/Settings/DataModel/PreviewFeaturesExtensions.cs +++ b/app/MindWork AI Studio/Settings/DataModel/PreviewFeaturesExtensions.cs @@ -14,6 +14,7 @@ public static class PreviewFeaturesExtensions PreviewFeatures.PRE_PLUGINS_2025 => TB("Plugins: Preview of our plugin system where you can extend the functionality of the app"), PreviewFeatures.PRE_READ_PDF_2025 => TB("Read PDF: Preview of our PDF reading system where you can read and extract text from PDF files"), PreviewFeatures.PRE_DOCUMENT_ANALYSIS_2025 => TB("Document Analysis: Preview of our document analysis system where you can analyze and extract information from documents"), + PreviewFeatures.PRE_SPEECH_TO_TEXT_2026 => TB("Speech to Text: Preview of our speech to text system where you can transcribe recordings and audio files into text"), _ => TB("Unknown preview feature") }; diff --git a/app/MindWork AI Studio/Settings/DataModel/PreviewVisibilityExtensions.cs b/app/MindWork AI Studio/Settings/DataModel/PreviewVisibilityExtensions.cs index bd648b24..53612acc 100644 --- a/app/MindWork AI Studio/Settings/DataModel/PreviewVisibilityExtensions.cs +++ b/app/MindWork AI Studio/Settings/DataModel/PreviewVisibilityExtensions.cs @@ -21,6 +21,7 @@ public static class PreviewVisibilityExtensions { features.Add(PreviewFeatures.PRE_RAG_2024); features.Add(PreviewFeatures.PRE_DOCUMENT_ANALYSIS_2025); + features.Add(PreviewFeatures.PRE_SPEECH_TO_TEXT_2026); } if (visibility >= PreviewVisibility.EXPERIMENTAL) diff --git a/app/MindWork AI 
namespace AIStudio.Tools.MIME;

// ---------------------------------------------------------------------------
// Tools/MIME/ApplicationBuilder.cs
// ---------------------------------------------------------------------------

/// <summary>
/// Builds MIME types of the <c>application</c> base type.
/// </summary>
public class ApplicationBuilder : ISubtype
{
    private const BaseType BASE_TYPE = BaseType.APPLICATION;

    private ApplicationBuilder()
    {
    }

    public static ApplicationBuilder Create() => new();

    private ApplicationSubtype subtype;

    /// <summary>
    /// Selects the subtype from its textual form, e.g. <c>pdf</c> or <c>octet-stream</c>.
    /// </summary>
    /// <param name="subType">The subtype text; matched case-insensitively.</param>
    /// <returns>This builder.</returns>
    /// <exception cref="ArgumentNullException">The subtype is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">The subtype is not supported.</exception>
    public ApplicationBuilder UseSubtype(string subType)
    {
        ArgumentNullException.ThrowIfNull(subType);
        this.subtype = subType.ToLowerInvariant() switch
        {
            "vnd.ms-excel" => ApplicationSubtype.EXCEL_OLD,

            // "msword" is the registered name; "vnd.ms-word" stays accepted for compatibility:
            "msword" or "vnd.ms-word" => ApplicationSubtype.WORD_OLD,
            "vnd.ms-powerpoint" => ApplicationSubtype.POWERPOINT_OLD,

            "vnd.openxmlformats-officedocument.spreadsheetml.sheet" => ApplicationSubtype.EXCEL,
            "vnd.openxmlformats-officedocument.wordprocessingml.document" => ApplicationSubtype.WORD,
            "vnd.openxmlformats-officedocument.presentationml.presentation" => ApplicationSubtype.POWERPOINT,

            "octet-stream" => ApplicationSubtype.OCTET_STREAM,

            "json" => ApplicationSubtype.JSON,
            "xml" => ApplicationSubtype.XML,
            "pdf" => ApplicationSubtype.PDF,
            "zip" => ApplicationSubtype.ZIP,

            "x-www-form-urlencoded" => ApplicationSubtype.X_WWW_FORM_URLENCODED,
            _ => throw new ArgumentOutOfRangeException(nameof(subType), "Unsupported MIME application subtype.")
        };

        return this;
    }

    /// <summary>
    /// Selects the subtype.
    /// </summary>
    /// <param name="subType">The subtype to use.</param>
    /// <returns>This builder.</returns>
    public ApplicationBuilder UseSubtype(ApplicationSubtype subType)
    {
        this.subtype = subType;
        return this;
    }

    #region Implementation of ISubtype

    /// <summary>
    /// Builds the MIME type for the currently selected subtype.
    /// </summary>
    public MIMEType Build()
    {
        // The enum names cannot contain '-' or '.', so every subtype whose MIME text
        // differs from its enum name is spelled out explicitly:
        var subtypeText = this.subtype switch
        {
            ApplicationSubtype.EXCEL_OLD => "vnd.ms-excel",
            ApplicationSubtype.WORD_OLD => "msword",
            ApplicationSubtype.POWERPOINT_OLD => "vnd.ms-powerpoint",

            ApplicationSubtype.EXCEL => "vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            ApplicationSubtype.WORD => "vnd.openxmlformats-officedocument.wordprocessingml.document",
            ApplicationSubtype.POWERPOINT => "vnd.openxmlformats-officedocument.presentationml.presentation",

            ApplicationSubtype.OCTET_STREAM => "octet-stream",
            ApplicationSubtype.X_WWW_FORM_URLENCODED => "x-www-form-urlencoded",

            _ => this.subtype.ToString()
        };

        return new()
        {
            Type = this,
            TextRepresentation = $"{BASE_TYPE}/{subtypeText}".ToLowerInvariant()
        };
    }

    #endregion
}

// ---------------------------------------------------------------------------
// Tools/MIME/ApplicationSubtype.cs
// ---------------------------------------------------------------------------

/// <summary>
/// The supported subtypes of the <c>application</c> base type.
/// </summary>
public enum ApplicationSubtype
{
    OCTET_STREAM,

    JSON,
    XML,
    PDF,
    ZIP,
    X_WWW_FORM_URLENCODED,

    WORD_OLD,
    WORD,

    EXCEL_OLD,
    EXCEL,

    POWERPOINT_OLD,
    POWERPOINT,
}

// ---------------------------------------------------------------------------
// Tools/MIME/AudioBuilder.cs
// ---------------------------------------------------------------------------

/// <summary>
/// Builds MIME types of the <c>audio</c> base type.
/// </summary>
public class AudioBuilder : ISubtype
{
    private const BaseType BASE_TYPE = BaseType.AUDIO;

    private AudioBuilder()
    {
    }

    public static AudioBuilder Create() => new();

    private AudioSubtype subtype;

    /// <summary>
    /// Selects the subtype from its textual form, e.g. <c>ogg</c> or <c>webm</c>.
    /// </summary>
    /// <param name="subType">The subtype text; matched case-insensitively.</param>
    /// <returns>This builder.</returns>
    /// <exception cref="ArgumentException">The subtype is null or not supported.</exception>
    public AudioBuilder UseSubtype(string subType)
    {
        ArgumentNullException.ThrowIfNull(subType);
        this.subtype = subType.ToLowerInvariant() switch
        {
            "mpeg" => AudioSubtype.MPEG,
            "wav" or "x-wav" => AudioSubtype.WAV,
            "ogg" => AudioSubtype.OGG,
            "aac" => AudioSubtype.AAC,
            "flac" => AudioSubtype.FLAC,
            "webm" => AudioSubtype.WEBM,
            "mp4" => AudioSubtype.MP4,
            "mp3" => AudioSubtype.MP3,
            "m4a" => AudioSubtype.M4A,
            "aiff" => AudioSubtype.AIFF,

            _ => throw new ArgumentException("Unsupported MIME audio subtype.", nameof(subType))
        };

        return this;
    }

    /// <summary>
    /// Selects the subtype.
    /// </summary>
    /// <param name="subType">The subtype to use.</param>
    /// <returns>This builder.</returns>
    public AudioBuilder UseSubtype(AudioSubtype subType)
    {
        this.subtype = subType;
        return this;
    }

    #region Implementation of ISubtype

    /// <summary>
    /// Builds the MIME type for the currently selected subtype.
    /// </summary>
    public MIMEType Build() => new()
    {
        Type = this,
        TextRepresentation = $"{BASE_TYPE}/{this.subtype}".ToLowerInvariant()
    };

    #endregion
}

// ---------------------------------------------------------------------------
// Tools/MIME/AudioSubtype.cs
// ---------------------------------------------------------------------------

/// <summary>
/// The supported subtypes of the <c>audio</c> base type.
/// </summary>
public enum AudioSubtype
{
    WAV,
    MP3,
    OGG,
    AAC,
    FLAC,
    // ReSharper disable once InconsistentNaming
    M4A,
    MPEG,
    MP4,
    WEBM,
    AIFF
}

// ---------------------------------------------------------------------------
// Tools/MIME/BaseType.cs
// ---------------------------------------------------------------------------

/// <summary>
/// The supported MIME base types. The lower-cased member name is the base type text.
/// </summary>
public enum BaseType
{
    APPLICATION,
    AUDIO,
    IMAGE,
    VIDEO,
    TEXT,
}

// ---------------------------------------------------------------------------
// Tools/MIME/Builder.cs
// ---------------------------------------------------------------------------

/// <summary>
/// Entry point for building MIME types: pick a base type, then a subtype, then build.
/// </summary>
public class Builder
{
    private Builder()
    {
    }

    public static Builder Create() => new();

    /// <summary>
    /// Parses a MIME type such as <c>image/png</c>. Parameters after the first <c>;</c>
    /// (e.g. <c>audio/webm;codecs=opus</c>) are ignored.
    /// </summary>
    /// <param name="textRepresentation">The MIME type text; matched case-insensitively.</param>
    /// <returns>The parsed MIME type.</returns>
    /// <exception cref="ArgumentException">
    /// The text is null, is not of the form <c>type/subtype</c>, or names an unsupported base type or subtype.
    /// </exception>
    public static MIMEType FromTextRepresentation(string textRepresentation)
    {
        ArgumentNullException.ThrowIfNull(textRepresentation);

        // Drop optional parameters; only "type/subtype" identifies the MIME type:
        var parameterStart = textRepresentation.IndexOf(';');
        var essence = parameterStart >= 0 ? textRepresentation[..parameterStart] : textRepresentation;

        var parts = essence.Split('/');
        if (parts.Length != 2)
            throw new ArgumentException("Invalid MIME type format.", nameof(textRepresentation));

        var baseType = parts[0].Trim().ToLowerInvariant();
        var subType = parts[1].Trim().ToLowerInvariant();

        var builder = Create();
        return baseType switch
        {
            "application" => builder.UseApplication().UseSubtype(subType).Build(),
            "text" => builder.UseText().UseSubtype(subType).Build(),
            "audio" => builder.UseAudio().UseSubtype(subType).Build(),
            "image" => builder.UseImage().UseSubtype(subType).Build(),
            "video" => builder.UseVideo().UseSubtype(subType).Build(),

            _ => throw new ArgumentException("Unsupported base type.", nameof(textRepresentation))
        };
    }

    public ApplicationBuilder UseApplication() => ApplicationBuilder.Create();

    public TextBuilder UseText() => TextBuilder.Create();

    public AudioBuilder UseAudio() => AudioBuilder.Create();

    public ImageBuilder UseImage() => ImageBuilder.Create();

    public VideoBuilder UseVideo() => VideoBuilder.Create();
}

// ---------------------------------------------------------------------------
// Tools/MIME/ISubtype.cs
// ---------------------------------------------------------------------------

/// <summary>
/// A subtype builder that can produce the final MIME type.
/// </summary>
public interface ISubtype
{
    public MIMEType Build();
}

// ---------------------------------------------------------------------------
// Tools/MIME/ImageBuilder.cs
// ---------------------------------------------------------------------------

/// <summary>
/// Builds MIME types of the <c>image</c> base type.
/// </summary>
public class ImageBuilder : ISubtype
{
    private const BaseType BASE_TYPE = BaseType.IMAGE;

    private ImageBuilder()
    {
    }

    public static ImageBuilder Create() => new();

    private ImageSubtype subtype;

    /// <summary>
    /// Selects the subtype from its textual form, e.g. <c>png</c> or <c>svg+xml</c>.
    /// </summary>
    /// <param name="subType">The subtype text; matched case-insensitively.</param>
    /// <returns>This builder.</returns>
    /// <exception cref="ArgumentException">The subtype is null or not supported.</exception>
    public ImageBuilder UseSubtype(string subType)
    {
        ArgumentNullException.ThrowIfNull(subType);
        this.subtype = subType.ToLowerInvariant() switch
        {
            "jpeg" or "jpg" => ImageSubtype.JPEG,
            "png" => ImageSubtype.PNG,
            "gif" => ImageSubtype.GIF,
            "webp" => ImageSubtype.WEBP,
            "tiff" or "tif" => ImageSubtype.TIFF,
            "svg+xml" or "svg" => ImageSubtype.SVG,
            "heic" => ImageSubtype.HEIC,
            "bmp" => ImageSubtype.BMP,

            _ => throw new ArgumentException("Unsupported MIME image subtype.", nameof(subType))
        };

        return this;
    }

    /// <summary>
    /// Selects the subtype.
    /// </summary>
    /// <param name="subType">The subtype to use.</param>
    /// <returns>This builder.</returns>
    public ImageBuilder UseSubtype(ImageSubtype subType)
    {
        this.subtype = subType;
        return this;
    }

    #region Implementation of ISubtype

    /// <summary>
    /// Builds the MIME type for the currently selected subtype.
    /// </summary>
    public MIMEType Build()
    {
        // SVG is the only image subtype whose MIME text differs from its enum name:
        var subtypeText = this.subtype switch
        {
            ImageSubtype.SVG => "svg+xml",
            _ => this.subtype.ToString()
        };

        return new()
        {
            Type = this,
            TextRepresentation = $"{BASE_TYPE}/{subtypeText}".ToLowerInvariant()
        };
    }

    #endregion
}

// ---------------------------------------------------------------------------
// Tools/MIME/ImageSubtype.cs
// ---------------------------------------------------------------------------

/// <summary>
/// The supported subtypes of the <c>image</c> base type.
/// </summary>
public enum ImageSubtype
{
    JPEG,
    PNG,
    GIF,
    TIFF,
    WEBP,
    SVG,
    HEIC,

    // Appended last so that the numeric values of the existing members stay unchanged:
    BMP,
}

// ---------------------------------------------------------------------------
// Tools/MIME/MIMEType.cs
// ---------------------------------------------------------------------------

/// <summary>
/// A MIME type: the builder it was created by, plus its text form such as <c>image/png</c>.
/// Converts implicitly to its text form.
/// </summary>
public record MIMEType
{
    public required ISubtype Type { get; init; }

    public required string TextRepresentation { get; init; }

    #region Overrides of Object

    public override string ToString() => this.TextRepresentation;

    #endregion

    public static implicit operator string(MIMEType mimeType) => mimeType.TextRepresentation;
}

// ---------------------------------------------------------------------------
// Tools/MIME/MIMETypeExtensions.cs
// ---------------------------------------------------------------------------

public static class MIMETypeExtensions
{
    /// <summary>
    /// Converts the MIME types to their text forms, keeping the order.
    /// </summary>
    /// <param name="mimeTypes">The MIME types to convert.</param>
    /// <returns>A new array with one text form per input element.</returns>
    public static string[] ToStringArray(this MIMEType[] mimeTypes)
        => Array.ConvertAll(mimeTypes, static mimeType => mimeType.TextRepresentation);
}

// ---------------------------------------------------------------------------
// Tools/MIME/TextBuilder.cs
// ---------------------------------------------------------------------------

/// <summary>
/// Builds MIME types of the <c>text</c> base type.
/// </summary>
public class TextBuilder : ISubtype
{
    private const BaseType BASE_TYPE = BaseType.TEXT;

    private TextBuilder()
    {
    }

    public static TextBuilder Create() => new();

    private TextSubtype subtype;

    /// <summary>
    /// Selects the subtype from its textual form, e.g. <c>plain</c> or <c>html</c>.
    /// </summary>
    /// <param name="subType">The subtype text; matched case-insensitively.</param>
    /// <returns>This builder.</returns>
    /// <exception cref="ArgumentException">The subtype is null or not supported.</exception>
    public TextBuilder UseSubtype(string subType)
    {
        ArgumentNullException.ThrowIfNull(subType);
        this.subtype = subType.ToLowerInvariant() switch
        {
            "plain" => TextSubtype.PLAIN,
            "html" => TextSubtype.HTML,
            "css" => TextSubtype.CSS,
            "csv" => TextSubtype.CSV,
            "javascript" => TextSubtype.JAVASCRIPT,
            "xml" => TextSubtype.XML,
            "markdown" => TextSubtype.MARKDOWN,
            "json" => TextSubtype.JSON,

            _ => throw new ArgumentException("Unsupported MIME text subtype.", nameof(subType))
        };

        return this;
    }

    /// <summary>
    /// Selects the subtype.
    /// </summary>
    /// <param name="subType">The subtype to use.</param>
    /// <returns>This builder.</returns>
    public TextBuilder UseSubtype(TextSubtype subType)
    {
        this.subtype = subType;
        return this;
    }

    #region Implementation of ISubtype

    /// <summary>
    /// Builds the MIME type for the currently selected subtype.
    /// </summary>
    public MIMEType Build() => new()
    {
        Type = this,
        TextRepresentation = $"{BASE_TYPE}/{this.subtype}".ToLowerInvariant()
    };

    #endregion
}

// ---------------------------------------------------------------------------
// Tools/MIME/TextSubtype.cs
// ---------------------------------------------------------------------------

/// <summary>
/// The supported subtypes of the <c>text</c> base type.
/// </summary>
public enum TextSubtype
{
    PLAIN,
    HTML,
    CSS,
    CSV,
    JAVASCRIPT,
    XML,
    JSON,
    MARKDOWN,
}
AI Studio/Tools/MIME/VideoBuilder.cs
new file mode 100644
index 00000000..6d23d8b3
--- /dev/null
+++ b/app/MindWork AI Studio/Tools/MIME/VideoBuilder.cs
@@ -0,0 +1,55 @@
+namespace AIStudio.Tools.MIME;
+
+/// <summary>
+/// Builds MIME types that use <see cref="BaseType.VIDEO"/> as their base type.
+/// </summary>
+public class VideoBuilder : ISubtype
+{
+    private const BaseType BASE_TYPE = BaseType.VIDEO;
+
+    private VideoBuilder()
+    {
+    }
+
+    /// <summary>Creates a new builder; the constructor is private, so this is the only entry point.</summary>
+    public static VideoBuilder Create() => new();
+
+    private VideoSubtype subtype;
+
+    /// <summary>Selects the subtype from its textual name; the name is matched case-insensitively.</summary>
+    /// <exception cref="ArgumentException">The name is not one of the supported video subtypes.</exception>
+    public VideoBuilder UseSubtype(string subType)
+    {
+        this.subtype = subType.ToLowerInvariant() switch
+        {
+            "mp4" => VideoSubtype.MP4,
+            "webm" => VideoSubtype.WEBM,
+            "avi" => VideoSubtype.AVI,
+            "mov" => VideoSubtype.MOV,
+            "mkv" => VideoSubtype.MKV,
+            "mpeg" => VideoSubtype.MPEG,
+
+            _ => throw new ArgumentException("Unsupported MIME video subtype.", nameof(subType))
+        };
+
+        return this;
+    }
+
+    /// <summary>Selects the subtype directly.</summary>
+    public VideoBuilder UseSubtype(VideoSubtype subType)
+    {
+        this.subtype = subType;
+        return this;
+    }
+
+    #region Implementation of IMIMESubtype
+
+    /// <summary>Creates the MIME type; its text is the lower-cased base type and subtype joined by a slash.</summary>
+    public MIMEType Build() => new()
+    {
+        Type = this,
+        TextRepresentation = $"{BASE_TYPE}/{this.subtype}".ToLowerInvariant()
+    };
+
+    #endregion
+}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Tools/MIME/VideoSubtype.cs b/app/MindWork AI Studio/Tools/MIME/VideoSubtype.cs
new file mode 100644
index 00000000..cf152b1b
--- /dev/null
+++ b/app/MindWork AI Studio/Tools/MIME/VideoSubtype.cs
@@ -0,0 +1,14 @@
+namespace AIStudio.Tools.MIME;
+
+/// <summary>
+/// The video formats that <see cref="VideoBuilder"/> can turn into a MIME type.
+/// </summary>
+public enum VideoSubtype
+{
+    MP4,
+    AVI,
+    MOV,
+    MKV,
+    WEBM,
+    MPEG,
+}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/wwwroot/app.js b/app/MindWork AI Studio/wwwroot/app.js
index aa6b8e2b..2dd43e5c 100644
--- a/app/MindWork AI Studio/wwwroot/app.js
+++ b/app/MindWork AI Studio/wwwroot/app.js
@@ -25,4 +25,162 @@ window.clearDiv = function (divName) {
 
 window.scrollToBottom = function(element) {
     element.scrollIntoView({ behavior: 'smooth', block: 'end', inline: 'nearest' });
-}
\ No newline at end of file
+}
+
+let mediaRecorder;
+let actualRecordingMimeType;
+let changedMimeType = false;
+let pendingChunkUploads = 0;
+
+window.audioRecorder = {
+    // Plays a sound effect; failures are only logged and never thrown to the caller.
+    playSound: function(soundPath) {
+        try {
+            const audio = new Audio(soundPath);
+            audio.play().catch(error => {
+                console.warn('Failed to play sound effect:', error);
+            });
+        } catch (error) {
+            console.warn('Error creating audio element:', error);
+        }
+    },
+
+    // Requests the microphone, selects a supported mime type, and streams each recorded chunk to
+    // .NET via dotnetRef.invokeMethodAsync('OnAudioChunkReceived', ...). Resolves with the mime type in use.
+    start: async function (dotnetRef, desiredMimeTypes = []) {
+        const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+
+        // The microphone is open from here on; release it again if the setup below fails:
+        try {
+
+            // Play start recording sound effect:
+            this.playSound('/sounds/start_recording.ogg');
+
+            // When only one mime type is provided as a string, convert it to an array:
+            if (typeof desiredMimeTypes === 'string') {
+                desiredMimeTypes = [desiredMimeTypes];
+            } else if (!Array.isArray(desiredMimeTypes)) {
+                // An explicit null does not trigger the default parameter value:
+                desiredMimeTypes = [];
+            }
+
+            // Log sent mime types for debugging:
+            console.log('Audio recording - requested mime types: ', desiredMimeTypes);
+
+            let mimeTypes = desiredMimeTypes.filter(type => typeof type === 'string' && type.trim() !== '');
+
+            // Next, we have to ensure that we have some default mime types to check as well.
+            // In case the provided list does not contain these, we append them:
+            const defaultMimeTypes = [
+                'audio/webm',
+                'audio/ogg',
+                'audio/mp4',
+                'audio/mpeg',
+                '' // Fallback to browser default
+            ];
+
+            defaultMimeTypes.forEach(type => {
+                if (!mimeTypes.includes(type)) {
+                    mimeTypes.push(type);
+                }
+            });
+
+            console.log('Audio recording - final mime types to check (included defaults): ', mimeTypes);
+
+            // Find the first supported mime type:
+            actualRecordingMimeType = mimeTypes.find(type =>
+                type === '' || MediaRecorder.isTypeSupported(type)
+            ) || '';
+
+            console.log('Audio recording - the browser selected the following mime type for recording: ', actualRecordingMimeType);
+            const options = actualRecordingMimeType ? { mimeType: actualRecordingMimeType } : {};
+            const recorder = new MediaRecorder(stream, options);
+
+            // In case the browser changed the mime type:
+            actualRecordingMimeType = recorder.mimeType;
+            console.log('Audio recording - actual mime type used by the browser: ', actualRecordingMimeType);
+
+            // Check the list of desired mime types against the actual one:
+            if (!desiredMimeTypes.includes(actualRecordingMimeType)) {
+                changedMimeType = true;
+                console.warn(`Audio recording - requested mime types ('${desiredMimeTypes.join(', ')}') do not include the actual mime type used by the browser ('${actualRecordingMimeType}').`);
+            } else {
+                changedMimeType = false;
+            }
+
+            // Reset the pending uploads counter:
+            pendingChunkUploads = 0;
+
+            // Stream each chunk directly to .NET as it becomes available:
+            recorder.ondataavailable = async (event) => {
+                if (event.data.size > 0) {
+                    pendingChunkUploads++;
+                    try {
+                        const arrayBuffer = await event.data.arrayBuffer();
+                        const uint8Array = new Uint8Array(arrayBuffer);
+                        await dotnetRef.invokeMethodAsync('OnAudioChunkReceived', uint8Array);
+                    } catch (error) {
+                        console.error('Error sending audio chunk to .NET:', error);
+                    } finally {
+                        pendingChunkUploads--;
+                    }
+                }
+            };
+
+            mediaRecorder = recorder;
+            recorder.start(3000); // read the recorded data in 3-second chunks
+            return actualRecordingMimeType;
+        } catch (error) {
+            stream.getTracks().forEach(track => track.stop());
+            throw error;
+        }
+    },
+
+    // Stops the current recording and resolves with { mimeType, changedMimeType } once every
+    // pending chunk upload has finished and the microphone has been released.
+    stop: async function () {
+        if (!mediaRecorder) {
+            throw new Error('Audio recording - stop was called before a recording was started.');
+        }
+
+        const recorder = mediaRecorder;
+
+        // Shared completion path for the regular stop event and for an already inactive recorder:
+        const finalize = async () => {
+
+            // Wait for all pending chunk uploads to complete before finalizing:
+            console.log(`Audio recording - waiting for ${pendingChunkUploads} pending uploads.`);
+            while (pendingChunkUploads > 0) {
+                await new Promise(r => setTimeout(r, 10)); // wait 10 ms before checking again
+            }
+
+            console.log('Audio recording - all chunks uploaded, finalizing.');
+
+            // Play stop recording sound effect:
+            window.audioRecorder.playSound('/sounds/stop_recording.ogg');
+
+            // Stop all tracks to release the microphone:
+            recorder.stream.getTracks().forEach(track => track.stop());
+
+            // No need to process data here anymore, just signal completion:
+            return {
+                mimeType: actualRecordingMimeType,
+                changedMimeType: changedMimeType,
+            };
+        };
+
+        // An inactive recorder raises no further stop event, so waiting for one would never finish:
+        if (recorder.state === 'inactive') {
+            return finalize();
+        }
+
+        return new Promise((resolve, reject) => {
+
+            // Add an event listener to handle the stop event:
+            recorder.onstop = () => finalize().then(resolve, reject);
+
+            // Finally, stop the recording (which will actually trigger the onstop event):
+            recorder.stop();
+        });
+    }
+};
\ No newline at end of file
diff --git a/app/MindWork AI Studio/wwwroot/changelog/v26.1.1.md b/app/MindWork AI Studio/wwwroot/changelog/v26.1.1.md
index ec35c4ec..a1b23c7d 100644
--- a/app/MindWork AI Studio/wwwroot/changelog/v26.1.1.md
+++ b/app/MindWork AI Studio/wwwroot/changelog/v26.1.1.md
@@ -1,4 +1,5 @@
 # v26.1.1, build 231 (2026-01-xx xx:xx UTC)
 - Added the option to attach files, including images, to chat templates. You can also define templates with file attachments through a configuration plugin. These file attachments aren’t copied—they’re re-read every time. That means the AI will pick up any updates you make to those files.
 - Added the option to use source code files in chats and document analysis. This supports software development workflows.
+- Added a preview feature that lets you record your own voice in preparation for the transcription feature. The feature remains in development and appears only when the preview feature is enabled.
 - Improved the app versioning. Starting in 2026, each version number includes the year, followed by the month. The last digit shows the release number for that month. For example, version `26.1.1` is the first release in January 2026.
\ No newline at end of file
diff --git a/app/MindWork AI Studio/wwwroot/sounds/start_recording.ogg b/app/MindWork AI Studio/wwwroot/sounds/start_recording.ogg
new file mode 100644
index 00000000..b67bb65e
Binary files /dev/null and b/app/MindWork AI Studio/wwwroot/sounds/start_recording.ogg differ
diff --git a/app/MindWork AI Studio/wwwroot/sounds/stop_recording.ogg b/app/MindWork AI Studio/wwwroot/sounds/stop_recording.ogg
new file mode 100644
index 00000000..c2332408
Binary files /dev/null and b/app/MindWork AI Studio/wwwroot/sounds/stop_recording.ogg differ
diff --git a/app/MindWork AI Studio/wwwroot/sounds/transcription_done.ogg b/app/MindWork AI Studio/wwwroot/sounds/transcription_done.ogg
new file mode 100644
index 00000000..398acccc
Binary files /dev/null and b/app/MindWork AI Studio/wwwroot/sounds/transcription_done.ogg differ
diff --git a/runtime/Info.plist b/runtime/Info.plist
new file mode 100644
index 00000000..61967ac4
--- /dev/null
+++ b/runtime/Info.plist
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+  <key>NSMicrophoneUsageDescription</key>
+  <string>Request microphone access for voice recording</string>
+</dict>
+</plist>