From 0db4d614345fbe77f59194f4d3094c0e3a31e6c7 Mon Sep 17 00:00:00 2001 From: Thorsten Sommer Date: Tue, 6 Jan 2026 10:25:58 +0100 Subject: [PATCH] WIP Voice recording --- .../AudioRecorderHandler.cs | 44 ++++++++-- .../Layout/MainLayout.razor.cs | 81 +++++++++++++++++-- .../Tools/MIME/MIMETypeExtensions.cs | 15 ++++ app/MindWork AI Studio/wwwroot/app.js | 67 +++++++++++++-- 4 files changed, 188 insertions(+), 19 deletions(-) create mode 100644 app/MindWork AI Studio/Tools/MIME/MIMETypeExtensions.cs diff --git a/app/MindWork AI Studio/AudioRecorderHandler.cs b/app/MindWork AI Studio/AudioRecorderHandler.cs index 2ae62501..b11adc42 100644 --- a/app/MindWork AI Studio/AudioRecorderHandler.cs +++ b/app/MindWork AI Studio/AudioRecorderHandler.cs @@ -12,22 +12,54 @@ public static class AudioRecorderHandler .DisableAntiforgery(); } - private static async Task UploadAudio(IFormFile audio, RustService rustService) + private static async Task UploadAudio(HttpRequest request, RustService rustService) { - if (audio.Length == 0) - return Results.BadRequest(); + var form = await request.ReadFormAsync(); + var file = form.Files.GetFile("audio"); + var mimeType = form["mimeType"].ToString(); + + if (file is null || file.Length == 0) + return Results.BadRequest("No audio file uploaded."); + var actualMimeType = !string.IsNullOrWhiteSpace(mimeType) + ? mimeType + : file.ContentType; + + var extension = GetFileExtension(actualMimeType); + var dataDirectory = await rustService.GetDataDirectory(); var recordingDirectory = Path.Combine(dataDirectory, "audioRecordings"); if(!Path.Exists(recordingDirectory)) Directory.CreateDirectory(recordingDirectory); - var fileName = $"recording_{DateTime.UtcNow:yyyyMMdd_HHmmss}.webm"; + var fileName = $"recording_{DateTime.UtcNow:yyyyMMdd_HHmmss}{extension}"; var filePath = Path.Combine(recordingDirectory, fileName); await using var stream = File.Create(filePath); - await audio.CopyToAsync(stream); + await file.CopyToAsync(stream); - return Results.Ok(new { FileName = fileName }); + return Results.Ok(new + { + FileName = fileName, + MimeType = actualMimeType, + Size = file.Length + }); + } + + static string GetFileExtension(string mimeType) + { + var baseMimeType = mimeType.Split(';')[0].Trim().ToLowerInvariant(); + + return baseMimeType switch + { + "audio/webm" => ".webm", + "audio/ogg" => ".ogg", + "audio/mp4" => ".m4a", + "audio/mpeg" => ".mp3", + "audio/wav" => ".wav", + "audio/x-wav" => ".wav", + "audio/aac" => ".aac", + _ => ".audio" + }; } } \ No newline at end of file diff --git a/app/MindWork AI Studio/Layout/MainLayout.razor.cs b/app/MindWork AI Studio/Layout/MainLayout.razor.cs index a578c790..af9a81ed 100644 --- a/app/MindWork AI Studio/Layout/MainLayout.razor.cs +++ b/app/MindWork AI Studio/Layout/MainLayout.razor.cs @@ -1,6 +1,7 @@ using AIStudio.Dialogs; using AIStudio.Settings; using AIStudio.Settings.DataModel; +using AIStudio.Tools.MIME; using AIStudio.Tools.PluginSystem; using AIStudio.Tools.Rust; using AIStudio.Tools.Services; @@ -353,28 +354,92 @@ public partial class MainLayout : LayoutComponentBase, IMessageBusReceiver, ILan { if (toggled) { - await this.JsRuntime.InvokeVoidAsync("audioRecorder.start"); + var mimeTypes = GetPreferredMimeTypes( + Builder.Create().UseAudio().UseSubtype(AudioSubtype.OGG).Build(), + Builder.Create().UseAudio().UseSubtype(AudioSubtype.AAC).Build(), + Builder.Create().UseAudio().UseSubtype(AudioSubtype.MP3).Build(), + Builder.Create().UseAudio().UseSubtype(AudioSubtype.AIFF).Build(), + Builder.Create().UseAudio().UseSubtype(AudioSubtype.WAV).Build(), + Builder.Create().UseAudio().UseSubtype(AudioSubtype.FLAC).Build() + ); + + this.Logger.LogInformation($"Starting audio recording with preferred MIME types: {string.Join(", ", mimeTypes)}"); + // var array = mimeTypes.ToStringArray().Cast().ToArray(); + + var mimeTypeStrings = mimeTypes.ToStringArray(); + await this.JsRuntime.InvokeVoidAsync("audioRecorder.start", (object)mimeTypeStrings); this.isRecording = true; } else { - var base64Audio = await this.JsRuntime.InvokeAsync("audioRecorder.stop"); + var result = await this.JsRuntime.InvokeAsync("audioRecorder.stop"); + if(result.ChangedMimeType) + this.Logger.LogWarning($"The recorded audio MIME type was changed to '{result.MimeType}'."); + this.isRecording = false; this.StateHasChanged(); - - await this.SendAudioToBackend(base64Audio); + + await this.SendAudioToBackend(result); } } - private async Task SendAudioToBackend(string base64Audio) + private static MIMEType[] GetPreferredMimeTypes(params MIMEType[] mimeTypes) { - var audioBytes = Convert.FromBase64String(base64Audio); + // Default list if no parameters provided: + if (mimeTypes.Length is 0) + { + var audioBuilder = Builder.Create().UseAudio(); + return + [ + audioBuilder.UseSubtype(AudioSubtype.WEBM).Build(), + audioBuilder.UseSubtype(AudioSubtype.OGG).Build(), + audioBuilder.UseSubtype(AudioSubtype.MP4).Build(), + audioBuilder.UseSubtype(AudioSubtype.MPEG).Build(), + ]; + } + + return mimeTypes; + } + + private async Task SendAudioToBackend(AudioRecordingResult recording) + { + #warning No need to send the recording to the backend (Blazor Hybrid) + var audioBytes = Convert.FromBase64String(recording.Data); using var content = new MultipartFormDataContent(); - content.Add(new ByteArrayContent(audioBytes), "audio", "recording.webm"); - + var fileContent = new ByteArrayContent(audioBytes); + fileContent.Headers.ContentType = new System.Net.Http.Headers.MediaTypeHeaderValue(recording.MimeType); + + var extension = GetFileExtension(recording.MimeType); + content.Add(fileContent, "audio", $"recording{extension}"); + content.Add(new StringContent(recording.MimeType), "mimeType"); + await this.HttpClient.PostAsync("/audio/upload", content); } + + private static string GetFileExtension(string mimeType) + { + // Codec-Parameter entfernen für Matching + var baseMimeType = mimeType.Split(';')[0].Trim().ToLowerInvariant(); + + return baseMimeType switch + { + "audio/webm" => ".webm", + "audio/ogg" => ".ogg", + "audio/mp4" => ".m4a", + "audio/mpeg" => ".mp3", + "audio/wav" => ".wav", + "audio/x-wav" => ".wav", + _ => ".audio" // Fallback + }; + } + + private sealed class AudioRecordingResult + { + public string Data { get; set; } = string.Empty; + public string MimeType { get; set; } = string.Empty; + public bool ChangedMimeType { get; set; } + } #region Implementation of IDisposable diff --git a/app/MindWork AI Studio/Tools/MIME/MIMETypeExtensions.cs b/app/MindWork AI Studio/Tools/MIME/MIMETypeExtensions.cs new file mode 100644 index 00000000..cfcd9053 --- /dev/null +++ b/app/MindWork AI Studio/Tools/MIME/MIMETypeExtensions.cs @@ -0,0 +1,15 @@ +namespace AIStudio.Tools.MIME; + +public static class MIMETypeExtensions +{ + public static string[] ToStringArray(this MIMEType[] mimeTypes) + { + var result = new string[mimeTypes.Length]; + for (var i = 0; i < mimeTypes.Length; i++) + { + result[i] = mimeTypes[i]; + } + + return result; + } +} diff --git a/app/MindWork AI Studio/wwwroot/app.js b/app/MindWork AI Studio/wwwroot/app.js index ada58411..03055bc8 100644 --- a/app/MindWork AI Studio/wwwroot/app.js +++ b/app/MindWork AI Studio/wwwroot/app.js @@ -29,11 +29,64 @@ window.scrollToBottom = function(element) { let mediaRecorder; let audioChunks = []; +let actualRecordingMimeType; +let changedMimeType = false; window.audioRecorder = { - start: async function () { + start: async function (desiredMimeTypes = []) { const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); - mediaRecorder = new MediaRecorder(stream, { mimeType: 'audio/webm' }); + + // When only one mime type is provided as a string, convert it to an array: + if (typeof desiredMimeTypes === 'string') { + desiredMimeTypes = [desiredMimeTypes]; + } + + // Log sent mime types for debugging: + console.log('Requested mime types:', desiredMimeTypes); + + let mimeTypes = desiredMimeTypes.filter(type => typeof type === 'string' && type.trim() !== ''); + + // Next, we have to ensure that we have some default mime types to check as well. + // In case the provided list does not contain these, we append them: + // Use provided mime types or fallback to a default list: + const defaultMimeTypes = [ + 'audio/webm', + 'audio/ogg', + 'audio/mp4', + 'audio/mpeg', + ''// Fallback to browser default + ]; + + defaultMimeTypes.forEach(type => { + if (!mimeTypes.includes(type)) { + mimeTypes.push(type); + } + }); + + console.log('Final mime types to check (included defaults):', mimeTypes); + + // Find the first supported mime type: + actualRecordingMimeType = mimeTypes.find(type => + type === '' || MediaRecorder.isTypeSupported(type) + ) || ''; + + console.log('Selected mime type for recording:', actualRecordingMimeType); + const options = actualRecordingMimeType ? { mimeType: actualRecordingMimeType } : {}; + mediaRecorder = new MediaRecorder(stream, options); + + // In case the browser changed the mime type: + actualRecordingMimeType = mediaRecorder.mimeType; + + // Check the list of desired mime types against the actual one: + if (!desiredMimeTypes.includes(actualRecordingMimeType)) { + changedMimeType = true; + console.warn(`Requested mime types ('${desiredMimeTypes.join(', ')}') do not include the actual mime type used by MediaRecorder ('${actualRecordingMimeType}').`); + } else { + changedMimeType = false; + } + + console.log('Actual mime type used by MediaRecorder:', actualRecordingMimeType); + audioChunks = []; mediaRecorder.ondataavailable = (event) => { @@ -43,20 +96,24 @@ window.audioRecorder = { }; mediaRecorder.start(); + return actualRecordingMimeType; }, stop: async function () { return new Promise((resolve) => { mediaRecorder.onstop = async () => { - const blob = new Blob(audioChunks, { type: 'audio/webm' }); + const blob = new Blob(audioChunks, { type: actualRecordingMimeType }); const arrayBuffer = await blob.arrayBuffer(); const base64 = btoa( new Uint8Array(arrayBuffer).reduce((data, byte) => data + String.fromCharCode(byte), '') ); - // Tracks stoppen, damit das Mic-Icon verschwindet mediaRecorder.stream.getTracks().forEach(track => track.stop()); - resolve(base64); + resolve({ + data: base64, + mimeType: actualRecordingMimeType, + changedMimeType: changedMimeType, + }); }; mediaRecorder.stop(); });