WIP Voice recording

This commit is contained in:
Thorsten Sommer 2026-01-06 10:25:58 +01:00
parent 164a4fb7eb
commit 0db4d61434
Signed by: tsommer
GPG Key ID: 371BBA77A02C0108
4 changed files with 188 additions and 19 deletions

View File

@ -12,22 +12,54 @@ public static class AudioRecorderHandler
.DisableAntiforgery();
}
/// <summary>
/// Receives an uploaded recording: reads the multipart form, takes the file part named "audio" and the
/// "mimeType" field, and stores the file in the "audioRecordings" folder below the data directory
/// returned by <c>rustService.GetDataDirectory()</c>.
/// </summary>
/// <returns>
/// 400 when no file (or an empty file) was sent; otherwise 200 with the stored file name, the MIME type
/// that was used to pick the extension, and the file size.
/// </returns>
/// <remarks>
/// NOTE(review): this span is a rendered diff. Lines of the previous revision (the <c>IFormFile audio</c>
/// signature, the <c>audio.Length</c> check, the fixed ".webm" name, <c>audio.CopyToAsync</c>, and the
/// single-property result) appear next to the lines that replace them.
/// </remarks>
private static async Task<IResult> UploadAudio(IFormFile audio, RustService rustService)
private static async Task<IResult> UploadAudio(HttpRequest request, RustService rustService)
{
if (audio.Length == 0)
return Results.BadRequest();
var form = await request.ReadFormAsync();
var file = form.Files.GetFile("audio");
var mimeType = form["mimeType"].ToString();
if (file is null || file.Length == 0)
return Results.BadRequest("No audio file uploaded.");
// The explicit "mimeType" form field wins; a blank field falls back to the content type of the file part.
var actualMimeType = !string.IsNullOrWhiteSpace(mimeType)
? mimeType
: file.ContentType;
var extension = GetFileExtension(actualMimeType);
var dataDirectory = await rustService.GetDataDirectory();
var recordingDirectory = Path.Combine(dataDirectory, "audioRecordings");
if(!Path.Exists(recordingDirectory))
Directory.CreateDirectory(recordingDirectory);
// NOTE(review): the timestamp has one-second resolution, so two uploads with the same extension within
// one second resolve to the same path and File.Create below replaces the earlier file.
var fileName = $"recording_{DateTime.UtcNow:yyyyMMdd_HHmmss}.webm";
var fileName = $"recording_{DateTime.UtcNow:yyyyMMdd_HHmmss}{extension}";
var filePath = Path.Combine(recordingDirectory, fileName);
await using var stream = File.Create(filePath);
await audio.CopyToAsync(stream);
await file.CopyToAsync(stream);
return Results.Ok(new { FileName = fileName });
return Results.Ok(new
{
FileName = fileName,
MimeType = actualMimeType,
Size = file.Length
});
}
/// <summary>
/// Maps an audio MIME type to the file extension used for the stored recording.
/// </summary>
/// <param name="mimeType">
/// The MIME type, optionally followed by parameters (everything after the first ';' is ignored).
/// </param>
/// <returns>
/// The extension including the leading dot, or ".audio" when the type is missing or not recognized.
/// </returns>
private static string GetFileExtension(string mimeType)
{
    // The caller passes either the "mimeType" form field or the content type of the file part.
    // Treat a missing value like an unknown type instead of failing on the Split below.
    if (string.IsNullOrWhiteSpace(mimeType))
        return ".audio";

    // Only the "type/subtype" part matters for the lookup; drop parameters and normalize the case.
    var baseMimeType = mimeType.Split(';')[0].Trim().ToLowerInvariant();
    return baseMimeType switch
    {
        "audio/webm" => ".webm",
        "audio/ogg" => ".ogg",
        "audio/mp4" => ".m4a",
        "audio/mpeg" => ".mp3",
        "audio/wav" => ".wav",
        "audio/x-wav" => ".wav",
        "audio/aac" => ".aac",

        // NOTE(review): FLAC and AIFF are mapped with their conventional MIME strings;
        // confirm they match what the recording client actually sends.
        "audio/flac" => ".flac",
        "audio/aiff" => ".aiff",
        "audio/x-aiff" => ".aiff",

        _ => ".audio"
    };
}
}

View File

@ -1,6 +1,7 @@
using AIStudio.Dialogs;
using AIStudio.Settings;
using AIStudio.Settings.DataModel;
using AIStudio.Tools.MIME;
using AIStudio.Tools.PluginSystem;
using AIStudio.Tools.Rust;
using AIStudio.Tools.Services;
@ -353,28 +354,92 @@ public partial class MainLayout : LayoutComponentBase, IMessageBusReceiver, ILan
{
if (toggled)
{
await this.JsRuntime.InvokeVoidAsync("audioRecorder.start");
var mimeTypes = GetPreferredMimeTypes(
Builder.Create().UseAudio().UseSubtype(AudioSubtype.OGG).Build(),
Builder.Create().UseAudio().UseSubtype(AudioSubtype.AAC).Build(),
Builder.Create().UseAudio().UseSubtype(AudioSubtype.MP3).Build(),
Builder.Create().UseAudio().UseSubtype(AudioSubtype.AIFF).Build(),
Builder.Create().UseAudio().UseSubtype(AudioSubtype.WAV).Build(),
Builder.Create().UseAudio().UseSubtype(AudioSubtype.FLAC).Build()
);
this.Logger.LogInformation($"Starting audio recording with preferred MIME types: {string.Join<MIMEType>(", ", mimeTypes)}");
// var array = mimeTypes.ToStringArray().Cast<object?>().ToArray();
var mimeTypeStrings = mimeTypes.ToStringArray();
await this.JsRuntime.InvokeVoidAsync("audioRecorder.start", (object)mimeTypeStrings);
this.isRecording = true;
}
else
{
var base64Audio = await this.JsRuntime.InvokeAsync<string>("audioRecorder.stop");
var result = await this.JsRuntime.InvokeAsync<AudioRecordingResult>("audioRecorder.stop");
if(result.ChangedMimeType)
this.Logger.LogWarning($"The recorded audio MIME type was changed to '{result.MimeType}'.");
this.isRecording = false;
this.StateHasChanged();
await this.SendAudioToBackend(base64Audio);
await this.SendAudioToBackend(result);
}
}
private async Task SendAudioToBackend(string base64Audio)
/// <summary>
/// Returns the MIME types to offer to the recorder: the given <paramref name="mimeTypes"/> unchanged,
/// or a built-in list (WEBM, OGG, MP4, MPEG) when none were passed.
/// </summary>
private static MIMEType[] GetPreferredMimeTypes(params MIMEType[] mimeTypes)
{
// NOTE(review): the next line reads `base64Audio`, which is not declared in this method. It looks like
// a line of the previous SendAudioToBackend(string base64Audio) that the diff view shows in place.
var audioBytes = Convert.FromBase64String(base64Audio);
// Default list if no parameters provided:
if (mimeTypes.Length is 0)
{
// NOTE(review): one builder instance is reused for all four Build() calls. This assumes UseSubtype
// replaces the previous subtype and Build() does not share state between results. Confirm against Builder.
var audioBuilder = Builder.Create().UseAudio();
return
[
audioBuilder.UseSubtype(AudioSubtype.WEBM).Build(),
audioBuilder.UseSubtype(AudioSubtype.OGG).Build(),
audioBuilder.UseSubtype(AudioSubtype.MP4).Build(),
audioBuilder.UseSubtype(AudioSubtype.MPEG).Build(),
];
}
return mimeTypes;
}
/// <summary>
/// Decodes the Base64 payload of <paramref name="recording"/> and posts it as multipart form data to
/// "/audio/upload": the bytes as file part "audio" and the MIME type as text field "mimeType".
/// </summary>
/// <remarks>
/// The response of the POST is awaited but not inspected.
/// </remarks>
private async Task SendAudioToBackend(AudioRecordingResult recording)
{
#warning No need to send the recording to the backend (Blazor Hybrid)
var audioBytes = Convert.FromBase64String(recording.Data);
using var content = new MultipartFormDataContent();
// NOTE(review): the next line adds a second "audio" part with a fixed ".webm" name. It looks like the
// line of the previous revision that the diff view shows next to its replacement below.
content.Add(new ByteArrayContent(audioBytes), "audio", "recording.webm");
var fileContent = new ByteArrayContent(audioBytes);
// NOTE(review): recording.MimeType may carry parameters (GetFileExtension strips everything after ';')
// and defaults to an empty string. Verify that the MediaTypeHeaderValue(string) constructor accepts
// such values; MediaTypeHeaderValue.Parse / TryParse may be the safer choice.
fileContent.Headers.ContentType = new System.Net.Http.Headers.MediaTypeHeaderValue(recording.MimeType);
var extension = GetFileExtension(recording.MimeType);
content.Add(fileContent, "audio", $"recording{extension}");
// The MIME type is also sent as a separate form field next to the file part.
content.Add(new StringContent(recording.MimeType), "mimeType");
await this.HttpClient.PostAsync("/audio/upload", content);
}
/// <summary>
/// Maps the MIME type reported for a recording to the extension of the upload file name.
/// </summary>
/// <param name="mimeType">
/// The MIME type, optionally followed by codec parameters (everything after the first ';' is ignored).
/// </param>
/// <returns>
/// The extension including the leading dot, or ".audio" when the type is missing or not recognized.
/// </returns>
private static string GetFileExtension(string mimeType)
{
    // AudioRecordingResult.MimeType defaults to an empty string; treat a missing value like an
    // unknown type instead of failing on the Split below.
    if (string.IsNullOrWhiteSpace(mimeType))
        return ".audio";

    // Strip codec parameters before matching.
    var baseMimeType = mimeType.Split(';')[0].Trim().ToLowerInvariant();
    return baseMimeType switch
    {
        "audio/webm" => ".webm",
        "audio/ogg" => ".ogg",
        "audio/mp4" => ".m4a",
        "audio/mpeg" => ".mp3",
        "audio/wav" => ".wav",
        "audio/x-wav" => ".wav",

        // AAC, FLAC and AIFF are among the types requested when recording starts.
        // NOTE(review): the FLAC/AIFF strings are the conventional ones; confirm against the MIME builder.
        "audio/aac" => ".aac",
        "audio/flac" => ".flac",
        "audio/aiff" => ".aiff",
        "audio/x-aiff" => ".aiff",

        _ => ".audio" // Fallback
    };
}
/// <summary>
/// Result object returned by the JavaScript call "audioRecorder.stop".
/// </summary>
private sealed class AudioRecordingResult
{
    /// <summary>
    /// True when the MIME type used for the recording was not among the requested ones.
    /// </summary>
    public bool ChangedMimeType { get; set; }

    /// <summary>
    /// The MIME type of the recording as reported by the recorder.
    /// </summary>
    public string MimeType { get; set; } = "";

    /// <summary>
    /// The recorded audio, Base64-encoded.
    /// </summary>
    public string Data { get; set; } = "";
}
#region Implementation of IDisposable

View File

@ -0,0 +1,15 @@
namespace AIStudio.Tools.MIME;
/// <summary>
/// Extension methods for working with <see cref="MIMEType"/> values.
/// </summary>
public static class MIMETypeExtensions
{
    /// <summary>
    /// Converts every MIME type of the array to its string form, keeping the order.
    /// </summary>
    /// <param name="mimeTypes">The MIME types to convert.</param>
    /// <returns>A new array of the same length holding the converted values.</returns>
    public static string[] ToStringArray(this MIMEType[] mimeTypes)
    {
        var converted = new string[mimeTypes.Length];
        var position = 0;
        foreach (var mimeType in mimeTypes)
        {
            // Relies on the conversion from MIMEType to string.
            converted[position] = mimeType;
            position++;
        }

        return converted;
    }
}

View File

@ -29,11 +29,64 @@ window.scrollToBottom = function(element) {
let mediaRecorder;
let audioChunks = [];
let actualRecordingMimeType;
let changedMimeType = false;
window.audioRecorder = {
start: async function () {
start: async function (desiredMimeTypes = []) {
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
mediaRecorder = new MediaRecorder(stream, { mimeType: 'audio/webm' });
// When only one mime type is provided as a string, convert it to an array:
if (typeof desiredMimeTypes === 'string') {
desiredMimeTypes = [desiredMimeTypes];
}
// Log sent mime types for debugging:
console.log('Requested mime types:', desiredMimeTypes);
let mimeTypes = desiredMimeTypes.filter(type => typeof type === 'string' && type.trim() !== '');
// Next, we have to ensure that we have some default mime types to check as well.
// In case the provided list does not contain these, we append them:
// Use provided mime types or fallback to a default list:
const defaultMimeTypes = [
'audio/webm',
'audio/ogg',
'audio/mp4',
'audio/mpeg',
''// Fallback to browser default
];
defaultMimeTypes.forEach(type => {
if (!mimeTypes.includes(type)) {
mimeTypes.push(type);
}
});
console.log('Final mime types to check (included defaults):', mimeTypes);
// Find the first supported mime type:
actualRecordingMimeType = mimeTypes.find(type =>
type === '' || MediaRecorder.isTypeSupported(type)
) || '';
console.log('Selected mime type for recording:', actualRecordingMimeType);
const options = actualRecordingMimeType ? { mimeType: actualRecordingMimeType } : {};
mediaRecorder = new MediaRecorder(stream, options);
// In case the browser changed the mime type:
actualRecordingMimeType = mediaRecorder.mimeType;
// Check the list of desired mime types against the actual one:
if (!desiredMimeTypes.includes(actualRecordingMimeType)) {
changedMimeType = true;
console.warn(`Requested mime types ('${desiredMimeTypes.join(', ')}') do not include the actual mime type used by MediaRecorder ('${actualRecordingMimeType}').`);
} else {
changedMimeType = false;
}
console.log('Actual mime type used by MediaRecorder:', actualRecordingMimeType);
audioChunks = [];
mediaRecorder.ondataavailable = (event) => {
@ -43,20 +96,24 @@ window.audioRecorder = {
};
mediaRecorder.start();
return actualRecordingMimeType;
},
stop: async function () {
return new Promise((resolve) => {
mediaRecorder.onstop = async () => {
const blob = new Blob(audioChunks, { type: 'audio/webm' });
const blob = new Blob(audioChunks, { type: actualRecordingMimeType });
const arrayBuffer = await blob.arrayBuffer();
const base64 = btoa(
new Uint8Array(arrayBuffer).reduce((data, byte) => data + String.fromCharCode(byte), '')
);
// Stop the tracks so that the microphone indicator disappears
mediaRecorder.stream.getTracks().forEach(track => track.stop());
resolve(base64);
resolve({
data: base64,
mimeType: actualRecordingMimeType,
changedMimeType: changedMimeType,
});
};
mediaRecorder.stop();
});