Fixed audio handling (#631)

Thorsten Sommer 2026-01-18 16:01:02 +01:00 committed by GitHub
parent 5f18cbe2bf
commit 0a504276d9
5 changed files with 340 additions and 131 deletions


@@ -27,6 +27,7 @@
<script src="system/MudBlazor.Markdown/MudBlazor.Markdown.min.js"></script>
<script src="system/CodeBeam.MudBlazor.Extensions/MudExtensions.min.js"></script>
<script src="app.js"></script>
<script src="audio.js"></script>
</body>
</html>


@@ -20,6 +20,18 @@ public partial class VoiceRecorder : MSGComponentBase
[Inject]
private ISnackbar Snackbar { get; init; } = null!;
#region Overrides of MSGComponentBase
protected override async Task OnInitializedAsync()
{
await base.OnInitializedAsync();
// Initialize sound effects. This "warms up" the AudioContext and preloads all sounds for reliable playback:
await this.JsRuntime.InvokeVoidAsync("initSoundEffects");
}
#endregion
private uint numReceivedChunks;
private bool isRecording;
private bool isTranscribing;
@@ -39,6 +51,9 @@ public partial class VoiceRecorder : MSGComponentBase
{
if (toggled)
{
// Warm up sound effects:
await this.JsRuntime.InvokeVoidAsync("initSoundEffects");
var mimeTypes = GetPreferredMimeTypes(
Builder.Create().UseAudio().UseSubtype(AudioSubtype.OGG).Build(),
Builder.Create().UseAudio().UseSubtype(AudioSubtype.AAC).Build(),
@@ -189,7 +204,11 @@ public partial class VoiceRecorder : MSGComponentBase
private async Task TranscribeRecordingAsync()
{
if (this.finalRecordingPath is null)
{
// No recording to transcribe, but still release the microphone:
await this.ReleaseMicrophoneAsync();
return;
}
this.isTranscribing = true;
this.StateHasChanged();
@@ -288,12 +307,22 @@
}
finally
{
await this.ReleaseMicrophoneAsync();
this.finalRecordingPath = null;
this.isTranscribing = false;
this.StateHasChanged();
}
}
private async Task ReleaseMicrophoneAsync()
{
// Wait a moment for any queued sounds to finish playing, then release the microphone.
// This allows Bluetooth headsets to switch back to A2DP profile without interrupting audio:
await Task.Delay(1_800);
await this.JsRuntime.InvokeVoidAsync("audioRecorder.releaseMicrophone");
}
private sealed class AudioRecordingResult
{
public string MimeType { get; init; } = string.Empty;

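Taken together, the component drives the JavaScript side (the new audio.js, shown further below) in a fixed order. What follows is a minimal sketch of that sequence in plain JavaScript, not part of the commit, assuming audio.js is loaded and the sound files exist; the dotnetRef stub is hypothetical and stands in for Blazor's DotNetObjectReference:

// Hypothetical stub; in the real app, Blazor supplies a DotNetObjectReference
// whose OnAudioChunkReceived method receives the recorded chunks:
const dotnetRef = {
    invokeMethodAsync: async (method, chunk) => console.log(method, chunk.length, 'bytes')
};

async function recordAndTranscribe() {
    // 1. Warm up the AudioContext from a user gesture (autoplay policy)
    //    and preload all sound effects:
    await initSoundEffects();

    // 2. Start recording; audio.js waits ~1.6 s for the Bluetooth
    //    A2DP → HFP profile switch before playing start_recording.ogg:
    const mimeType = await audioRecorder.start(dotnetRef, ['audio/ogg']);
    console.log('Recording as:', mimeType);

    // ... the user speaks; chunks stream to .NET every 3 seconds ...

    // 3. Stop recording; this plays stop_recording.ogg but keeps the
    //    microphone open so the sound is not cut off by a profile switch:
    const result = await audioRecorder.stop();
    console.log('Stopped, mime type changed:', result.changedMimeType);

    // 4. After transcription succeeds, signal completion:
    await playSound('/sounds/transcription_done.ogg');

    // 5. Give queued sounds time to finish (the C# side waits 1.8 s),
    //    then release the microphone:
    setTimeout(() => audioRecorder.releaseMicrophone(), 1_800);
}

Note that releaseMicrophone() is the one new entry point the host must remember to call; forgetting it would leave the microphone open and a Bluetooth headset stuck in the low-quality HFP profile.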

@@ -26,132 +26,3 @@ window.clearDiv = function (divName) {
window.scrollToBottom = function(element) {
element.scrollIntoView({ behavior: 'smooth', block: 'end', inline: 'nearest' });
}
window.playSound = function(soundPath) {
try {
const audio = new Audio(soundPath);
audio.play().catch(error => {
console.warn('Failed to play sound effect:', error);
});
} catch (error) {
console.warn('Error creating audio element:', error);
}
};
let mediaRecorder;
let actualRecordingMimeType;
let changedMimeType = false;
let pendingChunkUploads = 0;
window.audioRecorder = {
start: async function (dotnetRef, desiredMimeTypes = []) {
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
// Play start recording sound effect:
window.playSound('/sounds/start_recording.ogg');
// When only one mime type is provided as a string, convert it to an array:
if (typeof desiredMimeTypes === 'string') {
desiredMimeTypes = [desiredMimeTypes];
}
// Log sent mime types for debugging:
console.log('Audio recording - requested mime types: ', desiredMimeTypes);
let mimeTypes = desiredMimeTypes.filter(type => typeof type === 'string' && type.trim() !== '');
// Next, we have to ensure that we have some default mime types to check as well.
// In case the provided list does not contain these, we append them:
// Use provided mime types or fallback to a default list:
const defaultMimeTypes = [
'audio/webm',
'audio/ogg',
'audio/mp4',
'audio/mpeg',
''// Fallback to browser default
];
defaultMimeTypes.forEach(type => {
if (!mimeTypes.includes(type)) {
mimeTypes.push(type);
}
});
console.log('Audio recording - final mime types to check (included defaults): ', mimeTypes);
// Find the first supported mime type:
actualRecordingMimeType = mimeTypes.find(type =>
type === '' || MediaRecorder.isTypeSupported(type)
) || '';
console.log('Audio recording - the browser selected the following mime type for recording: ', actualRecordingMimeType);
const options = actualRecordingMimeType ? { mimeType: actualRecordingMimeType } : {};
mediaRecorder = new MediaRecorder(stream, options);
// In case the browser changed the mime type:
actualRecordingMimeType = mediaRecorder.mimeType;
console.log('Audio recording - actual mime type used by the browser: ', actualRecordingMimeType);
// Check the list of desired mime types against the actual one:
if (!desiredMimeTypes.includes(actualRecordingMimeType)) {
changedMimeType = true;
console.warn(`Audio recording - requested mime types ('${desiredMimeTypes.join(', ')}') do not include the actual mime type used by the browser ('${actualRecordingMimeType}').`);
} else {
changedMimeType = false;
}
// Reset the pending uploads counter:
pendingChunkUploads = 0;
// Stream each chunk directly to .NET as it becomes available:
mediaRecorder.ondataavailable = async (event) => {
if (event.data.size > 0) {
pendingChunkUploads++;
try {
const arrayBuffer = await event.data.arrayBuffer();
const uint8Array = new Uint8Array(arrayBuffer);
await dotnetRef.invokeMethodAsync('OnAudioChunkReceived', uint8Array);
} catch (error) {
console.error('Error sending audio chunk to .NET:', error);
} finally {
pendingChunkUploads--;
}
}
};
mediaRecorder.start(3000); // read the recorded data in 3-second chunks
return actualRecordingMimeType;
},
stop: async function () {
return new Promise((resolve) => {
// Add an event listener to handle the stop event:
mediaRecorder.onstop = async () => {
// Wait for all pending chunk uploads to complete before finalizing:
console.log(`Audio recording - waiting for ${pendingChunkUploads} pending uploads.`);
while (pendingChunkUploads > 0) {
await new Promise(r => setTimeout(r, 10)); // wait 10 ms before checking again
}
console.log('Audio recording - all chunks uploaded, finalizing.');
// Play stop recording sound effect:
window.playSound('/sounds/stop_recording.ogg');
// Stop all tracks to release the microphone:
mediaRecorder.stream.getTracks().forEach(track => track.stop());
// No need to process data here anymore, just signal completion:
resolve({
mimeType: actualRecordingMimeType,
changedMimeType: changedMimeType,
});
};
// Finally, stop the recording (which will actually trigger the onstop event):
mediaRecorder.stop();
});
}
};


@@ -0,0 +1,306 @@
// Shared audio context for sound effects (the Web Audio API does not register with Media Session):
let soundEffectContext = null;
// Cache for decoded sound effect audio buffers:
const soundEffectCache = new Map();
// Track the preload state:
let soundEffectsPreloaded = false;
// Queue system: tracks when the next sound can start playing.
// This prevents sounds from overlapping and getting "swallowed" by the audio system:
let nextAvailablePlayTime = 0;
// Minimum gap between sounds in seconds (small buffer to ensure clean transitions):
const SOUND_GAP_SECONDS = 0.25;
// List of all sound effects used in the app:
const SOUND_EFFECT_PATHS = [
'/sounds/start_recording.ogg',
'/sounds/stop_recording.ogg',
'/sounds/transcription_done.ogg'
];
// Initialize the audio context with low-latency settings.
// Should be called from a user interaction (click, keypress)
// to satisfy browser autoplay policies:
window.initSoundEffects = async function() {
if (soundEffectContext && soundEffectContext.state !== 'closed') {
// Already initialized, just ensure it's running:
if (soundEffectContext.state === 'suspended') {
await soundEffectContext.resume();
}
return;
}
try {
// Create the context with the interactive latency hint for the lowest latency:
soundEffectContext = new (window.AudioContext || window.webkitAudioContext)({
latencyHint: 'interactive'
});
// Resume immediately (needed for Safari/macOS):
if (soundEffectContext.state === 'suspended') {
await soundEffectContext.resume();
}
// Reset the queue timing:
nextAvailablePlayTime = 0;
//
// Play a very short silent buffer to "warm up" the audio pipeline.
// This helps prevent the first real sound from being cut off:
//
const silentBuffer = soundEffectContext.createBuffer(1, 1, soundEffectContext.sampleRate);
const silentSource = soundEffectContext.createBufferSource();
silentSource.buffer = silentBuffer;
silentSource.connect(soundEffectContext.destination);
silentSource.start(0);
console.log('Sound effects - AudioContext initialized with latency:', soundEffectContext.baseLatency);
// Preload all sound effects in parallel:
if (!soundEffectsPreloaded) {
await window.preloadSoundEffects();
}
} catch (error) {
console.warn('Failed to initialize sound effects:', error);
}
};
// Preload all sound effect files into the cache:
window.preloadSoundEffects = async function() {
if (soundEffectsPreloaded) {
return;
}
// Ensure that the context exists:
if (!soundEffectContext || soundEffectContext.state === 'closed') {
soundEffectContext = new (window.AudioContext || window.webkitAudioContext)({
latencyHint: 'interactive'
});
}
console.log('Sound effects - preloading', SOUND_EFFECT_PATHS.length, 'sound files...');
const preloadPromises = SOUND_EFFECT_PATHS.map(async (soundPath) => {
try {
const response = await fetch(soundPath);
const arrayBuffer = await response.arrayBuffer();
const audioBuffer = await soundEffectContext.decodeAudioData(arrayBuffer);
soundEffectCache.set(soundPath, audioBuffer);
console.log('Sound effects - preloaded:', soundPath, 'duration:', audioBuffer.duration.toFixed(2), 's');
} catch (error) {
console.warn('Sound effects - failed to preload:', soundPath, error);
}
});
await Promise.all(preloadPromises);
soundEffectsPreloaded = true;
console.log('Sound effects - all files preloaded');
};
window.playSound = async function(soundPath) {
try {
// Initialize context if needed (fallback if initSoundEffects wasn't called):
if (!soundEffectContext || soundEffectContext.state === 'closed') {
soundEffectContext = new (window.AudioContext || window.webkitAudioContext)({
latencyHint: 'interactive'
});
nextAvailablePlayTime = 0;
}
// Resume if suspended (browser autoplay policy):
if (soundEffectContext.state === 'suspended') {
await soundEffectContext.resume();
}
// Check the cache for already decoded audio:
let audioBuffer = soundEffectCache.get(soundPath);
if (!audioBuffer) {
// Fetch and decode the audio file (fallback if not preloaded):
console.log('Sound effects - loading on demand:', soundPath);
const response = await fetch(soundPath);
const arrayBuffer = await response.arrayBuffer();
audioBuffer = await soundEffectContext.decodeAudioData(arrayBuffer);
soundEffectCache.set(soundPath, audioBuffer);
}
// Calculate when this sound should start:
const currentTime = soundEffectContext.currentTime;
let startTime;
if (currentTime >= nextAvailablePlayTime) {
// No sound is playing, or the previous sound has finished; start immediately:
startTime = 0; // 0 means "now" in Web Audio API
nextAvailablePlayTime = currentTime + audioBuffer.duration + SOUND_GAP_SECONDS;
} else {
// A sound is still playing; schedule this sound to start after it:
startTime = nextAvailablePlayTime;
nextAvailablePlayTime = startTime + audioBuffer.duration + SOUND_GAP_SECONDS;
console.log('Sound effects - queued:', soundPath, 'will play in', (startTime - currentTime).toFixed(2), 's');
}
// Create a new source node and schedule playback:
const source = soundEffectContext.createBufferSource();
source.buffer = audioBuffer;
source.connect(soundEffectContext.destination);
source.start(startTime);
console.log('Sound effects - playing:', soundPath);
} catch (error) {
console.warn('Failed to play sound effect:', error);
}
};
let mediaRecorder;
let actualRecordingMimeType;
let changedMimeType = false;
let pendingChunkUploads = 0;
// Store the media stream so we can close the microphone later:
let activeMediaStream = null;
// Delay in milliseconds to wait after getUserMedia() for Bluetooth profile switch (A2DP → HFP):
const BLUETOOTH_PROFILE_SWITCH_DELAY_MS = 1_600;
window.audioRecorder = {
start: async function (dotnetRef, desiredMimeTypes = []) {
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
activeMediaStream = stream;
// Wait for Bluetooth headsets to complete the profile switch from A2DP to HFP.
// This prevents the first sound from being cut off during the switch:
console.log('Audio recording - waiting for Bluetooth profile switch...');
await new Promise(r => setTimeout(r, BLUETOOTH_PROFILE_SWITCH_DELAY_MS));
// Play start recording sound effect:
await window.playSound('/sounds/start_recording.ogg');
// When only one mime type is provided as a string, convert it to an array:
if (typeof desiredMimeTypes === 'string') {
desiredMimeTypes = [desiredMimeTypes];
}
// Log sent mime types for debugging:
console.log('Audio recording - requested mime types: ', desiredMimeTypes);
let mimeTypes = desiredMimeTypes.filter(type => typeof type === 'string' && type.trim() !== '');
// Next, we have to ensure that we have some default mime types to check as well.
// In case the provided list does not contain these, we append them:
const defaultMimeTypes = [
'audio/webm',
'audio/ogg',
'audio/mp4',
'audio/mpeg',
'' // Fallback to the browser default
];
defaultMimeTypes.forEach(type => {
if (!mimeTypes.includes(type)) {
mimeTypes.push(type);
}
});
console.log('Audio recording - final mime types to check (included defaults): ', mimeTypes);
// Find the first supported mime type:
actualRecordingMimeType = mimeTypes.find(type =>
type === '' || MediaRecorder.isTypeSupported(type)
) || '';
console.log('Audio recording - the browser selected the following mime type for recording: ', actualRecordingMimeType);
const options = actualRecordingMimeType ? { mimeType: actualRecordingMimeType } : {};
mediaRecorder = new MediaRecorder(stream, options);
// In case the browser changed the mime type:
actualRecordingMimeType = mediaRecorder.mimeType;
console.log('Audio recording - actual mime type used by the browser: ', actualRecordingMimeType);
// Check the list of desired mime types against the actual one:
if (!desiredMimeTypes.includes(actualRecordingMimeType)) {
changedMimeType = true;
console.warn(`Audio recording - requested mime types ('${desiredMimeTypes.join(', ')}') do not include the actual mime type used by the browser ('${actualRecordingMimeType}').`);
} else {
changedMimeType = false;
}
// Reset the pending uploads counter:
pendingChunkUploads = 0;
// Stream each chunk directly to .NET as it becomes available:
mediaRecorder.ondataavailable = async (event) => {
if (event.data.size > 0) {
pendingChunkUploads++;
try {
const arrayBuffer = await event.data.arrayBuffer();
const uint8Array = new Uint8Array(arrayBuffer);
await dotnetRef.invokeMethodAsync('OnAudioChunkReceived', uint8Array);
} catch (error) {
console.error('Error sending audio chunk to .NET:', error);
} finally {
pendingChunkUploads--;
}
}
};
mediaRecorder.start(3000); // read the recorded data in 3-second chunks
return actualRecordingMimeType;
},
stop: async function () {
return new Promise((resolve) => {
// Add an event listener to handle the stop event:
mediaRecorder.onstop = async () => {
// Wait for all pending chunk uploads to complete before finalizing:
console.log(`Audio recording - waiting for ${pendingChunkUploads} pending uploads.`);
while (pendingChunkUploads > 0) {
await new Promise(r => setTimeout(r, 10)); // wait 10 ms before checking again
}
console.log('Audio recording - all chunks uploaded, finalizing.');
// Play stop recording sound effect:
await window.playSound('/sounds/stop_recording.ogg');
//
// IMPORTANT: Do NOT release the microphone here!
// Bluetooth headsets switch profiles (HFP → A2DP) when the microphone is released,
// which causes audio to be interrupted. We keep the microphone open so that the
// stop_recording and transcription_done sounds can play without interruption.
//
// Call window.audioRecorder.releaseMicrophone() after the last sound has played.
//
// No need to process data here anymore, just signal completion:
resolve({
mimeType: actualRecordingMimeType,
changedMimeType: changedMimeType,
});
};
// Finally, stop the recording (which will actually trigger the onstop event):
mediaRecorder.stop();
});
},
// Release the microphone after all sounds have been played.
// This should be called after the transcription_done sound to allow
// Bluetooth headsets to switch back to A2DP profile without interrupting audio:
releaseMicrophone: function () {
if (activeMediaStream) {
console.log('Audio recording - releasing microphone (Bluetooth will switch back to A2DP)');
activeMediaStream.getTracks().forEach(track => track.stop());
activeMediaStream = null;
}
}
};

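The heart of the new file is the scheduling arithmetic in playSound(). Distilled to its core, and reusing the names from audio.js, the idea looks like this (a sketch with a hypothetical scheduleBuffer helper, not part of the commit):

// Each sound is placed on the shared AudioContext clock so that it starts
// only after the previous one has finished, plus a small gap:
const ctx = new AudioContext({ latencyHint: 'interactive' });
let nextAvailablePlayTime = 0;
const SOUND_GAP_SECONDS = 0.25;

function scheduleBuffer(audioBuffer) {
    const currentTime = ctx.currentTime;
    let startTime;
    if (currentTime >= nextAvailablePlayTime) {
        // The previous sound has finished; play immediately:
        startTime = 0; // 0 means "now" on the Web Audio clock
        nextAvailablePlayTime = currentTime + audioBuffer.duration + SOUND_GAP_SECONDS;
    } else {
        // A sound is still playing; queue this one behind it:
        startTime = nextAvailablePlayTime;
        nextAvailablePlayTime = startTime + audioBuffer.duration + SOUND_GAP_SECONDS;
    }
    const source = ctx.createBufferSource();
    source.buffer = audioBuffer;
    source.connect(ctx.destination);
    source.start(startTime);
}

Because startTime values are absolute positions on the context's clock, back-to-back calls cannot overlap: each call reserves its slot before the next one computes its own.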

@@ -1,3 +1,5 @@
# v26.1.2, build 232 (2026-01-xx xx:xx UTC)
- Added the option to hide specific assistants via configuration plugins. This is useful for enterprise environments.
- Improved the microphone handling (transcription preview) so that all sound effects and the voice recording are processed without interruption.
- Fixed a logging bug that prevented log events from being recorded in some cases.
- Fixed a bug affecting the transcription preview: previously, when you stopped music or other media, recorded or dictated text, and then tried to resume playback, the media wouldn't resume as expected.