From 06075f2a53c88252a100e7d784b146f26feb9d6c Mon Sep 17 00:00:00 2001
From: Thorsten Sommer
Date: Sun, 18 Jan 2026 15:42:40 +0100
Subject: [PATCH] Improved microphone handling and sound effect playback

---
 .../Components/VoiceRecorder.razor.cs         |  29 +++
 app/MindWork AI Studio/wwwroot/app.js         | 175 ++++++++++++++++--
 .../wwwroot/changelog/v26.1.2.md              |   1 +
 3 files changed, 193 insertions(+), 12 deletions(-)

diff --git a/app/MindWork AI Studio/Components/VoiceRecorder.razor.cs b/app/MindWork AI Studio/Components/VoiceRecorder.razor.cs
index 4ea00180..d29e28dd 100644
--- a/app/MindWork AI Studio/Components/VoiceRecorder.razor.cs
+++ b/app/MindWork AI Studio/Components/VoiceRecorder.razor.cs
@@ -20,6 +20,18 @@ public partial class VoiceRecorder : MSGComponentBase
     [Inject]
     private ISnackbar Snackbar { get; init; } = null!;
 
+    #region Overrides of MSGComponentBase
+
+    protected override async Task OnInitializedAsync()
+    {
+        await base.OnInitializedAsync();
+
+        // Initialize sound effects. This "warms up" the AudioContext and preloads all sounds for reliable playback:
+        await this.JsRuntime.InvokeVoidAsync("initSoundEffects");
+    }
+
+    #endregion
+
     private uint numReceivedChunks;
     private bool isRecording;
     private bool isTranscribing;
@@ -39,6 +51,9 @@ public partial class VoiceRecorder : MSGComponentBase
     {
         if (toggled)
         {
+            // Warm up sound effects:
+            await this.JsRuntime.InvokeVoidAsync("initSoundEffects");
+
             var mimeTypes = GetPreferredMimeTypes(
                 Builder.Create().UseAudio().UseSubtype(AudioSubtype.OGG).Build(),
                 Builder.Create().UseAudio().UseSubtype(AudioSubtype.AAC).Build(),
@@ -189,7 +204,11 @@ public partial class VoiceRecorder : MSGComponentBase
     private async Task TranscribeRecordingAsync()
     {
         if (this.finalRecordingPath is null)
+        {
+            // No recording to transcribe, but still release the microphone:
+            await this.ReleaseMicrophoneAsync();
             return;
+        }
 
         this.isTranscribing = true;
         this.StateHasChanged();
@@ -288,12 +307,22 @@ public partial class VoiceRecorder : MSGComponentBase
         }
         finally
         {
+            await this.ReleaseMicrophoneAsync();
+
             this.finalRecordingPath = null;
             this.isTranscribing = false;
             this.StateHasChanged();
         }
     }
 
+    private async Task ReleaseMicrophoneAsync()
+    {
+        // Wait a moment for any queued sounds to finish playing, then release the microphone.
+        // This allows Bluetooth headsets to switch back to the A2DP profile without interrupting audio:
+        await Task.Delay(1_800);
+        await this.JsRuntime.InvokeVoidAsync("audioRecorder.releaseMicrophone");
+    }
+
     private sealed class AudioRecordingResult
     {
         public string MimeType { get; init; } = string.Empty;
diff --git a/app/MindWork AI Studio/wwwroot/app.js b/app/MindWork AI Studio/wwwroot/app.js
index 6c1ebaf4..c405df39 100644
--- a/app/MindWork AI Studio/wwwroot/app.js
+++ b/app/MindWork AI Studio/wwwroot/app.js
@@ -27,15 +27,120 @@ window.scrollToBottom = function(element) {
     element.scrollIntoView({ behavior: 'smooth', block: 'end', inline: 'nearest' });
 }
 
-// Shared audio context for sound effects (Web Audio API does not register with Media Session):
+// The shared audio context for sound effects (Web Audio API does not register with Media Session):
 let soundEffectContext = null;
+
+// Cache for decoded sound effect audio buffers:
 const soundEffectCache = new Map();
 
+// Track the preload state:
+let soundEffectsPreloaded = false;
+
+// Queue system: tracks when the next sound can start playing.
+// This prevents sounds from overlapping and getting "swallowed" by the audio system:
+let nextAvailablePlayTime = 0;
+
+// Minimum gap between sounds in seconds (small buffer to ensure clean transitions):
+const SOUND_GAP_SECONDS = 0.55;
+
+// List of all sound effects used in the app:
+const SOUND_EFFECT_PATHS = [
+    '/sounds/start_recording.ogg',
+    '/sounds/stop_recording.ogg',
+    '/sounds/transcription_done.ogg'
+];
+
+// Initialize the audio context with low-latency settings.
+// Should be called from a user interaction (click, keypress)
+// to satisfy browser autoplay policies:
+window.initSoundEffects = async function() {
+
+    if (soundEffectContext && soundEffectContext.state !== 'closed') {
+        // Already initialized, just ensure it's running:
+        if (soundEffectContext.state === 'suspended') {
+            await soundEffectContext.resume();
+        }
+
+        return;
+    }
+
+    try {
+        // Create the context with the interactive latency hint for the lowest latency:
+        soundEffectContext = new (window.AudioContext || window.webkitAudioContext)({
+            latencyHint: 'interactive'
+        });
+
+        // Resume immediately (needed for Safari/macOS):
+        if (soundEffectContext.state === 'suspended') {
+            await soundEffectContext.resume();
+        }
+
+        // Reset the queue timing:
+        nextAvailablePlayTime = 0;
+
+        //
+        // Play a very short silent buffer to "warm up" the audio pipeline.
+        // This helps prevent the first real sound from being cut off:
+        //
+        const silentBuffer = soundEffectContext.createBuffer(1, 1, soundEffectContext.sampleRate);
+        const silentSource = soundEffectContext.createBufferSource();
+        silentSource.buffer = silentBuffer;
+        silentSource.connect(soundEffectContext.destination);
+        silentSource.start(0);
+
+        console.log('Sound effects - AudioContext initialized with latency:', soundEffectContext.baseLatency);
+
+        // Preload all sound effects in parallel:
+        if (!soundEffectsPreloaded) {
+            await window.preloadSoundEffects();
+        }
+    } catch (error) {
+        console.warn('Failed to initialize sound effects:', error);
+    }
+};
+
+// Preload all sound effect files into the cache:
+window.preloadSoundEffects = async function() {
+    if (soundEffectsPreloaded) {
+        return;
+    }
+
+    // Ensure that the context exists:
+    if (!soundEffectContext || soundEffectContext.state === 'closed') {
+        soundEffectContext = new (window.AudioContext || window.webkitAudioContext)({
+            latencyHint: 'interactive'
+        });
+    }
+
+    console.log('Sound effects - preloading', SOUND_EFFECT_PATHS.length, 'sound files...');
+
+    const preloadPromises = SOUND_EFFECT_PATHS.map(async (soundPath) => {
+        try {
+            const response = await fetch(soundPath);
+            const arrayBuffer = await response.arrayBuffer();
+            const audioBuffer = await soundEffectContext.decodeAudioData(arrayBuffer);
+            soundEffectCache.set(soundPath, audioBuffer);
+
+            console.log('Sound effects - preloaded:', soundPath, 'duration:', audioBuffer.duration.toFixed(2), 's');
+        } catch (error) {
+            console.warn('Sound effects - failed to preload:', soundPath, error);
+        }
+    });
+
+    await Promise.all(preloadPromises);
+    soundEffectsPreloaded = true;
+    console.log('Sound effects - all files preloaded');
+};
+
 window.playSound = async function(soundPath) {
     try {
-        // Create or reuse the audio context:
+        // Initialize context if needed (fallback if initSoundEffects wasn't called):
         if (!soundEffectContext || soundEffectContext.state === 'closed') {
-            soundEffectContext = new (window.AudioContext || window.webkitAudioContext)();
+            soundEffectContext = new (window.AudioContext || window.webkitAudioContext)({
+                latencyHint:
+                'interactive'
+            });
+
+            nextAvailablePlayTime = 0;
         }
 
         // Resume if suspended (browser autoplay policy):
@@ -47,19 +152,36 @@ window.playSound = async function(soundPath) {
         let audioBuffer = soundEffectCache.get(soundPath);
         if (!audioBuffer) {
-            // Fetch and decode the audio file:
+            // Fetch and decode the audio file (fallback if not preloaded):
+            console.log('Sound effects - loading on demand:', soundPath);
             const response = await fetch(soundPath);
             const arrayBuffer = await response.arrayBuffer();
             audioBuffer = await soundEffectContext.decodeAudioData(arrayBuffer);
             soundEffectCache.set(soundPath, audioBuffer);
         }
 
-        // Create a new source node and play:
+        // Calculate when this sound should start:
+        const currentTime = soundEffectContext.currentTime;
+        let startTime;
+
+        if (currentTime >= nextAvailablePlayTime) {
+            // No sound is playing, or the previous sound has finished; start immediately:
+            startTime = 0; // 0 means "now" in Web Audio API
+            nextAvailablePlayTime = currentTime + audioBuffer.duration + SOUND_GAP_SECONDS;
+        } else {
+            // A sound is still playing; schedule this sound to start after it:
+            startTime = nextAvailablePlayTime;
+            nextAvailablePlayTime = startTime + audioBuffer.duration + SOUND_GAP_SECONDS;
+            console.log('Sound effects - queued:', soundPath, 'will play in', (startTime - currentTime).toFixed(2), 's');
+        }
+
+        // Create a new source node and schedule playback:
         const source = soundEffectContext.createBufferSource();
         source.buffer = audioBuffer;
         source.connect(soundEffectContext.destination);
-        source.start(0);
-
+        source.start(startTime);
+        console.log('Sound effects - playing:', soundPath);
+
     } catch (error) {
         console.warn('Failed to play sound effect:', error);
     }
@@ -70,12 +192,24 @@ let actualRecordingMimeType;
 let changedMimeType = false;
 let pendingChunkUploads = 0;
+
+// Store the media stream so we can close the microphone later:
+let activeMediaStream = null;
+
+// Delay in milliseconds to wait after getUserMedia() for Bluetooth profile switch (A2DP → HFP):
+const BLUETOOTH_PROFILE_SWITCH_DELAY_MS = 1_600;
 
 window.audioRecorder = {
     start: async function (dotnetRef, desiredMimeTypes = []) {
         const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+        activeMediaStream = stream;
+
+        // Wait for Bluetooth headsets to complete the profile switch from A2DP to HFP.
+        // This prevents the first sound from being cut off during the switch:
+        console.log('Audio recording - waiting for Bluetooth profile switch...');
+        await new Promise(r => setTimeout(r, BLUETOOTH_PROFILE_SWITCH_DELAY_MS));
 
         // Play start recording sound effect:
-        window.playSound('/sounds/start_recording.ogg');
+        await window.playSound('/sounds/start_recording.ogg');
 
         // When only one mime type is provided as a string, convert it to an array:
         if (typeof desiredMimeTypes === 'string') {
@@ -165,11 +299,17 @@ window.audioRecorder = {
             console.log('Audio recording - all chunks uploaded, finalizing.');
 
             // Play stop recording sound effect:
-            window.playSound('/sounds/stop_recording.ogg');
+            await window.playSound('/sounds/stop_recording.ogg');
+
+            //
+            // IMPORTANT: Do NOT release the microphone here!
+            // Bluetooth headsets switch profiles (HFP → A2DP) when the microphone is released,
+            // which causes audio to be interrupted. We keep the microphone open so that the
+            // stop_recording and transcription_done sounds can play without interruption.
+            //
+            // Call window.audioRecorder.releaseMicrophone() after the last sound has played.
+            //
 
-            // Stop all tracks to release the microphone:
-            mediaRecorder.stream.getTracks().forEach(track => track.stop());
-
             // No need to process data here anymore, just signal completion:
             resolve({
                 mimeType: actualRecordingMimeType,
@@ -180,5 +320,16 @@
             // Finally, stop the recording (which will actually trigger the onstop event):
             mediaRecorder.stop();
         });
+    },
+
+    // Release the microphone after all sounds have been played.
+    // This should be called after the transcription_done sound to allow
+    // Bluetooth headsets to switch back to the A2DP profile without interrupting audio:
+    releaseMicrophone: function () {
+        if (activeMediaStream) {
+            console.log('Audio recording - releasing microphone (Bluetooth will switch back to A2DP)');
+            activeMediaStream.getTracks().forEach(track => track.stop());
+            activeMediaStream = null;
+        }
     }
 };
\ No newline at end of file
diff --git a/app/MindWork AI Studio/wwwroot/changelog/v26.1.2.md b/app/MindWork AI Studio/wwwroot/changelog/v26.1.2.md
index 04ce8d99..a1de1efb 100644
--- a/app/MindWork AI Studio/wwwroot/changelog/v26.1.2.md
+++ b/app/MindWork AI Studio/wwwroot/changelog/v26.1.2.md
@@ -1,4 +1,5 @@
 # v26.1.2, build 232 (2026-01-xx xx:xx UTC)
 - Added the option to hide specific assistants by configuration plugins. This is useful for enterprise environments in organizations.
+- Improved microphone handling for the transcription preview so that all sound effects and the voice recording play back without interruption.
 - Fixed a logging bug that prevented log events from being recorded in some cases.
 - Fixed a bug affecting the transcription preview: previously, when you stopped music or other media, recorded or dictated text, and then tried to resume playback, the media wouldn’t resume as expected. This behavior is now fixed.
\ No newline at end of file
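
Usage note: a minimal sketch of the call order this patch expects on the JavaScript side. The `window.audioRecorder.stop()` entry point is an assumption — the hunks above show its body (the `resolve({...})`/`mediaRecorder.stop()` promise) but not its name — and the mime types are placeholders; everything else (`initSoundEffects`, `audioRecorder.start`, `playSound`, `audioRecorder.releaseMicrophone`) is introduced by this patch.

```js
// Sketch only; assumes app.js from this patch is loaded and the sound files exist.
async function demoRecordingFlow(dotnetRef) {
    // Must run inside a user gesture (e.g., a click) so the AudioContext
    // may start under browser autoplay policies:
    await window.initSoundEffects();

    // Opens the microphone, waits ~1.6 s for the Bluetooth A2DP → HFP
    // profile switch, then plays start_recording.ogg:
    await window.audioRecorder.start(dotnetRef, ['audio/ogg', 'audio/webm']);

    // ... the user dictates; chunks are uploaded in the background ...

    // Stops the MediaRecorder and plays stop_recording.ogg, but deliberately
    // keeps the microphone open (assumed entry point, see above):
    const result = await window.audioRecorder.stop();

    // The queue in playSound() schedules this cue after stop_recording.ogg
    // plus the 0.55 s gap, so neither sound is swallowed:
    await window.playSound('/sounds/transcription_done.ogg');

    // Mirrors the C# ReleaseMicrophoneAsync(): give queued sounds time to
    // drain, then release the microphone so the headset returns to A2DP:
    setTimeout(() => window.audioRecorder.releaseMicrophone(), 1_800);

    return result;
}
```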