diff --git a/app/MindWork AI Studio/App.razor b/app/MindWork AI Studio/App.razor
index 37492a67..b314b033 100644
--- a/app/MindWork AI Studio/App.razor
+++ b/app/MindWork AI Studio/App.razor
@@ -27,6 +27,7 @@
+
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Components/VoiceRecorder.razor.cs b/app/MindWork AI Studio/Components/VoiceRecorder.razor.cs
index 4ea00180..d29e28dd 100644
--- a/app/MindWork AI Studio/Components/VoiceRecorder.razor.cs
+++ b/app/MindWork AI Studio/Components/VoiceRecorder.razor.cs
@@ -20,6 +20,18 @@ public partial class VoiceRecorder : MSGComponentBase
     [Inject]
     private ISnackbar Snackbar { get; init; } = null!;
 
+    #region Overrides of MSGComponentBase
+
+    protected override async Task OnInitializedAsync()
+    {
+        await base.OnInitializedAsync();
+
+        // Initialize sound effects. This "warms up" the AudioContext and preloads all sounds for reliable playback:
+        await this.JsRuntime.InvokeVoidAsync("initSoundEffects");
+    }
+
+    #endregion
+
     private uint numReceivedChunks;
     private bool isRecording;
     private bool isTranscribing;
@@ -39,6 +51,9 @@ public partial class VoiceRecorder : MSGComponentBase
     {
         if (toggled)
         {
+            // Warm up sound effects:
+            await this.JsRuntime.InvokeVoidAsync("initSoundEffects");
+
             var mimeTypes = GetPreferredMimeTypes(
                 Builder.Create().UseAudio().UseSubtype(AudioSubtype.OGG).Build(),
                 Builder.Create().UseAudio().UseSubtype(AudioSubtype.AAC).Build(),
@@ -189,7 +204,11 @@ public partial class VoiceRecorder : MSGComponentBase
     private async Task TranscribeRecordingAsync()
    {
         if (this.finalRecordingPath is null)
+        {
+            // No recording to transcribe, but still release the microphone:
+            await this.ReleaseMicrophoneAsync();
             return;
+        }
 
         this.isTranscribing = true;
         this.StateHasChanged();
@@ -288,12 +307,22 @@ public partial class VoiceRecorder : MSGComponentBase
         }
         finally
         {
+            await this.ReleaseMicrophoneAsync();
+
             this.finalRecordingPath = null;
             this.isTranscribing = false;
             this.StateHasChanged();
         }
     }
 
+    private async Task ReleaseMicrophoneAsync()
+    {
+        // Wait a moment for any queued sounds to finish playing, then release the microphone.
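+        // (The 1.8 s delay below is a heuristic; it has to cover any still-queued effects, i.e. each sound's duration plus the 0.25 s queue gap from audio.js.)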
+        // This allows Bluetooth headsets to switch back to the A2DP profile without interrupting audio:
+        await Task.Delay(1_800);
+        await this.JsRuntime.InvokeVoidAsync("audioRecorder.releaseMicrophone");
+    }
+
     private sealed class AudioRecordingResult
     {
         public string MimeType { get; init; } = string.Empty;
diff --git a/app/MindWork AI Studio/wwwroot/app.js b/app/MindWork AI Studio/wwwroot/app.js
index 8a4e036d..aa6b8e2b 100644
--- a/app/MindWork AI Studio/wwwroot/app.js
+++ b/app/MindWork AI Studio/wwwroot/app.js
@@ -25,133 +25,4 @@ window.clearDiv = function (divName) {
 
 window.scrollToBottom = function(element) {
     element.scrollIntoView({ behavior: 'smooth', block: 'end', inline: 'nearest' });
-}
-
-window.playSound = function(soundPath) {
-    try {
-        const audio = new Audio(soundPath);
-        audio.play().catch(error => {
-            console.warn('Failed to play sound effect:', error);
-        });
-    } catch (error) {
-        console.warn('Error creating audio element:', error);
-    }
-};
-
-let mediaRecorder;
-let actualRecordingMimeType;
-let changedMimeType = false;
-let pendingChunkUploads = 0;
-
-window.audioRecorder = {
-    start: async function (dotnetRef, desiredMimeTypes = []) {
-        const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
-
-        // Play start recording sound effect:
-        window.playSound('/sounds/start_recording.ogg');
-
-        // When only one mime type is provided as a string, convert it to an array:
-        if (typeof desiredMimeTypes === 'string') {
-            desiredMimeTypes = [desiredMimeTypes];
-        }
-
-        // Log sent mime types for debugging:
-        console.log('Audio recording - requested mime types: ', desiredMimeTypes);
-
-        let mimeTypes = desiredMimeTypes.filter(type => typeof type === 'string' && type.trim() !== '');
-
-        // Next, we have to ensure that we have some default mime types to check as well.
-        // In case the provided list does not contain these, we append them:
-        // Use provided mime types or fallback to a default list:
-        const defaultMimeTypes = [
-            'audio/webm',
-            'audio/ogg',
-            'audio/mp4',
-            'audio/mpeg',
-            ''// Fallback to browser default
-        ];
-
-        defaultMimeTypes.forEach(type => {
-            if (!mimeTypes.includes(type)) {
-                mimeTypes.push(type);
-            }
-        });
-
-        console.log('Audio recording - final mime types to check (included defaults): ', mimeTypes);
-
-        // Find the first supported mime type:
-        actualRecordingMimeType = mimeTypes.find(type =>
-            type === '' || MediaRecorder.isTypeSupported(type)
-        ) || '';
-
-        console.log('Audio recording - the browser selected the following mime type for recording: ', actualRecordingMimeType);
-        const options = actualRecordingMimeType ?
-            { mimeType: actualRecordingMimeType } : {};
-        mediaRecorder = new MediaRecorder(stream, options);
-
-        // In case the browser changed the mime type:
-        actualRecordingMimeType = mediaRecorder.mimeType;
-        console.log('Audio recording - actual mime type used by the browser: ', actualRecordingMimeType);
-
-        // Check the list of desired mime types against the actual one:
-        if (!desiredMimeTypes.includes(actualRecordingMimeType)) {
-            changedMimeType = true;
-            console.warn(`Audio recording - requested mime types ('${desiredMimeTypes.join(', ')}') do not include the actual mime type used by the browser ('${actualRecordingMimeType}').`);
-        } else {
-            changedMimeType = false;
-        }
-
-        // Reset the pending uploads counter:
-        pendingChunkUploads = 0;
-
-        // Stream each chunk directly to .NET as it becomes available:
-        mediaRecorder.ondataavailable = async (event) => {
-            if (event.data.size > 0) {
-                pendingChunkUploads++;
-                try {
-                    const arrayBuffer = await event.data.arrayBuffer();
-                    const uint8Array = new Uint8Array(arrayBuffer);
-                    await dotnetRef.invokeMethodAsync('OnAudioChunkReceived', uint8Array);
-                } catch (error) {
-                    console.error('Error sending audio chunk to .NET:', error);
-                } finally {
-                    pendingChunkUploads--;
-                }
-            }
-        };
-
-        mediaRecorder.start(3000); // read the recorded data in 3-second chunks
-        return actualRecordingMimeType;
-    },
-
-    stop: async function () {
-        return new Promise((resolve) => {
-
-            // Add an event listener to handle the stop event:
-            mediaRecorder.onstop = async () => {
-
-                // Wait for all pending chunk uploads to complete before finalizing:
-                console.log(`Audio recording - waiting for ${pendingChunkUploads} pending uploads.`);
-                while (pendingChunkUploads > 0) {
-                    await new Promise(r => setTimeout(r, 10)); // wait 10 ms before checking again
-                }
-
-                console.log('Audio recording - all chunks uploaded, finalizing.');
-
-                // Play stop recording sound effect:
-                window.playSound('/sounds/stop_recording.ogg');
-
-                // Stop all tracks to release the microphone:
-                mediaRecorder.stream.getTracks().forEach(track => track.stop());
-
-                // No need to process data here anymore, just signal completion:
-                resolve({
-                    mimeType: actualRecordingMimeType,
-                    changedMimeType: changedMimeType,
-                });
-            };
-
-            // Finally, stop the recording (which will actually trigger the onstop event):
-            mediaRecorder.stop();
-        });
-    }
-};
\ No newline at end of file
+}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/wwwroot/audio.js b/app/MindWork AI Studio/wwwroot/audio.js
new file mode 100644
index 00000000..689bc50f
--- /dev/null
+++ b/app/MindWork AI Studio/wwwroot/audio.js
@@ -0,0 +1,306 @@
+// Shared audio context for sound effects (the Web Audio API does not register with the Media Session):
+let soundEffectContext = null;
+
+// Cache for decoded sound effect audio buffers:
+const soundEffectCache = new Map();
+
+// Track the preload state:
+let soundEffectsPreloaded = false;
+
+// Queue system: tracks when the next sound can start playing.
+// This prevents sounds from overlapping and getting "swallowed" by the audio system:
+let nextAvailablePlayTime = 0;
+
+// Minimum gap between sounds in seconds (small buffer to ensure clean transitions):
+const SOUND_GAP_SECONDS = 0.25;
+
+// List of all sound effects used in the app:
+const SOUND_EFFECT_PATHS = [
+    '/sounds/start_recording.ogg',
+    '/sounds/stop_recording.ogg',
+    '/sounds/transcription_done.ogg'
+];
+
+// Initialize the audio context with low-latency settings.
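+// (The 'interactive' latencyHint used below asks the browser for its lowest-latency audio output path.)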
+// Should be called from a user interaction (click, keypress)
+// to satisfy browser autoplay policies:
+window.initSoundEffects = async function() {
+
+    if (soundEffectContext && soundEffectContext.state !== 'closed') {
+        // Already initialized, just ensure it's running:
+        if (soundEffectContext.state === 'suspended') {
+            await soundEffectContext.resume();
+        }
+
+        return;
+    }
+
+    try {
+        // Create the context with the interactive latency hint for the lowest latency:
+        soundEffectContext = new (window.AudioContext || window.webkitAudioContext)({
+            latencyHint: 'interactive'
+        });
+
+        // Resume immediately (needed for Safari/macOS):
+        if (soundEffectContext.state === 'suspended') {
+            await soundEffectContext.resume();
+        }
+
+        // Reset the queue timing:
+        nextAvailablePlayTime = 0;
+
+        //
+        // Play a very short silent buffer to "warm up" the audio pipeline.
+        // This helps prevent the first real sound from being cut off:
+        //
+        const silentBuffer = soundEffectContext.createBuffer(1, 1, soundEffectContext.sampleRate);
+        const silentSource = soundEffectContext.createBufferSource();
+        silentSource.buffer = silentBuffer;
+        silentSource.connect(soundEffectContext.destination);
+        silentSource.start(0);
+
+        console.log('Sound effects - AudioContext initialized with latency:', soundEffectContext.baseLatency);
+
+        // Preload all sound effects in parallel:
+        if (!soundEffectsPreloaded) {
+            await window.preloadSoundEffects();
+        }
+    } catch (error) {
+        console.warn('Failed to initialize sound effects:', error);
+    }
+};
+
+// Preload all sound effect files into the cache:
+window.preloadSoundEffects = async function() {
+    if (soundEffectsPreloaded) {
+        return;
+    }
+
+    // Ensure that the context exists:
+    if (!soundEffectContext || soundEffectContext.state === 'closed') {
+        soundEffectContext = new (window.AudioContext || window.webkitAudioContext)({
+            latencyHint: 'interactive'
+        });
+    }
+
+    console.log('Sound effects - preloading', SOUND_EFFECT_PATHS.length, 'sound files...');
+
+    const preloadPromises = SOUND_EFFECT_PATHS.map(async (soundPath) => {
+        try {
+            const response = await fetch(soundPath);
+            const arrayBuffer = await response.arrayBuffer();
+            const audioBuffer = await soundEffectContext.decodeAudioData(arrayBuffer);
+            soundEffectCache.set(soundPath, audioBuffer);
+
+            console.log('Sound effects - preloaded:', soundPath, 'duration:', audioBuffer.duration.toFixed(2), 's');
+        } catch (error) {
+            console.warn('Sound effects - failed to preload:', soundPath, error);
+        }
+    });
+
+    await Promise.all(preloadPromises);
+    soundEffectsPreloaded = true;
+    console.log('Sound effects - all files preloaded');
+};
+
+window.playSound = async function(soundPath) {
+    try {
+        // Initialize context if needed (fallback if initSoundEffects wasn't called):
+        if (!soundEffectContext || soundEffectContext.state === 'closed') {
+            soundEffectContext = new (window.AudioContext || window.webkitAudioContext)({
+                latencyHint: 'interactive'
+            });
+
+            nextAvailablePlayTime = 0;
+        }
+
+        // Resume if suspended (browser autoplay policy):
+        if (soundEffectContext.state === 'suspended') {
+            await soundEffectContext.resume();
+        }
+
+        // Check the cache for already decoded audio:
+        let audioBuffer = soundEffectCache.get(soundPath);
+
+        if (!audioBuffer) {
+            // Fetch and decode the audio file (fallback if not preloaded):
+            console.log('Sound effects - loading on demand:', soundPath);
+            const response = await fetch(soundPath);
+            const arrayBuffer = await response.arrayBuffer();
+            audioBuffer = await
+                soundEffectContext.decodeAudioData(arrayBuffer);
+            soundEffectCache.set(soundPath, audioBuffer);
+        }
+
+        // Calculate when this sound should start:
+        const currentTime = soundEffectContext.currentTime;
+        let startTime;
+
+        if (currentTime >= nextAvailablePlayTime) {
+            // No sound is playing, or the previous sound has finished; start immediately:
+            startTime = 0; // 0 means "now" in Web Audio API
+            nextAvailablePlayTime = currentTime + audioBuffer.duration + SOUND_GAP_SECONDS;
+        } else {
+            // A sound is still playing; schedule this sound to start after it:
+            startTime = nextAvailablePlayTime;
+            nextAvailablePlayTime = startTime + audioBuffer.duration + SOUND_GAP_SECONDS;
+            console.log('Sound effects - queued:', soundPath, 'will play in', (startTime - currentTime).toFixed(2), 's');
+        }
+
+        // Create a new source node and schedule playback:
+        const source = soundEffectContext.createBufferSource();
+        source.buffer = audioBuffer;
+        source.connect(soundEffectContext.destination);
+        source.start(startTime);
+        console.log('Sound effects - playing:', soundPath);
+
+    } catch (error) {
+        console.warn('Failed to play sound effect:', error);
+    }
+};
+
+let mediaRecorder;
+let actualRecordingMimeType;
+let changedMimeType = false;
+let pendingChunkUploads = 0;
+
+// Store the media stream so we can close the microphone later:
+let activeMediaStream = null;
+
+// Delay in milliseconds to wait after getUserMedia() for the Bluetooth profile switch (A2DP → HFP):
+const BLUETOOTH_PROFILE_SWITCH_DELAY_MS = 1_600;
+
+window.audioRecorder = {
+    start: async function (dotnetRef, desiredMimeTypes = []) {
+        const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+        activeMediaStream = stream;
+
+        // Wait for Bluetooth headsets to complete the profile switch from A2DP to HFP.
+        // This prevents the first sound from being cut off during the switch:
+        console.log('Audio recording - waiting for Bluetooth profile switch...');
+        await new Promise(r => setTimeout(r, BLUETOOTH_PROFILE_SWITCH_DELAY_MS));
+
+        // Play start recording sound effect:
+        await window.playSound('/sounds/start_recording.ogg');
+
+        // When only one mime type is provided as a string, convert it to an array:
+        if (typeof desiredMimeTypes === 'string') {
+            desiredMimeTypes = [desiredMimeTypes];
+        }
+
+        // Log the requested mime types for debugging:
+        console.log('Audio recording - requested mime types: ', desiredMimeTypes);
+
+        let mimeTypes = desiredMimeTypes.filter(type => typeof type === 'string' && type.trim() !== '');
+
+        // Next, we have to ensure that some default mime types are checked as well.
+        // In case the provided list does not contain them, we append them:
+        const defaultMimeTypes = [
+            'audio/webm',
+            'audio/ogg',
+            'audio/mp4',
+            'audio/mpeg',
+            '' // Fallback to the browser default
+        ];
+
+        defaultMimeTypes.forEach(type => {
+            if (!mimeTypes.includes(type)) {
+                mimeTypes.push(type);
+            }
+        });
+
+        console.log('Audio recording - final mime types to check (included defaults): ', mimeTypes);
+
+        // Find the first supported mime type:
+        actualRecordingMimeType = mimeTypes.find(type =>
+            type === '' || MediaRecorder.isTypeSupported(type)
+        ) || '';
+
+        console.log('Audio recording - the browser selected the following mime type for recording: ', actualRecordingMimeType);
+        const options = actualRecordingMimeType ?
+            { mimeType: actualRecordingMimeType } : {};
+        mediaRecorder = new MediaRecorder(stream, options);
+
+        // In case the browser changed the mime type:
+        actualRecordingMimeType = mediaRecorder.mimeType;
+        console.log('Audio recording - actual mime type used by the browser: ', actualRecordingMimeType);
+
+        // Check the list of desired mime types against the actual one:
+        if (!desiredMimeTypes.includes(actualRecordingMimeType)) {
+            changedMimeType = true;
+            console.warn(`Audio recording - requested mime types ('${desiredMimeTypes.join(', ')}') do not include the actual mime type used by the browser ('${actualRecordingMimeType}').`);
+        } else {
+            changedMimeType = false;
+        }
+
+        // Reset the pending uploads counter:
+        pendingChunkUploads = 0;
+
+        // Stream each chunk directly to .NET as it becomes available:
+        mediaRecorder.ondataavailable = async (event) => {
+            if (event.data.size > 0) {
+                pendingChunkUploads++;
+                try {
+                    const arrayBuffer = await event.data.arrayBuffer();
+                    const uint8Array = new Uint8Array(arrayBuffer);
+                    await dotnetRef.invokeMethodAsync('OnAudioChunkReceived', uint8Array);
+                } catch (error) {
+                    console.error('Error sending audio chunk to .NET:', error);
+                } finally {
+                    pendingChunkUploads--;
+                }
+            }
+        };
+
+        mediaRecorder.start(3000); // read the recorded data in 3-second chunks
+        return actualRecordingMimeType;
+    },
+
+    stop: async function () {
+        return new Promise((resolve) => {
+
+            // Add an event listener to handle the stop event:
+            mediaRecorder.onstop = async () => {
+
+                // Wait for all pending chunk uploads to complete before finalizing:
+                console.log(`Audio recording - waiting for ${pendingChunkUploads} pending uploads.`);
+                while (pendingChunkUploads > 0) {
+                    await new Promise(r => setTimeout(r, 10)); // wait 10 ms before checking again
+                }
+
+                console.log('Audio recording - all chunks uploaded, finalizing.');
+
+                // Play stop recording sound effect:
+                await window.playSound('/sounds/stop_recording.ogg');
+
+                //
+                // IMPORTANT: Do NOT release the microphone here!
+                // Bluetooth headsets switch profiles (HFP → A2DP) when the microphone is released,
+                // which causes audio to be interrupted. We keep the microphone open so that the
+                // stop_recording and transcription_done sounds can play without interruption.
+                //
+                // Call window.audioRecorder.releaseMicrophone() after the last sound has played.
+                //
+
+                // No need to process data here anymore, just signal completion:
+                resolve({
+                    mimeType: actualRecordingMimeType,
+                    changedMimeType: changedMimeType,
+                });
+            };
+
+            // Finally, stop the recording (which will actually trigger the onstop event):
+            mediaRecorder.stop();
+        });
+    },
+
+    // Release the microphone after all sounds have been played.
+    // This should be called after the transcription_done sound to allow
+    // Bluetooth headsets to switch back to the A2DP profile without interrupting audio:
+    releaseMicrophone: function () {
+        if (activeMediaStream) {
+            console.log('Audio recording - releasing microphone (Bluetooth will switch back to A2DP)');
+            activeMediaStream.getTracks().forEach(track => track.stop());
+            activeMediaStream = null;
+        }
+    }
+};
diff --git a/app/MindWork AI Studio/wwwroot/changelog/v26.1.2.md b/app/MindWork AI Studio/wwwroot/changelog/v26.1.2.md
index a861ccb0..a1de1efb 100644
--- a/app/MindWork AI Studio/wwwroot/changelog/v26.1.2.md
+++ b/app/MindWork AI Studio/wwwroot/changelog/v26.1.2.md
@@ -1,3 +1,5 @@
 # v26.1.2, build 232 (2026-01-xx xx:xx UTC)
 - Added the option to hide specific assistants by configuration plugins.
   This is useful in enterprise environments.
-- Fixed a logging bug that prevented log events from being recorded in some cases.
\ No newline at end of file
+- Improved the microphone handling for the transcription preview so that all sound effects and the voice recording are processed without interruption.
+- Fixed a logging bug that prevented log events from being recorded in some cases.
+- Fixed a bug affecting the transcription preview: previously, when you stopped music or other media, recorded or dictated text, and then tried to resume playback, the media wouldn’t resume as expected.
\ No newline at end of file
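
Note for reviewers (not part of the patch): the C# and JavaScript pieces above form one protocol between the VoiceRecorder component and audio.js. Below is a minimal sketch of the call order the new API expects; `recordingLifecycle` and the `dotnetRef` parameter are hypothetical names used for illustration, while every `window.*` identifier is defined in the diff above.

```js
// Minimal usage sketch (illustrative only, not part of the patch):
async function recordingLifecycle(dotnetRef) {
    // 1. On a user gesture: create/resume the AudioContext and preload all sound effects.
    await window.initSoundEffects();

    // 2. Open the microphone. start() waits ~1.6 s for the Bluetooth
    //    A2DP -> HFP profile switch, then plays start_recording.ogg:
    const mimeType = await window.audioRecorder.start(dotnetRef, ['audio/ogg']);
    console.log('Recording with mime type:', mimeType);

    // ... the user speaks; chunks stream to .NET every 3 seconds ...

    // 3. Stop recording. stop_recording.ogg plays, but the microphone stays
    //    open on purpose so no HFP -> A2DP switch interrupts the audio:
    const result = await window.audioRecorder.stop();
    console.log('Recorded as:', result.mimeType, 'mime type changed:', result.changedMimeType);

    // 4. After transcription has finished, play the final effect ...
    await window.playSound('/sounds/transcription_done.ogg');

    // 5. ... and only then release the microphone; the C# side delays this
    //    call by 1.8 s so that queued sounds can finish playing first:
    window.audioRecorder.releaseMicrophone();
}
```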
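A short worked example of the scheduling arithmetic in playSound, assuming a 0.60 s sound and the 0.25 s SOUND_GAP_SECONDS defined above:

```js
// Worked example (durations assumed for illustration):
// A 0.60 s sound is requested at AudioContext time 10.00 s:
//   currentTime (10.00) >= nextAvailablePlayTime (0), so startTime = 0 ("now");
//   nextAvailablePlayTime = 10.00 + 0.60 + 0.25 = 10.85 s.
// A second sound is requested at 10.20 s, i.e. before 10.85 s:
//   startTime = 10.85; source.start(10.85) schedules it on the AudioContext
//   clock, so it begins right after the first sound plus the gap, and
//   nextAvailablePlayTime advances to 10.85 + its duration + 0.25.
```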