Mirror of https://github.com/MindWorkAI/AI-Studio.git (synced 2026-02-15 00:41:37 +00:00)
Improved microphone handling and sound effect playback

commit 06075f2a53 (parent 06a7cfb0fe)
@@ -20,6 +20,18 @@ public partial class VoiceRecorder : MSGComponentBase
     [Inject]
     private ISnackbar Snackbar { get; init; } = null!;
 
+    #region Overrides of MSGComponentBase
+
+    protected override async Task OnInitializedAsync()
+    {
+        await base.OnInitializedAsync();
+
+        // Initialize sound effects. This "warms up" the AudioContext and preloads all sounds for reliable playback:
+        await this.JsRuntime.InvokeVoidAsync("initSoundEffects");
+    }
+
+    #endregion
+
     private uint numReceivedChunks;
     private bool isRecording;
     private bool isTranscribing;
@@ -39,6 +51,9 @@ public partial class VoiceRecorder : MSGComponentBase
     {
         if (toggled)
         {
+            // Warm up sound effects:
+            await this.JsRuntime.InvokeVoidAsync("initSoundEffects");
+
             var mimeTypes = GetPreferredMimeTypes(
                 Builder.Create().UseAudio().UseSubtype(AudioSubtype.OGG).Build(),
                 Builder.Create().UseAudio().UseSubtype(AudioSubtype.AAC).Build(),
@@ -189,7 +204,11 @@ public partial class VoiceRecorder : MSGComponentBase
     private async Task TranscribeRecordingAsync()
     {
         if (this.finalRecordingPath is null)
+        {
+            // No recording to transcribe, but still release the microphone:
+            await this.ReleaseMicrophoneAsync();
             return;
+        }
 
         this.isTranscribing = true;
         this.StateHasChanged();
@@ -288,12 +307,22 @@ public partial class VoiceRecorder : MSGComponentBase
         }
         finally
         {
+            await this.ReleaseMicrophoneAsync();
+
            this.finalRecordingPath = null;
            this.isTranscribing = false;
            this.StateHasChanged();
         }
     }
+
+    private async Task ReleaseMicrophoneAsync()
+    {
+        // Wait a moment for any queued sounds to finish playing, then release the microphone.
+        // This allows Bluetooth headsets to switch back to A2DP profile without interrupting audio:
+        await Task.Delay(1_800);
+        await this.JsRuntime.InvokeVoidAsync("audioRecorder.releaseMicrophone");
+    }
 
     private sealed class AudioRecordingResult
     {
         public string MimeType { get; init; } = string.Empty;
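
Note: the fixed `Task.Delay(1_800)` above only works if 1.8 s reliably outlasts whatever is still queued on the JavaScript side (remaining sound duration plus the gap constant). A possible refinement, purely a sketch and not part of this commit, would be to expose the queue state from JavaScript so the C# side could wait exactly as long as needed. The helper below is hypothetical; it only uses the `soundEffectContext` and `nextAvailablePlayTime` variables introduced later in this diff:

    // Hypothetical helper (not in this commit): reports how many milliseconds
    // remain until the sound queue drains, based on the scheduling state.
    window.getSoundQueueRemainingMs = function () {
        if (!soundEffectContext || soundEffectContext.state === 'closed') {
            return 0;
        }
        const remainingSeconds = nextAvailablePlayTime - soundEffectContext.currentTime;
        return Math.max(0, Math.ceil(remainingSeconds * 1_000));
    };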
@@ -27,15 +27,120 @@ window.scrollToBottom = function(element) {
     element.scrollIntoView({ behavior: 'smooth', block: 'end', inline: 'nearest' });
 }
 
-// Shared audio context for sound effects (Web Audio API does not register with Media Session):
+// Share the audio context for sound effects (the Web Audio API does not register with Media Session):
 let soundEffectContext = null;
 
+// Cache for decoded sound effect audio buffers:
 const soundEffectCache = new Map();
 
+// Track the preload state:
+let soundEffectsPreloaded = false;
+
+// Queue system: tracks when the next sound can start playing.
+// This prevents sounds from overlapping and getting "swallowed" by the audio system:
+let nextAvailablePlayTime = 0;
+
+// Minimum gap between sounds in seconds (small buffer to ensure clean transitions):
+const SOUND_GAP_SECONDS = 0.55;
+
+// List of all sound effects used in the app:
+const SOUND_EFFECT_PATHS = [
+    '/sounds/start_recording.ogg',
+    '/sounds/stop_recording.ogg',
+    '/sounds/transcription_done.ogg'
+];
+
+// Initialize the audio context with low-latency settings.
+// Should be called from a user interaction (click, keypress)
+// to satisfy browser autoplay policies:
+window.initSoundEffects = async function() {
+
+    if (soundEffectContext && soundEffectContext.state !== 'closed') {
+        // Already initialized, just ensure it's running:
+        if (soundEffectContext.state === 'suspended') {
+            await soundEffectContext.resume();
+        }
+
+        return;
+    }
+
+    try {
+        // Create the context with the interactive latency hint for the lowest latency:
+        soundEffectContext = new (window.AudioContext || window.webkitAudioContext)({
+            latencyHint: 'interactive'
+        });
+
+        // Resume immediately (needed for Safari/macOS):
+        if (soundEffectContext.state === 'suspended') {
+            await soundEffectContext.resume();
+        }
+
+        // Reset the queue timing:
+        nextAvailablePlayTime = 0;
+
+        //
+        // Play a very short silent buffer to "warm up" the audio pipeline.
+        // This helps prevent the first real sound from being cut off:
+        //
+        const silentBuffer = soundEffectContext.createBuffer(1, 1, soundEffectContext.sampleRate);
+        const silentSource = soundEffectContext.createBufferSource();
+        silentSource.buffer = silentBuffer;
+        silentSource.connect(soundEffectContext.destination);
+        silentSource.start(0);
+
+        console.log('Sound effects - AudioContext initialized with latency:', soundEffectContext.baseLatency);
+
+        // Preload all sound effects in parallel:
+        if (!soundEffectsPreloaded) {
+            await window.preloadSoundEffects();
+        }
+    } catch (error) {
+        console.warn('Failed to initialize sound effects:', error);
+    }
+};
+
+// Preload all sound effect files into the cache:
+window.preloadSoundEffects = async function() {
+    if (soundEffectsPreloaded) {
+        return;
+    }
+
+    // Ensure that the context exists:
+    if (!soundEffectContext || soundEffectContext.state === 'closed') {
+        soundEffectContext = new (window.AudioContext || window.webkitAudioContext)({
+            latencyHint: 'interactive'
+        });
+    }
+
+    console.log('Sound effects - preloading', SOUND_EFFECT_PATHS.length, 'sound files...');
+
+    const preloadPromises = SOUND_EFFECT_PATHS.map(async (soundPath) => {
+        try {
+            const response = await fetch(soundPath);
+            const arrayBuffer = await response.arrayBuffer();
+            const audioBuffer = await soundEffectContext.decodeAudioData(arrayBuffer);
+            soundEffectCache.set(soundPath, audioBuffer);
+
+            console.log('Sound effects - preloaded:', soundPath, 'duration:', audioBuffer.duration.toFixed(2), 's');
+        } catch (error) {
+            console.warn('Sound effects - failed to preload:', soundPath, error);
+        }
+    });
+
+    await Promise.all(preloadPromises);
+    soundEffectsPreloaded = true;
+    console.log('Sound effects - all files preloaded');
+};
 
 window.playSound = async function(soundPath) {
     try {
-        // Create or reuse the audio context:
+        // Initialize the context if needed (fallback if initSoundEffects wasn't called):
         if (!soundEffectContext || soundEffectContext.state === 'closed') {
-            soundEffectContext = new (window.AudioContext || window.webkitAudioContext)();
+            soundEffectContext = new (window.AudioContext || window.webkitAudioContext)({
+                latencyHint: 'interactive'
+            });
+
+            nextAvailablePlayTime = 0;
         }
 
         // Resume if suspended (browser autoplay policy):
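
Because browsers only allow an AudioContext to start (or resume) in response to a user gesture, `initSoundEffects` is meant to be triggered from an interaction, as the comments above state. A minimal standalone wiring sketch; the `#mic-button` element id is an assumption for illustration, not part of AI Studio, which calls this via Blazor JS interop instead:

    // Illustrative only: the element id is hypothetical.
    document.getElementById('mic-button').addEventListener('click', async () => {
        await window.initSoundEffects(); // user gesture satisfies the autoplay policy
        await window.playSound('/sounds/start_recording.ogg'); // now plays reliably
    });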
@@ -47,19 +152,36 @@ window.playSound = async function(soundPath) {
         let audioBuffer = soundEffectCache.get(soundPath);
 
         if (!audioBuffer) {
-            // Fetch and decode the audio file:
+            // Fetch and decode the audio file (fallback if not preloaded):
+            console.log('Sound effects - loading on demand:', soundPath);
             const response = await fetch(soundPath);
             const arrayBuffer = await response.arrayBuffer();
             audioBuffer = await soundEffectContext.decodeAudioData(arrayBuffer);
             soundEffectCache.set(soundPath, audioBuffer);
         }
 
-        // Create a new source node and play:
+        // Calculate when this sound should start:
+        const currentTime = soundEffectContext.currentTime;
+        let startTime;
+
+        if (currentTime >= nextAvailablePlayTime) {
+            // No sound is playing, or the previous sound has finished; start immediately:
+            startTime = 0; // 0 means "now" in the Web Audio API
+            nextAvailablePlayTime = currentTime + audioBuffer.duration + SOUND_GAP_SECONDS;
+        } else {
+            // A sound is still playing; schedule this sound to start after it:
+            startTime = nextAvailablePlayTime;
+            nextAvailablePlayTime = startTime + audioBuffer.duration + SOUND_GAP_SECONDS;
+            console.log('Sound effects - queued:', soundPath, 'will play in', (startTime - currentTime).toFixed(2), 's');
+        }
+
+        // Create a new source node and schedule playback:
         const source = soundEffectContext.createBufferSource();
         source.buffer = audioBuffer;
         source.connect(soundEffectContext.destination);
-        source.start(0);
+        source.start(startTime);
+        console.log('Sound effects - playing:', soundPath);
 
     } catch (error) {
         console.warn('Failed to play sound effect:', error);
     }
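
To make the scheduling above concrete, here is a worked example. The 0.8 s sound durations are assumed for illustration only (the real file durations are not stated in this diff); `SOUND_GAP_SECONDS` is 0.55:

    // 1st call at currentTime = 0.00: 0.00 >= nextAvailablePlayTime (0)
    //   -> starts immediately via source.start(0);
    //   -> nextAvailablePlayTime = 0.00 + 0.80 + 0.55 = 1.35
    // 2nd call at currentTime = 0.10: 0.10 < 1.35
    //   -> scheduled at startTime = 1.35 (about 1.25 s from now);
    //   -> nextAvailablePlayTime = 1.35 + 0.80 + 0.55 = 2.70
    await window.playSound('/sounds/start_recording.ogg');
    await window.playSound('/sounds/stop_recording.ogg'); // queued, never overlapped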
@@ -70,12 +192,24 @@ let actualRecordingMimeType;
 let changedMimeType = false;
 let pendingChunkUploads = 0;
 
+// Store the media stream so we can close the microphone later:
+let activeMediaStream = null;
+
+// Delay in milliseconds to wait after getUserMedia() for the Bluetooth profile switch (A2DP → HFP):
+const BLUETOOTH_PROFILE_SWITCH_DELAY_MS = 1_600;
+
 window.audioRecorder = {
     start: async function (dotnetRef, desiredMimeTypes = []) {
         const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+        activeMediaStream = stream;
+
+        // Wait for Bluetooth headsets to complete the profile switch from A2DP to HFP.
+        // This prevents the first sound from being cut off during the switch:
+        console.log('Audio recording - waiting for Bluetooth profile switch...');
+        await new Promise(r => setTimeout(r, BLUETOOTH_PROFILE_SWITCH_DELAY_MS));
 
         // Play start recording sound effect:
-        window.playSound('/sounds/start_recording.ogg');
+        await window.playSound('/sounds/start_recording.ogg');
 
         // When only one mime type is provided as a string, convert it to an array:
         if (typeof desiredMimeTypes === 'string') {
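
The 1 600 ms constant assumes every headset completes its A2DP → HFP switch within that window. A sketch of a possible alternative, not part of this commit, would be to poll the input track's `muted` flag and resolve early; whether `muted` actually flips during the profile switch varies by platform, so treat this as an assumption:

    // Hypothetical: resolve as soon as the input track reports unmuted audio,
    // keeping the fixed delay as an upper bound.
    async function waitForLiveAudio(stream, timeoutMs = BLUETOOTH_PROFILE_SWITCH_DELAY_MS) {
        const track = stream.getAudioTracks()[0];
        const startedAt = performance.now();
        while (track && track.muted && (performance.now() - startedAt) < timeoutMs) {
            await new Promise(r => setTimeout(r, 50)); // poll every 50 ms
        }
    }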
@@ -165,11 +299,17 @@ window.audioRecorder = {
             console.log('Audio recording - all chunks uploaded, finalizing.');
 
             // Play stop recording sound effect:
-            window.playSound('/sounds/stop_recording.ogg');
+            await window.playSound('/sounds/stop_recording.ogg');
 
+            //
+            // IMPORTANT: Do NOT release the microphone here!
+            // Bluetooth headsets switch profiles (HFP → A2DP) when the microphone is released,
+            // which causes audio to be interrupted. We keep the microphone open so that the
+            // stop_recording and transcription_done sounds can play without interruption.
+            //
+            // Call window.audioRecorder.releaseMicrophone() after the last sound has played.
+            //
-            // Stop all tracks to release the microphone:
-            mediaRecorder.stream.getTracks().forEach(track => track.stop());
 
             // No need to process data here anymore, just signal completion:
             resolve({
                 mimeType: actualRecordingMimeType,
@@ -180,5 +320,16 @@ window.audioRecorder = {
         // Finally, stop the recording (which will actually trigger the onstop event):
         mediaRecorder.stop();
         });
+    },
+
+    // Release the microphone after all sounds have been played.
+    // This should be called after the transcription_done sound to allow
+    // Bluetooth headsets to switch back to A2DP profile without interrupting audio:
+    releaseMicrophone: function () {
+        if (activeMediaStream) {
+            console.log('Audio recording - releasing microphone (Bluetooth will switch back to A2DP)');
+            activeMediaStream.getTracks().forEach(track => track.stop());
+            activeMediaStream = null;
+        }
     }
 };
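
Taken together, the intended lifecycle looks like the sketch below. It is illustrative, not directly runnable: `dotnetRef` is the .NET object reference that Blazor passes in, and the entry point that stops the recorder is not part of this diff, so that step is elided:

    // Illustrative sequence, assuming a dotnetRef obtained via Blazor JS interop:
    async function demoRecordingLifecycle(dotnetRef) {
        // Opens the mic, waits ~1.6 s for the Bluetooth profile switch,
        // then plays start_recording.ogg:
        await window.audioRecorder.start(dotnetRef, ['audio/ogg']);

        // ... recording runs; stopping it plays stop_recording.ogg while the
        // microphone deliberately stays open ...

        // After the final transcription_done sound (the C# side waits 1.8 s),
        // hand the device back; the HFP -> A2DP switch happens here:
        window.audioRecorder.releaseMicrophone();
    }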
@@ -1,4 +1,5 @@
 # v26.1.2, build 232 (2026-01-xx xx:xx UTC)
 - Added the option to hide specific assistants by configuration plugins. This is useful in enterprise environments.
+- Improved the microphone handling (transcription preview) so that all sound effects and the voice recording are processed without interruption.
 - Fixed a logging bug that prevented log events from being recorded in some cases.
 - Fixed a bug affecting the transcription preview: previously, when you stopped music or other media, recorded or dictated text, and then tried to resume playback, the media wouldn’t resume as expected.