Improved transcription preview (#632)
Some checks are pending
Build and Release / Read metadata (push) Waiting to run
Build and Release / Build app (${{ matrix.dotnet_runtime }}) (-aarch64-apple-darwin, osx-arm64, macos-latest, aarch64-apple-darwin, dmg updater) (push) Blocked by required conditions
Build and Release / Build app (${{ matrix.dotnet_runtime }}) (-aarch64-pc-windows-msvc.exe, win-arm64, windows-latest, aarch64-pc-windows-msvc, nsis updater) (push) Blocked by required conditions
Build and Release / Build app (${{ matrix.dotnet_runtime }}) (-aarch64-unknown-linux-gnu, linux-arm64, ubuntu-22.04-arm, aarch64-unknown-linux-gnu, appimage deb updater) (push) Blocked by required conditions
Build and Release / Build app (${{ matrix.dotnet_runtime }}) (-x86_64-apple-darwin, osx-x64, macos-latest, x86_64-apple-darwin, dmg updater) (push) Blocked by required conditions
Build and Release / Build app (${{ matrix.dotnet_runtime }}) (-x86_64-pc-windows-msvc.exe, win-x64, windows-latest, x86_64-pc-windows-msvc, nsis updater) (push) Blocked by required conditions
Build and Release / Build app (${{ matrix.dotnet_runtime }}) (-x86_64-unknown-linux-gnu, linux-x64, ubuntu-22.04, x86_64-unknown-linux-gnu, appimage deb updater) (push) Blocked by required conditions
Build and Release / Prepare & create release (push) Blocked by required conditions
Build and Release / Publish release (push) Blocked by required conditions

This commit is contained in:
Thorsten Sommer 2026-01-18 20:50:55 +01:00 committed by GitHub
parent cc3560fdd2
commit a1f69c8dad
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 89 additions and 18 deletions

View File

@ -2404,6 +2404,9 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VISION::T586430036"] = "Useful assistants
-- Failed to create the transcription provider.
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T1689988905"] = "Failed to create the transcription provider."
-- Failed to start audio recording.
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T2144994226"] = "Failed to start audio recording."
-- Stop recording and start transcription
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T224155287"] = "Stop recording and start transcription"
@ -2416,6 +2419,9 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T2851219233"] = "Transcrip
-- The configured transcription provider was not found.
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T331613105"] = "The configured transcription provider was not found."
-- Failed to stop audio recording.
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T3462568264"] = "Failed to stop audio recording."
-- The configured transcription provider does not meet the minimum confidence level.
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T3834149033"] = "The configured transcription provider does not meet the minimum confidence level."

View File

@ -6,7 +6,7 @@
@if (PreviewFeatures.PRE_SPEECH_TO_TEXT_2026.IsEnabled(this.SettingsManager) && !string.IsNullOrWhiteSpace(this.SettingsManager.ConfigurationData.App.UseTranscriptionProvider))
{
<MudTooltip Text="@this.Tooltip">
@if (this.isTranscribing)
@if (this.isTranscribing || this.isPreparing)
{
<MudProgressCircular Size="Size.Small" Indeterminate="true" Color="Color.Primary"/>
}

View File

@ -25,15 +25,23 @@ public partial class VoiceRecorder : MSGComponentBase
/// <summary>
/// Runs the base component initialization and then asks the JavaScript side
/// to initialize the sound effects.
/// </summary>
/// <remarks>
/// A failure of the JavaScript call is logged and not rethrown, so it does not
/// abort the initialization of this component.
/// </remarks>
protected override async Task OnInitializedAsync()
{
    await base.OnInitializedAsync();

    try
    {
        // Initialize sound effects. This "warms up" the AudioContext and preloads all sounds for reliable playback:
        await this.JsRuntime.InvokeVoidAsync("initSoundEffects");
    }
    catch (Exception ex)
    {
        // Keep this call as the only invocation of "initSoundEffects" in this method:
        // an additional call outside the try block would let the same failure escape.
        this.Logger.LogError(ex, "Failed to initialize sound effects.");
    }
}
#endregion
private uint numReceivedChunks;
private bool isRecording;
private bool isPreparing;
private bool isTranscribing;
private FileStream? currentRecordingStream;
private string? currentRecordingPath;
@ -51,9 +59,19 @@ public partial class VoiceRecorder : MSGComponentBase
{
if (toggled)
{
// Warm up sound effects:
await this.JsRuntime.InvokeVoidAsync("initSoundEffects");
this.isPreparing = true;
this.StateHasChanged();
try
{
// Warm up sound effects:
await this.JsRuntime.InvokeVoidAsync("initSoundEffects");
}
catch (Exception ex)
{
this.Logger.LogError(ex, "Failed to initialize sound effects.");
}
var mimeTypes = GetPreferredMimeTypes(
Builder.Create().UseAudio().UseSubtype(AudioSubtype.OGG).Build(),
Builder.Create().UseAudio().UseSubtype(AudioSubtype.AAC).Build(),
@ -71,20 +89,40 @@ public partial class VoiceRecorder : MSGComponentBase
// Initialize the file stream for writing chunks:
await this.InitializeRecordingStream();
var mimeTypeStrings = mimeTypes.ToStringArray();
var actualMimeType = await this.JsRuntime.InvokeAsync<string>("audioRecorder.start", this.dotNetReference, mimeTypeStrings);
try
{
var mimeTypeStrings = mimeTypes.ToStringArray();
var actualMimeType = await this.JsRuntime.InvokeAsync<string>("audioRecorder.start", this.dotNetReference, mimeTypeStrings);
// Store the MIME type for later use:
this.currentRecordingMimeType = actualMimeType;
// Store the MIME type for later use:
this.currentRecordingMimeType = actualMimeType;
this.Logger.LogInformation("Audio recording started with MIME type: '{ActualMimeType}'.", actualMimeType);
this.isRecording = true;
this.Logger.LogInformation("Audio recording started with MIME type: '{ActualMimeType}'.", actualMimeType);
this.isPreparing = false;
this.isRecording = true;
}
catch (Exception e)
{
this.Logger.LogError(e, "Failed to start audio recording.");
await this.MessageBus.SendError(new(Icons.Material.Filled.MicOff, this.T("Failed to start audio recording.")));
// Clean up the recording stream if starting failed:
await this.FinalizeRecordingStream();
}
}
else
{
var result = await this.JsRuntime.InvokeAsync<AudioRecordingResult>("audioRecorder.stop");
if (result.ChangedMimeType)
this.Logger.LogWarning("The recorded audio MIME type was changed to '{ResultMimeType}'.", result.MimeType);
try
{
var result = await this.JsRuntime.InvokeAsync<AudioRecordingResult>("audioRecorder.stop");
if (result.ChangedMimeType)
this.Logger.LogWarning("The recorded audio MIME type was changed to '{ResultMimeType}'.", result.MimeType);
}
catch (Exception e)
{
this.Logger.LogError(e, "Failed to stop audio recording.");
await this.MessageBus.SendError(new(Icons.Material.Filled.MicOff, this.T("Failed to stop audio recording.")));
}
// Close and finalize the recording stream:
await this.FinalizeRecordingStream();
@ -280,8 +318,15 @@ public partial class VoiceRecorder : MSGComponentBase
this.Logger.LogInformation("Transcription completed successfully. Result length: {Length} characters.", transcribedText.Length);
// Play the transcription done sound effect:
await this.JsRuntime.InvokeVoidAsync("playSound", "/sounds/transcription_done.ogg");
try
{
// Play the transcription done sound effect:
await this.JsRuntime.InvokeVoidAsync("playSound", "/sounds/transcription_done.ogg");
}
catch (Exception ex)
{
this.Logger.LogError(ex, "Failed to play transcription done sound effect.");
}
// Copy the transcribed text to the clipboard:
await this.RustService.CopyText2Clipboard(this.Snackbar, transcribedText);
@ -320,7 +365,15 @@ public partial class VoiceRecorder : MSGComponentBase
// Wait a moment for any queued sounds to finish playing, then release the microphone.
// This allows Bluetooth headsets to switch back to A2DP profile without interrupting audio:
await Task.Delay(1_800);
await this.JsRuntime.InvokeVoidAsync("audioRecorder.releaseMicrophone");
try
{
await this.JsRuntime.InvokeVoidAsync("audioRecorder.releaseMicrophone");
}
catch (Exception e)
{
this.Logger.LogError(e, "Failed to release the microphone.");
}
}
private sealed class AudioRecordingResult

View File

@ -2406,6 +2406,9 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VISION::T586430036"] = "Nützliche Assist
-- Failed to create the transcription provider.
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T1689988905"] = "Der Anbieter für die Transkription konnte nicht erstellt werden."
-- Failed to start audio recording.
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T2144994226"] = "Audioaufnahme konnte nicht gestartet werden."
-- Stop recording and start transcription
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T224155287"] = "Aufnahme beenden und Transkription starten"
@ -2418,6 +2421,9 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T2851219233"] = "Transkrip
-- The configured transcription provider was not found.
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T331613105"] = "Der konfigurierte Anbieter für die Transkription wurde nicht gefunden."
-- Failed to stop audio recording.
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T3462568264"] = "Audioaufnahme konnte nicht beendet werden."
-- The configured transcription provider does not meet the minimum confidence level.
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T3834149033"] = "Der konfigurierte Anbieter für die Transkription erfüllt nicht das erforderliche Mindestmaß an Vertrauenswürdigkeit."

View File

@ -2406,6 +2406,9 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VISION::T586430036"] = "Useful assistants
-- Failed to create the transcription provider.
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T1689988905"] = "Failed to create the transcription provider."
-- Failed to start audio recording.
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T2144994226"] = "Failed to start audio recording."
-- Stop recording and start transcription
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T224155287"] = "Stop recording and start transcription"
@ -2418,6 +2421,9 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T2851219233"] = "Transcrip
-- The configured transcription provider was not found.
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T331613105"] = "The configured transcription provider was not found."
-- Failed to stop audio recording.
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T3462568264"] = "Failed to stop audio recording."
-- The configured transcription provider does not meet the minimum confidence level.
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::VOICERECORDER::T3834149033"] = "The configured transcription provider does not meet the minimum confidence level."