From 530b5f6cf845ecaf229655035a925e258dcbda48 Mon Sep 17 00:00:00 2001 From: Thorsten Sommer Date: Sat, 24 Jan 2026 21:43:00 +0100 Subject: [PATCH] Added Rust failure detection to the .NET server (#637) --- app/MindWork AI Studio/Program.cs | 3 +- app/MindWork AI Studio/Tools/Event.cs | 3 +- .../Services/EnterpriseEnvironmentService.cs | 3 + .../RustAvailabilityMonitorService.cs | 111 ++++++++++++++++++ .../Tools/Services/RustService.Events.cs | 3 +- .../Tools/Services/RustService.Log.cs | 6 + .../Tools/Services/RustService.cs | 4 +- .../wwwroot/changelog/v26.1.2.md | 1 + 8 files changed, 130 insertions(+), 4 deletions(-) create mode 100644 app/MindWork AI Studio/Tools/Services/RustAvailabilityMonitorService.cs diff --git a/app/MindWork AI Studio/Program.cs b/app/MindWork AI Studio/Program.cs index c577b2f4..9c0e2bcd 100644 --- a/app/MindWork AI Studio/Program.cs +++ b/app/MindWork AI Studio/Program.cs @@ -134,6 +134,7 @@ internal sealed class Program builder.Services.AddHostedService(); builder.Services.AddHostedService(); builder.Services.AddHostedService(); + builder.Services.AddHostedService(); // ReSharper disable AccessToDisposedClosure builder.Services.AddHostedService(_ => rust); @@ -230,4 +231,4 @@ internal sealed class Program PluginFactory.Dispose(); programLogger.LogInformation("The AI Studio server was stopped."); } -} \ No newline at end of file +} diff --git a/app/MindWork AI Studio/Tools/Event.cs b/app/MindWork AI Studio/Tools/Event.cs index 0590c5c7..b3d3628f 100644 --- a/app/MindWork AI Studio/Tools/Event.cs +++ b/app/MindWork AI Studio/Tools/Event.cs @@ -15,6 +15,7 @@ public enum Event SHOW_WARNING, SHOW_SUCCESS, TAURI_EVENT_RECEIVED, + RUST_SERVICE_UNAVAILABLE, // Update events: USER_SEARCH_FOR_UPDATE, @@ -53,4 +54,4 @@ public enum Event SEND_TO_MY_TASKS_ASSISTANT, SEND_TO_JOB_POSTING_ASSISTANT, SEND_TO_DOCUMENT_ANALYSIS_ASSISTANT, -} \ No newline at end of file +} diff --git a/app/MindWork AI 
Studio/Tools/Services/EnterpriseEnvironmentService.cs b/app/MindWork AI Studio/Tools/Services/EnterpriseEnvironmentService.cs index ff727129..44645dc7 100644 --- a/app/MindWork AI Studio/Tools/Services/EnterpriseEnvironmentService.cs +++ b/app/MindWork AI Studio/Tools/Services/EnterpriseEnvironmentService.cs @@ -42,6 +42,7 @@ public sealed class EnterpriseEnvironmentService(ILogger logger; + private readonly MessageBus messageBus; + private readonly RustService rustService; + private readonly IHostApplicationLifetime appLifetime; + + private int rustUnavailableCount; + private int availabilityCheckTriggered; + + // To prevent multiple shutdown triggers. We use int instead of bool for Interlocked operations. + private int shutdownTriggered; + + public RustAvailabilityMonitorService( + ILogger logger, + MessageBus messageBus, + RustService rustService, + IHostApplicationLifetime appLifetime) + { + this.logger = logger; + this.messageBus = messageBus; + this.rustService = rustService; + this.appLifetime = appLifetime; + + this.messageBus.RegisterComponent(this); + this.ApplyFilters([], [Event.RUST_SERVICE_UNAVAILABLE]); + } + + protected override async Task ExecuteAsync(CancellationToken stoppingToken) + { + this.logger.LogInformation("The Rust availability monitor service was initialized."); + await Task.Delay(Timeout.InfiniteTimeSpan, stoppingToken); + } + + public override async Task StopAsync(CancellationToken cancellationToken) + { + this.messageBus.Unregister(this); + await base.StopAsync(cancellationToken); + } + + public Task ProcessMessage(ComponentBase? sendingComponent, Event triggeredEvent, T? 
data) + { + if (triggeredEvent is not Event.RUST_SERVICE_UNAVAILABLE) + return Task.CompletedTask; + + var reason = data switch + { + string s when !string.IsNullOrWhiteSpace(s) => s, + _ => "unknown reason", + }; + + // Atomically increment the unavailable count; the result is compared against the threshold further below: + var numEvents = Interlocked.Increment(ref this.rustUnavailableCount); + + // On the first event, trigger some Rust availability checks to confirm. + // Just fire and forget - we don't need to await this here. + if (numEvents == 1 && Interlocked.Exchange(ref this.availabilityCheckTriggered, 1) == 0) + { + // + // This is also useful to speed up the detection of Rust availability issues, + // as it triggers two immediate checks instead of waiting for the next scheduled check. + // Scheduled checks are typically every few minutes, which might be too long to wait + // in case of critical Rust service failures. + // + // On the other hand, we cannot kill the .NET server on the first failure, as it might + // be a transient issue. + // + + _ = this.VerifyRustAvailability(); + _ = this.VerifyRustAvailability(); + } + + if (numEvents <= UNAVAILABLE_EVENT_THRESHOLD) + { + this.logger.LogWarning("Rust service unavailable (num repeats={NumRepeats}, threshold={Threshold}). Reason = '{Reason}'. Waiting for more occurrences before shutting down the server.", numEvents, UNAVAILABLE_EVENT_THRESHOLD, reason); + return Task.CompletedTask; + } + + // Ensure shutdown is only triggered once: + if (Interlocked.Exchange(ref this.shutdownTriggered, 1) != 0) + return Task.CompletedTask; + + this.logger.LogError("Rust service unavailable (num repeats={NumRepeats}, threshold={Threshold}). Reason = '{Reason}'. Shutting down the server.", numEvents, UNAVAILABLE_EVENT_THRESHOLD, reason); + this.appLifetime.StopApplication(); + return Task.CompletedTask; + } + + public Task ProcessMessageWithResult(ComponentBase? sendingComponent, Event triggeredEvent, TPayload?
data) + { + return Task.FromResult(default); + } + + private async Task VerifyRustAvailability() + { + try + { + await this.rustService.ReadUserLanguage(); + } + catch (Exception e) + { + this.logger.LogWarning(e, "Rust availability check failed."); + await this.messageBus.SendMessage(null, Event.RUST_SERVICE_UNAVAILABLE, "Rust availability check failed"); + } + } +} diff --git a/app/MindWork AI Studio/Tools/Services/RustService.Events.cs b/app/MindWork AI Studio/Tools/Services/RustService.Events.cs index e4d72a0f..62538938 100644 --- a/app/MindWork AI Studio/Tools/Services/RustService.Events.cs +++ b/app/MindWork AI Studio/Tools/Services/RustService.Events.cs @@ -62,6 +62,7 @@ public partial class RustService catch (Exception e) { this.logger!.LogError("Error while streaming Tauri events: {Message}", e.Message); + await this.ReportRustServiceUnavailable("Tauri event stream error"); await Task.Delay(TimeSpan.FromSeconds(3), stopToken); } } @@ -74,4 +75,4 @@ public partial class RustService this.logger!.LogWarning("Stopped streaming Tauri events."); } -} \ No newline at end of file +} diff --git a/app/MindWork AI Studio/Tools/Services/RustService.Log.cs b/app/MindWork AI Studio/Tools/Services/RustService.Log.cs index b8542ddd..c43f0ff9 100644 --- a/app/MindWork AI Studio/Tools/Services/RustService.Log.cs +++ b/app/MindWork AI Studio/Tools/Services/RustService.Log.cs @@ -32,6 +32,12 @@ public sealed partial class RustService } catch { + // + // We don't expect this to ever happen because the HTTP client cannot raise exceptions in fire-and-forget mode. + // This is because we don't await the task, so any exceptions thrown during the HTTP request are not propagated + // back to the caller. 
+ // + Console.WriteLine("Failed to send log event to Rust service."); // Ignore errors to avoid log loops } diff --git a/app/MindWork AI Studio/Tools/Services/RustService.cs b/app/MindWork AI Studio/Tools/Services/RustService.cs index 6272378c..5d4e2b08 100644 --- a/app/MindWork AI Studio/Tools/Services/RustService.cs +++ b/app/MindWork AI Studio/Tools/Services/RustService.cs @@ -69,6 +69,8 @@ public sealed partial class RustService : BackgroundService this.encryptor = encryptionService; } + private Task ReportRustServiceUnavailable(string reason) => MessageBus.INSTANCE.SendMessage(null, Event.RUST_SERVICE_UNAVAILABLE, reason); + #region Overrides of BackgroundService /// @@ -90,4 +92,4 @@ public sealed partial class RustService : BackgroundService } #endregion -} \ No newline at end of file +} diff --git a/app/MindWork AI Studio/wwwroot/changelog/v26.1.2.md b/app/MindWork AI Studio/wwwroot/changelog/v26.1.2.md index 2199c74e..346fb2be 100644 --- a/app/MindWork AI Studio/wwwroot/changelog/v26.1.2.md +++ b/app/MindWork AI Studio/wwwroot/changelog/v26.1.2.md @@ -3,6 +3,7 @@ - Added the current date and time to the system prompt for better context in conversations. Thanks Peer `peerschuett` for the contribution. - Added the ability to control the voice recording with transcription (in preview) by using a system-wide shortcut. The shortcut can be configured in the application settings or by using a configuration plugin. Thus, a uniform shortcut can be defined for an entire organization. - Added error handling for the context window overflow, which can occur with huge file attachments in chats or the document analysis assistant. +- Added Rust failure detection to the .NET server. When the Rust service repeatedly becomes unavailable, the .NET server now detects this and shuts down gracefully. - Improved the error handling for model loading in provider dialogs (LLMs, embeddings, transcriptions).
- Improved the microphone handling (transcription preview) so that all sound effects and the voice recording are processed without interruption. - Improved the handling of self-hosted providers in the configuration dialogs (LLMs, embeddings, and transcriptions) when the host cannot provide a list of models.