Fix cold-start enterprise cleanup to preserve configs when servers are unreachable

This commit is contained in:
Thorsten Sommer 2026-02-16 19:14:14 +01:00
parent 3f0b2edb80
commit dbc2181ee6
Signed by untrusted user who does not match committer: tsommer
GPG Key ID: 371BBA77A02C0108
2 changed files with 72 additions and 30 deletions

View File

@ -215,29 +215,27 @@ public partial class MainLayout : LayoutComponentBase, IMessageBusReceiver, ILan
.CheckDeferredMessages<EnterpriseEnvironment>(Event.STARTUP_ENTERPRISE_ENVIRONMENT) .CheckDeferredMessages<EnterpriseEnvironment>(Event.STARTUP_ENTERPRISE_ENVIRONMENT)
.Where(env => env != default) .Where(env => env != default)
.ToList(); .ToList();
var wereDeferredDownloadsSuccessful = true;
var failedDeferredConfigIds = new HashSet<Guid>();
foreach (var env in enterpriseEnvironments) foreach (var env in enterpriseEnvironments)
{ {
var wasDownloadSuccessful = await PluginFactory.TryDownloadingConfigPluginAsync(env.ConfigurationId, env.ConfigurationServerUrl); var wasDownloadSuccessful = await PluginFactory.TryDownloadingConfigPluginAsync(env.ConfigurationId, env.ConfigurationServerUrl);
if (!wasDownloadSuccessful) if (!wasDownloadSuccessful)
{ {
wereDeferredDownloadsSuccessful = false; failedDeferredConfigIds.Add(env.ConfigurationId);
this.Logger.LogWarning("Failed to download deferred enterprise configuration '{ConfigId}' during startup. Keeping managed plugins unchanged.", env.ConfigurationId); this.Logger.LogWarning("Failed to download deferred enterprise configuration '{ConfigId}' during startup. Keeping managed plugins unchanged.", env.ConfigurationId);
break;
} }
} }
if (EnterpriseEnvironmentService.HasValidEnterpriseSnapshot && wereDeferredDownloadsSuccessful) if (EnterpriseEnvironmentService.HasValidEnterpriseSnapshot)
{ {
var activeConfigIds = EnterpriseEnvironmentService.CURRENT_ENVIRONMENTS var activeConfigIds = EnterpriseEnvironmentService.CURRENT_ENVIRONMENTS
.Select(env => env.ConfigurationId) .Select(env => env.ConfigurationId)
.ToHashSet(); .ToHashSet();
PluginFactory.RemoveUnreferencedManagedConfigurationPlugins(activeConfigIds); PluginFactory.RemoveUnreferencedManagedConfigurationPlugins(activeConfigIds);
} if (failedDeferredConfigIds.Count > 0)
else if (!wereDeferredDownloadsSuccessful) this.Logger.LogWarning("Deferred startup updates failed for {FailedCount} enterprise configuration(s). Those configurations were kept unchanged.", failedDeferredConfigIds.Count);
{
// Force a retry on the next enterprise sync cycle.
EnterpriseEnvironmentService.CURRENT_ENVIRONMENTS = [];
} }
// Initialize the enterprise encryption service for decrypting API keys: // Initialize the enterprise encryption service for decrypting API keys:

View File

@ -53,11 +53,20 @@ public sealed class EnterpriseEnvironmentService(ILogger<EnterpriseEnvironmentSe
} }
// //
// Step 2: Determine ETags and build the next environment list. // Step 2: Determine ETags and build the list of reachable configurations.
// IMPORTANT: if we cannot read the ETag for any active configuration, // IMPORTANT: when one config server fails, we continue with the others.
// do not mutate the plugin state and keep everything as-is.
// //
var nextEnvironments = new List<EnterpriseEnvironment>(); var reachableEnvironments = new List<EnterpriseEnvironment>();
var failedConfigIds = new HashSet<Guid>();
var currentEnvironmentsById = CURRENT_ENVIRONMENTS
.GroupBy(env => env.ConfigurationId)
.ToDictionary(group => group.Key, group => group.Last());
var activeFetchedEnvironmentsById = fetchedConfigs
.Where(config => config.IsActive)
.GroupBy(config => config.ConfigurationId)
.ToDictionary(group => group.Key, group => group.Last());
foreach (var config in fetchedConfigs) foreach (var config in fetchedConfigs)
{ {
if (!config.IsActive) if (!config.IsActive)
@ -69,59 +78,94 @@ public sealed class EnterpriseEnvironmentService(ILogger<EnterpriseEnvironmentSe
var etagResponse = await PluginFactory.DetermineConfigPluginETagAsync(config.ConfigurationId, config.ConfigurationServerUrl); var etagResponse = await PluginFactory.DetermineConfigPluginETagAsync(config.ConfigurationId, config.ConfigurationServerUrl);
if (!etagResponse.Success) if (!etagResponse.Success)
{ {
logger.LogWarning("Failed to read enterprise config metadata for '{ConfigId}'. Keeping current plugins unchanged.", config.ConfigurationId); failedConfigIds.Add(config.ConfigurationId);
return; logger.LogWarning("Failed to read enterprise config metadata for '{ConfigId}'. Keeping the current plugin state for this configuration.", config.ConfigurationId);
continue;
} }
nextEnvironments.Add(config with { ETag = etagResponse.ETag }); reachableEnvironments.Add(config with { ETag = etagResponse.ETag });
} }
// //
// Step 3: Compare with current environments and process changes. // Step 3: Compare with current environments and process changes.
// Download first. We only clean up obsolete plugins after all required // Download per configuration. A single failure must not block others.
// downloads have been completed successfully.
// //
var currentIds = CURRENT_ENVIRONMENTS.Select(e => e.ConfigurationId).ToHashSet();
var nextIds = nextEnvironments.Select(e => e.ConfigurationId).ToHashSet();
var shouldDeferStartupDownloads = isFirstRun && !PluginFactory.IsInitialized; var shouldDeferStartupDownloads = isFirstRun && !PluginFactory.IsInitialized;
var effectiveEnvironmentsById = new Dictionary<Guid, EnterpriseEnvironment>();
// Process new or changed configs: // Process new or changed configs:
foreach (var nextEnv in nextEnvironments) foreach (var nextEnv in reachableEnvironments)
{ {
var currentEnv = CURRENT_ENVIRONMENTS.FirstOrDefault(e => e.ConfigurationId == nextEnv.ConfigurationId); var hasCurrentEnvironment = currentEnvironmentsById.TryGetValue(nextEnv.ConfigurationId, out var currentEnv);
if (currentEnv == nextEnv) // Hint: This relies on the record equality to check if anything relevant has changed (e.g. server URL or ETag). if (hasCurrentEnvironment && currentEnv == nextEnv) // Hint: This relies on the record equality to check if anything relevant has changed (e.g. server URL or ETag).
{ {
logger.LogInformation("Enterprise configuration '{ConfigId}' has not changed. No update required.", nextEnv.ConfigurationId); logger.LogInformation("Enterprise configuration '{ConfigId}' has not changed. No update required.", nextEnv.ConfigurationId);
effectiveEnvironmentsById[nextEnv.ConfigurationId] = nextEnv;
continue; continue;
} }
var isNew = !currentIds.Contains(nextEnv.ConfigurationId); if(!hasCurrentEnvironment)
if(isNew)
logger.LogInformation("Detected new enterprise configuration with ID '{ConfigId}' and server URL '{ServerUrl}'.", nextEnv.ConfigurationId, nextEnv.ConfigurationServerUrl); logger.LogInformation("Detected new enterprise configuration with ID '{ConfigId}' and server URL '{ServerUrl}'.", nextEnv.ConfigurationId, nextEnv.ConfigurationServerUrl);
else else
logger.LogInformation("Detected change in enterprise configuration with ID '{ConfigId}'. Server URL or ETag has changed.", nextEnv.ConfigurationId); logger.LogInformation("Detected change in enterprise configuration with ID '{ConfigId}'. Server URL or ETag has changed.", nextEnv.ConfigurationId);
if (shouldDeferStartupDownloads) if (shouldDeferStartupDownloads)
{
MessageBus.INSTANCE.DeferMessage(null, Event.STARTUP_ENTERPRISE_ENVIRONMENT, nextEnv); MessageBus.INSTANCE.DeferMessage(null, Event.STARTUP_ENTERPRISE_ENVIRONMENT, nextEnv);
effectiveEnvironmentsById[nextEnv.ConfigurationId] = nextEnv;
}
else else
{ {
var wasDownloadSuccessful = await PluginFactory.TryDownloadingConfigPluginAsync(nextEnv.ConfigurationId, nextEnv.ConfigurationServerUrl); var wasDownloadSuccessful = await PluginFactory.TryDownloadingConfigPluginAsync(nextEnv.ConfigurationId, nextEnv.ConfigurationServerUrl);
if (!wasDownloadSuccessful) if (!wasDownloadSuccessful)
{ {
logger.LogWarning("Failed to update enterprise configuration '{ConfigId}'. Keeping current plugins unchanged.", nextEnv.ConfigurationId); failedConfigIds.Add(nextEnv.ConfigurationId);
return; if (hasCurrentEnvironment)
{
logger.LogWarning("Failed to update enterprise configuration '{ConfigId}'. Keeping the previously active version.", nextEnv.ConfigurationId);
effectiveEnvironmentsById[nextEnv.ConfigurationId] = currentEnv;
}
else
logger.LogWarning("Failed to download the new enterprise configuration '{ConfigId}'. Skipping activation for now.", nextEnv.ConfigurationId);
continue;
} }
effectiveEnvironmentsById[nextEnv.ConfigurationId] = nextEnv;
} }
} }
// Retain configurations for all failed IDs. On cold start there might be no
// previous in-memory snapshot yet, so we also keep the current fetched entry
// to protect it from cleanup while the server is unreachable.
foreach (var failedConfigId in failedConfigIds)
{
if (effectiveEnvironmentsById.ContainsKey(failedConfigId))
continue;
if (!currentEnvironmentsById.TryGetValue(failedConfigId, out var retainedEnvironment))
{
if (!activeFetchedEnvironmentsById.TryGetValue(failedConfigId, out retainedEnvironment))
continue;
logger.LogWarning("Could not refresh enterprise configuration '{ConfigId}'. Protecting it from cleanup until connectivity is restored.", failedConfigId);
}
else
logger.LogWarning("Could not refresh enterprise configuration '{ConfigId}'. Keeping the previously active version.", failedConfigId);
effectiveEnvironmentsById[failedConfigId] = retainedEnvironment;
}
var effectiveEnvironments = effectiveEnvironmentsById.Values.ToList();
// Cleanup is only allowed after a successful sync cycle: // Cleanup is only allowed after a successful sync cycle:
if (PluginFactory.IsInitialized && !shouldDeferStartupDownloads) if (PluginFactory.IsInitialized && !shouldDeferStartupDownloads)
PluginFactory.RemoveUnreferencedManagedConfigurationPlugins(nextIds); PluginFactory.RemoveUnreferencedManagedConfigurationPlugins(effectiveEnvironmentsById.Keys.ToHashSet());
if (nextEnvironments.Count == 0) if (effectiveEnvironments.Count == 0)
logger.LogInformation("AI Studio runs without any enterprise configurations."); logger.LogInformation("AI Studio runs without any enterprise configurations.");
CURRENT_ENVIRONMENTS = nextEnvironments; CURRENT_ENVIRONMENTS = effectiveEnvironments;
HasValidEnterpriseSnapshot = true; HasValidEnterpriseSnapshot = true;
} }
catch (Exception e) catch (Exception e)