added manual refresh and cleaned up code

This commit is contained in:
Paul Koudelka 2026-05-13 18:13:34 +02:00
parent ac677c5ac7
commit 637bf9701b
19 changed files with 920 additions and 642 deletions

View File

@ -4837,15 +4837,24 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::SETTINGS::SETTINGSDIALOGDATASOURCES::T108494
-- Are you sure you want to delete the data source '{0}' of type {1}?
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::SETTINGS::SETTINGSDIALOGDATASOURCES::T1096979935"] = "Are you sure you want to delete the data source '{0}' of type {1}?"
-- Automatic local data source refresh
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::SETTINGS::SETTINGSDIALOGDATASOURCES::T1208397349"] = "Automatic local data source refresh"
-- Edit Local Directory Data Source
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::SETTINGS::SETTINGSDIALOGDATASOURCES::T1215599168"] = "Edit Local Directory Data Source"
-- Refresh
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::SETTINGS::SETTINGSDIALOGDATASOURCES::T135637716"] = "Refresh"
-- Add Local Directory as Data Source
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::SETTINGS::SETTINGSDIALOGDATASOURCES::T1454193397"] = "Add Local Directory as Data Source"
-- Delete
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::SETTINGS::SETTINGSDIALOGDATASOURCES::T1469573738"] = "Delete"
-- Refresh all
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::SETTINGS::SETTINGSDIALOGDATASOURCES::T1503082343"] = "Refresh all"
-- External (ERI)
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::SETTINGS::SETTINGSDIALOGDATASOURCES::T1652430727"] = "External (ERI)"
@ -4900,6 +4909,9 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::SETTINGS::SETTINGSDIALOGDATASOURCES::T352566
-- No data sources configured yet.
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::SETTINGS::SETTINGSDIALOGDATASOURCES::T3549650120"] = "No data sources configured yet."
-- Local data sources refresh when files change.
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::SETTINGS::SETTINGSDIALOGDATASOURCES::T3687976654"] = "Local data sources refresh when files change."
-- Actions
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::SETTINGS::SETTINGSDIALOGDATASOURCES::T3865031940"] = "Actions"
@ -4912,6 +4924,9 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::SETTINGS::SETTINGSDIALOGDATASOURCES::T590005
-- External Data (ERI-Server v1)
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::SETTINGS::SETTINGSDIALOGDATASOURCES::T774473996"] = "External Data (ERI-Server v1)"
-- Local data sources refresh only when triggered manually.
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::SETTINGS::SETTINGSDIALOGDATASOURCES::T854231603"] = "Local data sources refresh only when triggered manually."
-- Local Directory
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::SETTINGS::SETTINGSDIALOGDATASOURCES::T926703547"] = "Local Directory"
@ -7571,25 +7586,19 @@ UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T378481461"] = "Source like p
UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T4165204724"] = "Document"
-- Running
UI_TEXT_CONTENT["AISTUDIO::TOOLS::SERVICES::DATASOURCEEMBEDDINGSERVICE::T1160324588"] = "Running"
UI_TEXT_CONTENT["AISTUDIO::TOOLS::SERVICES::DATASOURCEEMBEDDINGMODELS::T1160324588"] = "Running"
-- Idle
UI_TEXT_CONTENT["AISTUDIO::TOOLS::SERVICES::DATASOURCEEMBEDDINGSERVICE::T1168775091"] = "Idle"
UI_TEXT_CONTENT["AISTUDIO::TOOLS::SERVICES::DATASOURCEEMBEDDINGMODELS::T1168775091"] = "Idle"
-- Needs attention
UI_TEXT_CONTENT["AISTUDIO::TOOLS::SERVICES::DATASOURCEEMBEDDINGSERVICE::T1566837660"] = "Needs attention"
UI_TEXT_CONTENT["AISTUDIO::TOOLS::SERVICES::DATASOURCEEMBEDDINGMODELS::T1566837660"] = "Needs attention"
-- Queued
UI_TEXT_CONTENT["AISTUDIO::TOOLS::SERVICES::DATASOURCEEMBEDDINGSERVICE::T2655222900"] = "Queued"
-- Embedding
UI_TEXT_CONTENT["AISTUDIO::TOOLS::SERVICES::DATASOURCEEMBEDDINGSERVICE::T2838542994"] = "Embedding"
UI_TEXT_CONTENT["AISTUDIO::TOOLS::SERVICES::DATASOURCEEMBEDDINGMODELS::T2655222900"] = "Queued"
-- Completed
UI_TEXT_CONTENT["AISTUDIO::TOOLS::SERVICES::DATASOURCEEMBEDDINGSERVICE::T3968379570"] = "Completed"
-- Embeddings
UI_TEXT_CONTENT["AISTUDIO::TOOLS::SERVICES::DATASOURCEEMBEDDINGSERVICE::T951463987"] = "Embeddings"
UI_TEXT_CONTENT["AISTUDIO::TOOLS::SERVICES::DATASOURCEEMBEDDINGMODELS::T3968379570"] = "Completed"
-- Pandoc Installation
UI_TEXT_CONTENT["AISTUDIO::TOOLS::SERVICES::PANDOCAVAILABILITYSERVICE::T185447014"] = "Pandoc Installation"

View File

@ -123,7 +123,8 @@
AdornmentColor="Color.Info"
Validation="@this.providerValidation.ValidatingInstanceName"
UserAttributes="@SPELLCHECK_ATTRIBUTES"/>
@if (this.DataModel != default){
@if (this.DataLLMProvider != LLMProviders.NONE)
{
<MudJustifiedText Typo="Typo.body1" Class="mb-3">
@T("For better embeddings and less storage usage, it's recommended to use a custom tokenizer to enable a more accurate token count.")
</MudJustifiedText>
@ -137,8 +138,7 @@
Error="@(!string.IsNullOrWhiteSpace(this.dataCustomTokenizerValidationIssue))"
ErrorText="@(this.dataCustomTokenizerValidationIssue)"
Validation="@this.providerValidation.ValidatingCustomTokenizer"
OnClear = "@this.ClearPathTokenizer"
/>
OnClear="@this.ClearPathTokenizer" />
}
</MudForm>
@if (this.dataStoreWasAttempted)

View File

@ -2,6 +2,7 @@ using AIStudio.Chat;
using AIStudio.Components;
using AIStudio.Provider;
using AIStudio.Settings;
using AIStudio.Tools.Rust;
using AIStudio.Tools.Services;
using AIStudio.Tools.Validation;
@ -115,7 +116,7 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId
GetPreviousInstanceName = () => this.dataEditingPreviousInstanceName,
GetUsedInstanceNames = () => this.UsedInstanceNames,
GetHost = () => this.DataHost,
IsModelProvidedManually = () => this.DataLLMProvider is LLMProviders.SELF_HOSTED && this.DataHost is Host.OLLAMA,
IsModelProvidedManually = () => this.DataLLMProvider.IsEmbeddingModelProvidedManually(this.DataHost),
GetCustomTokenizerValidationIssue = () => this.dataCustomTokenizerValidationIssue,
};
}
@ -126,7 +127,7 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId
Model model = default;
if(this.DataLLMProvider is LLMProviders.SELF_HOSTED)
{
if (this.DataHost is Host.OLLAMA)
if (this.DataLLMProvider.IsEmbeddingModelProvidedManually(this.DataHost))
model = new Model(this.dataManuallyModel, null);
else if (this.DataHost is Host.LM_STUDIO)
model = this.DataModel;
@ -176,7 +177,7 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId
//
// We cannot load the API key for self-hosted providers:
//
if (this.DataLLMProvider is LLMProviders.SELF_HOSTED && this.DataHost is not Host.OLLAMA)
if (this.DataLLMProvider is LLMProviders.SELF_HOSTED && this.DataHost is not Host.OLLAMA && this.DataHost is not Host.VLLM)
{
await this.ReloadModels();
await base.OnInitializedAsync();
@ -241,10 +242,10 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId
if (!this.dataIsValid)
return;
var response = await this.RustService.StoreTokenizer(TokenizerModelId.ForEmbeddingProviderId(this.DataId), this.dataFilePath);
var response = await this.StoreOrDeleteTokenizerAsync();
if (!response.Success)
{
this.dataCustomTokenizerValidationIssue = response.Message;
this.dataCustomTokenizerValidationIssue = string.IsNullOrWhiteSpace(response.Message) ? string.Empty : response.Message;
await this.form.Validate();
return;
}
@ -340,6 +341,15 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId
}
}
private Task<TokenizerResponse> StoreOrDeleteTokenizerAsync()
{
var tokenizerId = TokenizerModelId.ForEmbeddingProviderId(this.DataId);
if (string.IsNullOrWhiteSpace(this.dataFilePath))
return this.RustService.DeleteTokenizer(tokenizerId);
return this.RustService.StoreTokenizer(tokenizerId, this.dataFilePath);
}
private void OnHostChanged(Host selectedHost)
{
// When the host changes, reset the model selection state:

View File

@ -4,6 +4,7 @@ using System.Text.Json;
using AIStudio.Components;
using AIStudio.Provider;
using AIStudio.Provider.HuggingFace;
using AIStudio.Tools.Rust;
using AIStudio.Tools.Services;
using AIStudio.Tools.Validation;
@ -268,7 +269,7 @@ public partial class ProviderDialog : MSGComponentBase, ISecretId
if (!this.dataIsValid)
return;
var tokenizerResponse = await this.RustService.StoreTokenizer(TokenizerModelId.ForProviderId(this.DataId), this.dataFilePath);
var tokenizerResponse = await this.StoreOrDeleteTokenizerAsync();
if (!tokenizerResponse.Success)
{
this.dataCustomTokenizerValidationIssue = tokenizerResponse.Message;
@ -367,6 +368,15 @@ public partial class ProviderDialog : MSGComponentBase, ISecretId
}
}
private Task<TokenizerResponse> StoreOrDeleteTokenizerAsync()
{
var tokenizerId = TokenizerModelId.ForProviderId(this.DataId);
if (string.IsNullOrWhiteSpace(this.dataFilePath))
return this.RustService.DeleteTokenizer(tokenizerId);
return this.RustService.StoreTokenizer(tokenizerId, this.dataFilePath);
}
private void OnHostChanged(Host selectedHost)
{
// When the host changes, reset the model selection state:

View File

@ -14,6 +14,13 @@
@T("You might configure different data sources. A data source can include one file, all files in a directory, or data from your company. Later, you can incorporate these data sources as needed when the AI requires this data to complete a certain task.")
</MudJustifiedText>
<MudStack Row="true" AlignItems="AlignItems.Center" Justify="Justify.SpaceBetween" Class="mb-3" Wrap="Wrap.Wrap">
<MudTextSwitch Label="@T("Automatic local data source refresh")" Value="@this.SettingsManager.ConfigurationData.DataSourceIndexing.AutomaticRefresh" LabelOn="@T("Local data sources refresh when files change.")" LabelOff="@T("Local data sources refresh only when triggered manually.")" ValueChanged="@this.AutomaticRefreshChanged"/>
<MudButton Variant="Variant.Filled" Color="Color.Primary" StartIcon="@Icons.Material.Filled.Sync" Disabled="@(!this.HasRefreshableDataSources())" OnClick="@this.RefreshAllDataSources">
@T("Refresh all")
</MudButton>
</MudStack>
<MudTable Items="@this.SettingsManager.ConfigurationData.DataSources" Hover="@true" Class="border-dashed border rounded-lg">
<ColGroup>
<col style="width: 3em;"/>
@ -38,6 +45,9 @@
<MudTd>
<MudStack Row="true" Class="mb-2 mt-2" Wrap="Wrap.Wrap">
<MudIconButton Variant="Variant.Filled" Color="Color.Info" Icon="@Icons.Material.Filled.Info" OnClick="() => this.ShowInformation(context)"/>
<MudButton Variant="Variant.Filled" Color="Color.Primary" StartIcon="@Icons.Material.Filled.Sync" Disabled="@(!this.CanRefreshDataSource(context))" OnClick="() => this.RefreshDataSource(context)">
@T("Refresh")
</MudButton>
<MudButton Variant="Variant.Filled" Color="Color.Info" StartIcon="@Icons.Material.Filled.Edit" OnClick="() => this.EditDataSource(context)">
@T("Edit")
</MudButton>
@ -73,4 +83,4 @@
@T("Close")
</MudButton>
</DialogActions>
</MudDialog>
</MudDialog>

View File

@ -28,6 +28,39 @@ public partial class SettingsDialogDataSources : SettingsDialogBase
return T("Unknown");
}
private bool CanRefreshDataSource(IDataSource dataSource)
{
return dataSource is DataSourceLocalDirectory or DataSourceLocalFile;
}
private bool HasRefreshableDataSources()
{
return this.SettingsManager.ConfigurationData.DataSources.Any(this.CanRefreshDataSource);
}
private async Task AutomaticRefreshChanged(bool enabled)
{
this.SettingsManager.ConfigurationData.DataSourceIndexing.AutomaticRefresh = enabled;
await this.SettingsManager.StoreSettings();
this.DataSourceEmbeddingService.RefreshAutomaticWatchers();
await this.MessageBus.SendMessage<bool>(this, Event.CONFIGURATION_CHANGED);
}
private async Task RefreshAllDataSources()
{
await this.DataSourceEmbeddingService.QueueAllInternalDataSourcesAsync();
await this.MessageBus.SendMessage<bool>(this, Event.CONFIGURATION_CHANGED);
}
private async Task RefreshDataSource(IDataSource dataSource)
{
if (!this.CanRefreshDataSource(dataSource))
return;
await this.DataSourceEmbeddingService.QueueDataSourceAsync(dataSource);
await this.MessageBus.SendMessage<bool>(this, Event.CONFIGURATION_CHANGED);
}
private async Task AddDataSource(DataSourceType type)
{

View File

@ -25,7 +25,7 @@
<MudSpacer/>
@if (this.embeddingOverview.IsVisible)
@if (this.showEmbeddingStatusIcon)
{
<MudNavMenu>
<MudTooltip Text="@this.EmbeddingNavigationTooltip" Placement="Placement.Right">
@ -64,7 +64,7 @@
<MudSpacer/>
@if (this.embeddingOverview.IsVisible)
@if (this.showEmbeddingStatusIcon)
{
<MudNavMenu>
@if (this.SettingsManager.ConfigurationData.App.NavigationBehavior is NavBehavior.NEVER_EXPAND_USE_TOOLTIPS)

View File

@ -60,9 +60,10 @@ public partial class MainLayout : LayoutComponentBase, IMessageBusReceiver, ILan
private bool startupCompleted;
private readonly SemaphoreSlim mandatoryInfoDialogSemaphore = new(1, 1);
private DataSourceEmbeddingOverview embeddingOverview = new(false, true, DataSourceEmbeddingState.COMPLETED, 0, 0, 0, string.Empty);
private DataSourceEmbeddingOverview embeddingOverview = new(false, DataSourceEmbeddingState.COMPLETED, 0, 0, 0);
private IReadOnlyCollection<NavBarItem> navItems = [];
private NavBarItem embeddingItem = new NavBarItem(string.Empty, string.Empty, string.Empty, string.Empty, string.Empty, false);
private NavBarItem embeddingItem = new (string.Empty, string.Empty, string.Empty, string.Empty, string.Empty, false);
private bool showEmbeddingStatusIcon = false;
#region Overrides of ComponentBase
@ -93,7 +94,7 @@ public partial class MainLayout : LayoutComponentBase, IMessageBusReceiver, ILan
// Ensure that all settings are loaded:
await this.SettingsManager.LoadSettings();
await this.DataSourceEmbeddingService.QueueAllInternalDataSourcesAsync();
await this.DataSourceEmbeddingService.QueueAllInternalDataSourcesIfAutomaticRefreshAsync();
// Register this component with the message bus:
this.MessageBus.RegisterComponent(this);
@ -325,6 +326,7 @@ public partial class MainLayout : LayoutComponentBase, IMessageBusReceiver, ILan
private void LoadEmbeddingItem()
{
this.embeddingOverview = this.DataSourceEmbeddingService.GetOverview();
this.showEmbeddingStatusIcon = this.embeddingOverview.IsVisible;
var palette = this.ColorTheme.GetCurrentPalette(this.SettingsManager);
(string icon, string lightcolor, string darkcolor) embeddingIcon = this.embeddingOverview.State switch
{
@ -345,7 +347,7 @@ public partial class MainLayout : LayoutComponentBase, IMessageBusReceiver, ILan
DataSourceEmbeddingState.FAILED => this.embeddingOverview.FailedFiles > 0
? string.Format(T("Some embeddings failed. {0} file(s) need attention."), this.embeddingOverview.FailedFiles)
: T("Some embeddings failed and need attention."),
_ => this.embeddingOverview.NavLabel,
_ => string.Empty
};
private async Task ShowUpdateDialog()
@ -508,4 +510,4 @@ public partial class MainLayout : LayoutComponentBase, IMessageBusReceiver, ILan
}
#endregion
}
}

View File

@ -88,7 +88,7 @@ internal sealed class Program
var embeddingStoreConfig = await rust.GetEmbeddingStoreConfiguration(EmbeddingStoreKind.QDRANT_REMOTE);
EmbeddingStore embeddingStore = EmbeddingStoreFactory.Create(embeddingStoreConfig);
var embeddingStore = EmbeddingStoreFactory.Create(embeddingStoreConfig);
var builder = WebApplication.CreateBuilder();
builder.WebHost.ConfigureKestrel(kestrelServerOptions =>

View File

@ -124,6 +124,8 @@ public sealed class Data
public DataAgentRetrievalContextValidation AgentRetrievalContextValidation { get; init; } = new();
public DataDataSourceIndexing DataSourceIndexing { get; init; } = new();
public DataAssistantPluginAudit AssistantPluginAudit { get; init; } = new(x => x.AssistantPluginAudit);
public DataAgenda Agenda { get; init; } = new();

View File

@ -0,0 +1,9 @@
namespace AIStudio.Settings.DataModel;
public sealed class DataDataSourceIndexing
{
/// <summary>
/// Whether local data source embeddings should refresh automatically when files change.
/// </summary>
public bool AutomaticRefresh { get; set; } = true;
}

View File

@ -35,14 +35,15 @@ public abstract class EmbeddingStore(string name, string path)
{
string[] suffixes = { "B", "KB", "MB", "GB", "TB", "PB" };
int suffixIndex = 0;
double convertedSize = size;
while (size >= 1024 && suffixIndex < suffixes.Length - 1)
while (convertedSize >= 1024 && suffixIndex < suffixes.Length - 1)
{
size /= 1024;
convertedSize /= 1024;
suffixIndex++;
}
return $"{size:0##} {suffixes[suffixIndex]}";
return $"{convertedSize:0.##} {suffixes[suffixIndex]}";
}
public void SetLogger(ILogger<EmbeddingStore> logService)

View File

@ -1,5 +1,6 @@
using Qdrant.Client;
using Qdrant.Client.Grpc;
using Grpc.Core;
using AIStudio.Tools.PluginSystem;
using static Qdrant.Client.Grpc.Conditions;
@ -104,14 +105,32 @@ public class QdrantClientImplementation : EmbeddingStore
return this.GrpcClient.UpsertAsync(collectionName, qdrantPoints, true, null, null, token);
}
public override Task DeleteEmbeddingByFile(string collectionName, string filePath, CancellationToken token)
public override async Task DeleteEmbeddingByFile(string collectionName, string filePath, CancellationToken token)
{
return this.GrpcClient.DeleteAsync(collectionName, MatchKeyword("file_path", filePath), true, null, null, token);
try
{
await this.GrpcClient.DeleteAsync(collectionName, MatchKeyword("file_path", filePath), true, null, null, token);
}
catch (RpcException exception) when (exception.StatusCode is StatusCode.NotFound)
{
return;
}
}
public override Task DeleteEmbeddingStore(string collectionName, CancellationToken token)
public override async Task DeleteEmbeddingStore(string collectionName, CancellationToken token)
{
return this.GrpcClient.DeleteCollectionAsync(collectionName, cancellationToken: token);
var exists = await this.GrpcClient.CollectionExistsAsync(collectionName, token);
if (!exists)
return;
try
{
await this.GrpcClient.DeleteCollectionAsync(collectionName, cancellationToken: token);
}
catch (RpcException exception) when (exception.StatusCode is StatusCode.NotFound)
{
return;
}
}
public override void Dispose() => this.GrpcClient.Dispose();

View File

@ -59,7 +59,7 @@ public static class FileTypes
// Media hierarchy
public static readonly FileTypeFilter IMAGE = FileTypeFilter.Leaf(TB("Image"),
"jpg", "jpeg", "png", "gif", "bmp", "tiff", "svg", "webp", "heic");
"jpg", "jpeg", "png", "gif", "bmp", "tiff", "svg", "webp", "heic", "avif");
public static readonly FileTypeFilter AUDIO = FileTypeFilter.Leaf(TB("Audio"),
"mp3", "wav", "wave", "aac", "flac", "ogg", "m4a", "wma", "alac", "aiff", "m4b");
public static readonly FileTypeFilter VIDEO = FileTypeFilter.Leaf(TB("Video"),

View File

@ -0,0 +1,86 @@
using AIStudio.Settings.DataModel;
using AIStudio.Tools.PluginSystem;
namespace AIStudio.Tools.Services;
public sealed record DataSourceEmbeddingOverview(
bool IsVisible,
DataSourceEmbeddingState State,
int IndexedFiles,
int TotalFiles,
int FailedFiles);
public enum DataSourceEmbeddingState
{
IDLE,
QUEUED,
RUNNING,
COMPLETED,
FAILED,
}
public sealed record DataSourceEmbeddingStatus(
string DataSourceId,
string DataSourceName,
DataSourceType DataSourceType,
DataSourceEmbeddingState State,
int TotalFiles,
int IndexedFiles,
int FailedFiles,
string CurrentFile,
string LastError)
{
private static string TB(string fallbackEN) => I18N.I.T(fallbackEN, typeof(DataSourceEmbeddingService).Namespace, nameof(DataSourceEmbeddingService));
public int ProgressPercent => this.TotalFiles <= 0 ? 0 : Math.Clamp((int)Math.Round(this.IndexedFiles * 100d / this.TotalFiles), 0, 100);
public string StateLabel => this.State switch
{
DataSourceEmbeddingState.QUEUED => TB("Queued"),
DataSourceEmbeddingState.RUNNING => TB("Running"),
DataSourceEmbeddingState.COMPLETED => TB("Completed"),
DataSourceEmbeddingState.FAILED => TB("Needs attention"),
_ => TB("Idle")
};
public int SortOrder => this.State switch
{
DataSourceEmbeddingState.RUNNING => 0,
DataSourceEmbeddingState.QUEUED => 1,
DataSourceEmbeddingState.FAILED => 2,
DataSourceEmbeddingState.COMPLETED => 3,
_ => 4,
};
}
public sealed class FileEnumerationResult
{
public List<FileInfo> Files { get; } = [];
public int FailedFiles { get; set; }
public string LastError { get; set; } = string.Empty;
}
public sealed class PersistedEmbeddingState
{
public Dictionary<string, DataSourceEmbeddingManifest> DataSources { get; init; } = new(StringComparer.OrdinalIgnoreCase);
}
public sealed class DataSourceEmbeddingManifest
{
public string EmbeddingProviderId { get; set; } = string.Empty;
public string EmbeddingSignature { get; set; } = string.Empty;
public int VectorSize { get; set; }
public Dictionary<string, EmbeddedFileRecord> Files { get; init; } = new(StringComparer.OrdinalIgnoreCase);
}
public sealed record EmbeddedFileRecord(
string Fingerprint,
long FileSize,
DateTime LastWriteUtc,
DateTime EmbeddedAtUtc,
int ChunkCount);

View File

@ -0,0 +1,242 @@
using System.Security.Cryptography;
using System.Text;
using AIStudio.Settings;
using AIStudio.Settings.DataModel;
using AIStudio.Tools.PluginSystem;
using AIStudio.Tools.Rust;
namespace AIStudio.Tools.Services;
public sealed partial class DataSourceEmbeddingService
{
private static readonly string[] ADDITIONAL_RAG_FILE_EXTENSIONS = ["csv", "tsv", "ods", "xlsm", "xlsb", "xla", "xlam"];
private static readonly string[] SKIPPED_RAG_FILE_EXTENSIONS = ["lnk"];
private async IAsyncEnumerable<string> StreamEmbeddingChunksAsync(string filePath, [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken token)
{
if (this.IsImageFilePath(filePath))
{
yield return this.BuildImageIndexText(filePath);
yield break;
}
var currentChunk = new StringBuilder();
await foreach (var segment in this.rustService.StreamArbitraryFileData(filePath, token: token))
{
var normalized = NormalizeChunkSegment(segment);
if (string.IsNullOrWhiteSpace(normalized))
continue;
if (currentChunk.Length > 0 && currentChunk.Length + normalized.Length + Environment.NewLine.Length > MAX_CHUNK_LENGTH)
{
if (currentChunk.Length >= MIN_CHUNK_LENGTH)
{
var chunk = currentChunk.ToString().Trim();
if (!string.IsNullOrWhiteSpace(chunk))
yield return chunk;
var overlap = chunk.Length > CHUNK_OVERLAP_LENGTH
? chunk[^CHUNK_OVERLAP_LENGTH..]
: chunk;
currentChunk.Clear();
currentChunk.Append(overlap);
currentChunk.AppendLine();
}
else
{
currentChunk.AppendLine();
}
}
currentChunk.Append(normalized);
currentChunk.AppendLine();
}
var finalChunk = currentChunk.ToString().Trim();
if (!string.IsNullOrWhiteSpace(finalChunk))
yield return finalChunk;
}
private FileEnumerationResult GetInputFiles(IDataSource dataSource)
{
var result = new FileEnumerationResult();
switch (dataSource)
{
case DataSourceLocalFile localFile when File.Exists(localFile.FilePath):
if (this.IsSupportedRagFilePath(localFile.FilePath))
{
result.Files.Add(new FileInfo(localFile.FilePath));
}
else
{
result.FailedFiles = 1;
result.LastError = $"The selected file '{localFile.FilePath}' is not supported for background embeddings.";
}
return result;
case DataSourceLocalDirectory localDirectory when Directory.Exists(localDirectory.Path):
this.EnumerateAccessibleFiles(localDirectory.Path, result);
return result;
}
switch (dataSource)
{
case DataSourceLocalFile localFile:
result.FailedFiles = 1;
result.LastError = $"The selected file '{localFile.FilePath}' does not exist.";
break;
case DataSourceLocalDirectory localDirectory:
result.FailedFiles = 1;
result.LastError = $"The selected directory '{localDirectory.Path}' does not exist.";
break;
}
return result;
}
private void EnumerateAccessibleFiles(string rootPath, FileEnumerationResult result)
{
var pendingDirectories = new Stack<string>();
pendingDirectories.Push(rootPath);
while (pendingDirectories.Count > 0)
{
var currentPath = pendingDirectories.Pop();
IEnumerable<string> subDirectories;
IEnumerable<string> files;
try
{
subDirectories = Directory.EnumerateDirectories(currentPath);
files = Directory.EnumerateFiles(currentPath);
}
catch (Exception exception)
{
this.logger.LogWarning(exception, "Cannot access directory '{DirectoryPath}' while indexing.", currentPath);
result.FailedFiles++;
result.LastError = $"The directory '{currentPath}' could not be accessed.";
continue;
}
foreach (var filePath in files)
{
FileInfo fileInfo;
try
{
fileInfo = new FileInfo(filePath);
if (!fileInfo.Exists)
continue;
}
catch (Exception exception)
{
this.logger.LogWarning(exception, "Cannot inspect file '{FilePath}' while indexing.", filePath);
result.FailedFiles++;
result.LastError = $"The file '{filePath}' could not be inspected.";
continue;
}
if (!this.IsSupportedRagFilePath(fileInfo.FullName))
continue;
result.Files.Add(fileInfo);
}
foreach (var subDirectory in subDirectories)
pendingDirectories.Push(subDirectory);
}
}
private string TryGetRelativePath(IDataSource dataSource, FileInfo file) => dataSource switch
{
DataSourceLocalDirectory localDirectory => Path.GetRelativePath(localDirectory.Path, file.FullName),
_ => file.Name
};
private static string NormalizeChunkSegment(string input)
{
return input
.Replace("\r\n", "\n", StringComparison.Ordinal)
.Replace('\r', '\n')
.Trim();
}
private bool IsImageFilePath(string filePath)
{
return FileTypes.IsAllowedPath(filePath, FileTypes.IMAGE);
}
private bool IsSupportedRagFilePath(string filePath)
{
var extension = Path.GetExtension(filePath).TrimStart('.');
if (SKIPPED_RAG_FILE_EXTENSIONS.Contains(extension, StringComparer.OrdinalIgnoreCase))
return false;
return FileTypes.IsAllowedPath(filePath, FileTypes.DOCUMENT, FileTypes.IMAGE)
|| ADDITIONAL_RAG_FILE_EXTENSIONS.Contains(extension, StringComparer.OrdinalIgnoreCase);
}
private string BuildImageIndexText(string filePath)
{
var fileName = Path.GetFileName(filePath);
var fileNameWithoutExtension = Path.GetFileNameWithoutExtension(filePath);
var extension = Path.GetExtension(filePath).TrimStart('.');
var normalizedName = fileNameWithoutExtension
.Replace('_', ' ')
.Replace('-', ' ')
.Trim();
return $$"""
Image asset
File name: {{fileName}}
Type: {{extension}}
Search terms: {{normalizedName}}
Path: {{filePath}}
Note: The current RAG embedding pipeline stores image files by metadata only. Visual content is not OCRed or captioned yet.
""";
}
private string BuildEmbeddingSignature(EmbeddingProvider embeddingProvider)
{
return string.Join('|',
embeddingProvider.Id,
embeddingProvider.UsedLLMProvider,
embeddingProvider.Model.Id,
embeddingProvider.Host,
embeddingProvider.Hostname,
embeddingProvider.TokenizerPath);
}
private string BuildFingerprint(FileInfo file)
{
var fingerprintSource = $"{file.FullName}|{file.Length}|{file.LastWriteTimeUtc.Ticks}";
var bytes = SHA256.HashData(Encoding.UTF8.GetBytes(fingerprintSource));
return Convert.ToHexString(bytes);
}
private string GetCollectionName(string dataSourceId)
{
var safeId = dataSourceId
.ToLowerInvariant()
.Replace("-", string.Empty, StringComparison.Ordinal);
return $"rag_{safeId}";
}
private string CreatePointId(string dataSourceId, string fingerprint, int chunkIndex)
{
var source = $"{dataSourceId}:{fingerprint}:{chunkIndex}";
var hash = SHA256.HashData(Encoding.UTF8.GetBytes(source));
var guidBytes = hash[..16].ToArray();
guidBytes[6] = (byte)((guidBytes[6] & 0x0F) | 0x40);
guidBytes[8] = (byte)((guidBytes[8] & 0x3F) | 0x80);
return new Guid(guidBytes).ToString();
}
}

View File

@ -0,0 +1,89 @@
using System.Text.Json;
using AIStudio.Settings;
namespace AIStudio.Tools.Services;
public sealed partial class DataSourceEmbeddingService
{
private const string STATE_FILENAME = "rag-embedding-state.json";
private readonly JsonSerializerOptions jsonOptions = new()
{
WriteIndented = true,
};
private async Task EnsureStateLoadedAsync(CancellationToken token)
{
if (this.stateLoaded)
return;
await this.stateLock.WaitAsync(token);
try
{
if (this.stateLoaded)
return;
var statePath = this.GetStatePath();
if (!string.IsNullOrWhiteSpace(statePath) && File.Exists(statePath))
{
var json = await File.ReadAllTextAsync(statePath, token);
var persistedState = JsonSerializer.Deserialize<PersistedEmbeddingState>(json, this.jsonOptions);
this.manifests = persistedState?.DataSources ?? new Dictionary<string, DataSourceEmbeddingManifest>(StringComparer.OrdinalIgnoreCase);
}
this.stateLoaded = true;
}
finally
{
this.stateLock.Release();
}
}
private async Task<DataSourceEmbeddingManifest> GetManifestAsync(string dataSourceId, CancellationToken token)
{
await this.EnsureStateLoadedAsync(token);
if (this.manifests.TryGetValue(dataSourceId, out var manifest))
return manifest;
manifest = new DataSourceEmbeddingManifest();
this.manifests[dataSourceId] = manifest;
return manifest;
}
private async Task SaveStateAsync(CancellationToken token)
{
var statePath = this.GetStatePath();
if (string.IsNullOrWhiteSpace(statePath))
return;
var directory = Path.GetDirectoryName(statePath);
if (!string.IsNullOrWhiteSpace(directory))
Directory.CreateDirectory(directory);
var persistedState = new PersistedEmbeddingState
{
DataSources = this.manifests
};
var json = JsonSerializer.Serialize(persistedState, this.jsonOptions);
await File.WriteAllTextAsync(statePath, json, token);
}
private async Task ResetPersistedStateAsync(string dataSourceId)
{
await this.EnsureStateLoadedAsync(CancellationToken.None);
this.manifests.Remove(dataSourceId);
await this.DeleteCollectionAsync(this.GetCollectionName(dataSourceId));
await this.SaveStateAsync(CancellationToken.None);
this.logger.LogInformation("Reset persisted embedding state for data source '{DataSourceId}'.", dataSourceId);
}
private string GetStatePath()
{
if (string.IsNullOrWhiteSpace(SettingsManager.ConfigDirectory))
return string.Empty;
return Path.Combine(SettingsManager.ConfigDirectory, STATE_FILENAME);
}
}

View File

@ -0,0 +1,226 @@
using System.Collections.Concurrent;
using AIStudio.Settings;
using AIStudio.Settings.DataModel;
namespace AIStudio.Tools.Services;
public sealed partial class DataSourceEmbeddingService
{
private const int WATCHER_DEBOUNCE_SECONDS = 2;
private readonly ConcurrentDictionary<string, DataSourceWatcherRegistration> watchers = new(StringComparer.OrdinalIgnoreCase);
private readonly Dictionary<string, CancellationTokenSource> watcherDebounceTokens = new(StringComparer.OrdinalIgnoreCase);
private readonly object watcherDebounceLock = new();
private void RefreshWatchers()
{
if (!this.settingsManager.ConfigurationData.DataSourceIndexing.AutomaticRefresh)
{
this.RemoveAllWatchers();
return;
}
var supportedSources = this.settingsManager.ConfigurationData.DataSources
.Where(this.IsSupportedInternalDataSource)
.ToDictionary(source => source.Id, StringComparer.OrdinalIgnoreCase);
foreach (var existingWatcherId in this.watchers.Keys.Except(supportedSources.Keys, StringComparer.OrdinalIgnoreCase).ToList())
this.RemoveWatcher(existingWatcherId);
foreach (var dataSource in supportedSources.Values)
this.EnsureWatcher(dataSource);
}
private void EnsureWatcher(IDataSource dataSource)
{
if (!this.settingsManager.ConfigurationData.DataSourceIndexing.AutomaticRefresh)
return;
var configuration = GetWatchConfiguration(dataSource);
if (configuration is null)
return;
if (this.watchers.TryGetValue(dataSource.Id, out var existingRegistration))
{
if (IsSameWatchConfiguration(existingRegistration.Configuration, configuration))
return;
this.RemoveWatcher(dataSource.Id);
}
var watcher = this.CreateWatcher(dataSource.Id, configuration);
if (watcher is null)
return;
if (!this.watchers.TryAdd(dataSource.Id, new DataSourceWatcherRegistration(watcher, configuration)))
watcher.Dispose();
}
private FileSystemWatcher? CreateWatcher(string dataSourceId, DataSourceWatcherConfiguration configuration)
{
try
{
var watcher = new FileSystemWatcher(configuration.RootPath)
{
Filter = configuration.Filter,
IncludeSubdirectories = configuration.IncludeSubdirectories,
NotifyFilter = NotifyFilters.FileName | NotifyFilters.DirectoryName | NotifyFilters.LastWrite | NotifyFilters.CreationTime | NotifyFilters.Size,
};
watcher.Changed += (_, _) => this.OnWatchedDataSourceChanged(dataSourceId);
watcher.Deleted += (_, _) => this.OnWatchedDataSourceChanged(dataSourceId);
watcher.Created += (_, _) => this.OnWatchedDataSourceChanged(dataSourceId);
watcher.Renamed += (_, _) => this.OnWatchedDataSourceChanged(dataSourceId);
watcher.Error += (_, args) =>
{
this.logger.LogWarning(args.GetException(), "The file watcher for data source '{DataSourceId}' failed. Recreating it.", dataSourceId);
this.RemoveWatcher(dataSourceId);
this.EnsureWatcher(dataSourceId);
this.OnWatchedDataSourceChanged(dataSourceId);
};
watcher.EnableRaisingEvents = true;
return watcher;
}
catch (Exception exception)
{
this.logger.LogWarning(exception, "Failed to create file watcher for data source '{DataSourceId}' at '{RootPath}'.", dataSourceId, configuration.RootPath);
return null;
}
}
private void RemoveWatcher(string dataSourceId)
{
this.CancelPendingWatcherRefresh(dataSourceId);
if (this.watchers.TryRemove(dataSourceId, out var registration))
registration.Watcher.Dispose();
}
private void RemoveAllWatchers()
{
foreach (var watcherId in this.watchers.Keys.ToList())
this.RemoveWatcher(watcherId);
}
private void DisposeWatchers()
{
this.CancelAllPendingWatcherRefreshes();
foreach (var registration in this.watchers.Values)
registration.Watcher.Dispose();
this.watchers.Clear();
}
private void OnWatchedDataSourceChanged(string dataSourceId)
{
if (!this.settingsManager.ConfigurationData.DataSourceIndexing.AutomaticRefresh)
return;
this.logger.LogDebug("Detected file system change for data source '{DataSourceId}'. Scheduling a debounced embedding run.", dataSourceId);
var debounceToken = new CancellationTokenSource();
lock (this.watcherDebounceLock)
{
if (this.watcherDebounceTokens.Remove(dataSourceId, out var existingToken))
existingToken.Cancel();
this.watcherDebounceTokens[dataSourceId] = debounceToken;
}
_ = Task.Run(async () =>
{
try
{
await Task.Delay(TimeSpan.FromSeconds(WATCHER_DEBOUNCE_SECONDS), debounceToken.Token);
if (!this.TryCompletePendingWatcherRefresh(dataSourceId, debounceToken))
return;
var dataSource = this.settingsManager.ConfigurationData.DataSources
.FirstOrDefault(source => source.Id.Equals(dataSourceId, StringComparison.OrdinalIgnoreCase));
if (dataSource is not null)
{
this.logger.LogInformation("Queueing data source '{DataSourceName}' ({DataSourceId}) after file system changes settled.", dataSource.Name, dataSource.Id);
await this.QueueDataSourceAsync(dataSource);
}
}
catch (OperationCanceledException)
{
}
catch (Exception exception)
{
this.logger.LogWarning(exception, "Failed to queue watched data source '{DataSourceId}' after a file system change.", dataSourceId);
}
finally
{
debounceToken.Dispose();
}
});
}
private void EnsureWatcher(string dataSourceId)
{
var dataSource = this.settingsManager.ConfigurationData.DataSources
.FirstOrDefault(source => source.Id.Equals(dataSourceId, StringComparison.OrdinalIgnoreCase));
if (dataSource is not null)
this.EnsureWatcher(dataSource);
}
private void CancelPendingWatcherRefresh(string dataSourceId)
{
lock (this.watcherDebounceLock)
{
if (this.watcherDebounceTokens.Remove(dataSourceId, out var token))
token.Cancel();
}
}
private void CancelAllPendingWatcherRefreshes()
{
lock (this.watcherDebounceLock)
{
foreach (var token in this.watcherDebounceTokens.Values)
token.Cancel();
this.watcherDebounceTokens.Clear();
}
}
private bool TryCompletePendingWatcherRefresh(string dataSourceId, CancellationTokenSource debounceToken)
{
lock (this.watcherDebounceLock)
{
if (!this.watcherDebounceTokens.TryGetValue(dataSourceId, out var currentToken) || !ReferenceEquals(currentToken, debounceToken))
return false;
this.watcherDebounceTokens.Remove(dataSourceId);
return true;
}
}
private static DataSourceWatcherConfiguration? GetWatchConfiguration(IDataSource dataSource) => dataSource switch
{
DataSourceLocalDirectory localDirectory when Directory.Exists(localDirectory.Path) => new DataSourceWatcherConfiguration(
localDirectory.Path,
"*.*",
true),
DataSourceLocalFile localFile when File.Exists(localFile.FilePath) && !string.IsNullOrWhiteSpace(Path.GetDirectoryName(localFile.FilePath)) => new DataSourceWatcherConfiguration(
Path.GetDirectoryName(localFile.FilePath)!,
Path.GetFileName(localFile.FilePath),
false),
_ => null,
};
private static bool IsSameWatchConfiguration(DataSourceWatcherConfiguration left, DataSourceWatcherConfiguration right)
{
return left.IncludeSubdirectories == right.IncludeSubdirectories
&& string.Equals(left.RootPath, right.RootPath, StringComparison.OrdinalIgnoreCase)
&& string.Equals(left.Filter, right.Filter, StringComparison.OrdinalIgnoreCase);
}
private sealed record DataSourceWatcherConfiguration(string RootPath, string Filter, bool IncludeSubdirectories);
private sealed record DataSourceWatcherRegistration(FileSystemWatcher Watcher, DataSourceWatcherConfiguration Configuration);
}

View File

@ -1,7 +1,5 @@
using System.Collections.Concurrent;
using System.Security.Cryptography;
using System.Text;
using System.Text.Json;
using System.Diagnostics.CodeAnalysis;
using System.Threading.Channels;
using AIStudio.Provider;
@ -13,9 +11,8 @@ using AIStudio.Tools.Rust;
namespace AIStudio.Tools.Services;
public sealed class DataSourceEmbeddingService : BackgroundService
public sealed partial class DataSourceEmbeddingService : BackgroundService
{
private const string STATE_FILENAME = "rag-embedding-state.json";
private const int MAX_CHUNK_LENGTH = 3_200;
private const int MIN_CHUNK_LENGTH = 800;
private const int CHUNK_OVERLAP_LENGTH = 320;
@ -28,12 +25,7 @@ public sealed class DataSourceEmbeddingService : BackgroundService
private readonly Channel<string> queue = Channel.CreateUnbounded<string>();
private readonly ConcurrentDictionary<string, byte> queuedIds = new(StringComparer.OrdinalIgnoreCase);
private readonly ConcurrentDictionary<string, DataSourceEmbeddingStatus> statuses = new(StringComparer.OrdinalIgnoreCase);
private readonly ConcurrentDictionary<string, FileSystemWatcher> watchers = new(StringComparer.OrdinalIgnoreCase);
private readonly SemaphoreSlim stateLock = new(1, 1);
private readonly JsonSerializerOptions jsonOptions = new()
{
WriteIndented = true,
};
private static string TB(string fallbackEN) => I18N.I.T(fallbackEN, typeof(DataSourceEmbeddingService).Namespace, nameof(DataSourceEmbeddingService));
@ -58,11 +50,7 @@ public sealed class DataSourceEmbeddingService : BackgroundService
public DataSourceEmbeddingOverview GetOverview()
{
var orderedStatuses = this.statuses.Values
.OrderBy(status => status.SortOrder)
.ThenBy(status => status.DataSourceName, StringComparer.OrdinalIgnoreCase)
.ToList();
var orderedStatuses = this.GetStatuses();
var activeStatus = orderedStatuses
.FirstOrDefault(status => status.State is DataSourceEmbeddingState.QUEUED or DataSourceEmbeddingState.RUNNING);
@ -71,21 +59,19 @@ public sealed class DataSourceEmbeddingService : BackgroundService
var total = Math.Max(activeStatus.TotalFiles, 1);
return new(
true,
false,
activeStatus.State,
activeStatus.IndexedFiles,
total,
activeStatus.FailedFiles,
$"{TB("Embedding")} {activeStatus.IndexedFiles}/{total}");
activeStatus.FailedFiles);
}
var failedStatus = orderedStatuses
.FirstOrDefault(status => status.State is DataSourceEmbeddingState.FAILED || status.FailedFiles > 0);
if (failedStatus is not null)
return new(true, true, DataSourceEmbeddingState.FAILED, failedStatus.IndexedFiles, failedStatus.TotalFiles, failedStatus.FailedFiles, TB("Embeddings"));
return new(true, DataSourceEmbeddingState.FAILED, failedStatus.IndexedFiles, failedStatus.TotalFiles, failedStatus.FailedFiles);
return new(false, true, DataSourceEmbeddingState.COMPLETED, 0, 0, 0, TB("Embeddings"));
return new(false, DataSourceEmbeddingState.COMPLETED, 0, 0, 0);
}
public Task QueueAllInternalDataSourcesAsync()
@ -99,6 +85,22 @@ public sealed class DataSourceEmbeddingService : BackgroundService
return Task.WhenAll(tasks);
}
public Task QueueAllInternalDataSourcesIfAutomaticRefreshAsync()
{
if (!this.settingsManager.ConfigurationData.DataSourceIndexing.AutomaticRefresh)
{
this.RefreshWatchers();
return Task.CompletedTask;
}
return this.QueueAllInternalDataSourcesAsync();
}
public void RefreshAutomaticWatchers()
{
this.RefreshWatchers();
}
public async Task QueueDataSourceAsync(IDataSource dataSource)
{
if (!this.IsSupportedInternalDataSource(dataSource))
@ -106,23 +108,14 @@ public sealed class DataSourceEmbeddingService : BackgroundService
this.logger.LogInformation("Queueing data source '{DataSourceName}' ({DataSourceId}) for background embeddings.", dataSource.Name, dataSource.Id);
this.RefreshWatchers();
this.logger.LogDebug("Adding watcher for data source '{DataSourceName}' ({DataSourceId}).", dataSource.Name, dataSource.Id);
if (!this.statuses.TryGetValue(dataSource.Id, out var currentStatus) || currentStatus.State is not DataSourceEmbeddingState.RUNNING)
{
this.UpsertStatus(new DataSourceEmbeddingStatus(
dataSource.Id,
dataSource.Name,
dataSource.Type,
DataSourceEmbeddingState.QUEUED,
currentStatus?.TotalFiles ?? 0,
currentStatus?.IndexedFiles ?? 0,
currentStatus?.FailedFiles ?? 0,
string.Empty,
string.Empty));
}
this.UpsertStatus(this.CreateStatus(dataSource, DataSourceEmbeddingState.QUEUED, currentStatus?.TotalFiles ?? 0, currentStatus?.IndexedFiles ?? 0, currentStatus?.FailedFiles ?? 0));
this.logger.LogDebug("Upserting status for data source '{DataSourceName}' ({DataSourceId}).", dataSource.Name, dataSource.Id);
if (this.queuedIds.TryAdd(dataSource.Id, 0))
await this.queue.Writer.WriteAsync(dataSource.Id);
this.logger.LogDebug("Queued data source '{DataSourceName}' ({DataSourceId}).", dataSource.Name, dataSource.Id);
}
public async Task RemoveDataSourceAsync(IDataSource dataSource)
@ -169,10 +162,7 @@ public sealed class DataSourceEmbeddingService : BackgroundService
public override void Dispose()
{
foreach (var watcher in this.watchers.Values)
watcher.Dispose();
this.watchers.Clear();
this.DisposeWatchers();
this.stateLock.Dispose();
base.Dispose();
}
@ -193,11 +183,7 @@ public sealed class DataSourceEmbeddingService : BackgroundService
return;
}
var embeddingProvider = this.settingsManager.ConfigurationData.EmbeddingProviders.FirstOrDefault(provider =>
dataSource is IInternalDataSource internalDataSource &&
provider.Id.Equals(internalDataSource.EmbeddingId, StringComparison.OrdinalIgnoreCase));
if (embeddingProvider == default || embeddingProvider.UsedLLMProvider is LLMProviders.NONE)
if (!this.TryResolveEmbeddingProvider(dataSource, out var embeddingProvider))
{
this.UpsertStatus(this.GetFallbackStatus(dataSource, "The selected embedding provider is not available."));
return;
@ -210,29 +196,11 @@ public sealed class DataSourceEmbeddingService : BackgroundService
dataSource.Name,
dataSource.Id);
var embeddingSignature = this.BuildEmbeddingSignature(embeddingProvider);
var manifest = await this.GetManifestAsync(dataSource.Id, token);
// A provider/model change invalidates the existing collection because the vectors are no longer comparable.
if (!string.Equals(manifest.EmbeddingSignature, embeddingSignature, StringComparison.Ordinal))
{
this.logger.LogInformation(
"Embedding configuration changed for data source '{DataSourceName}' ({DataSourceId}). Resetting persisted state and collection '{CollectionName}'.",
dataSource.Name,
dataSource.Id,
this.GetCollectionName(dataSource.Id));
await this.ResetPersistedStateAsync(dataSource.Id);
manifest = await this.GetManifestAsync(dataSource.Id, token);
manifest.EmbeddingProviderId = embeddingProvider.Id;
manifest.EmbeddingSignature = embeddingSignature;
await this.SaveStateAsync(token);
}
var collectionName = this.GetCollectionName(dataSource.Id);
var manifest = await this.EnsureCompatibleManifestAsync(dataSource, embeddingProvider, collectionName, token);
var inputFiles = this.GetInputFiles(dataSource);
var indexedFiles = inputFiles.Files;
var totalFiles = indexedFiles.Count + inputFiles.FailedFiles;
var existingPaths = indexedFiles
.Select(file => file.FullName)
.ToHashSet(StringComparer.OrdinalIgnoreCase);
this.logger.LogInformation(
"Prepared data source '{DataSourceName}' ({DataSourceId}) for embedding. AccessibleFiles={AccessibleFiles}, FailedFiles={FailedFiles}, Collection='{CollectionName}'.",
@ -240,32 +208,18 @@ public sealed class DataSourceEmbeddingService : BackgroundService
dataSource.Id,
indexedFiles.Count,
inputFiles.FailedFiles,
this.GetCollectionName(dataSource.Id));
foreach (var removedFilePath in manifest.Files.Keys.Except(existingPaths, StringComparer.OrdinalIgnoreCase).ToList())
{
await this.DeleteFilePointsAsync(this.GetCollectionName(dataSource.Id), removedFilePath, token);
manifest.Files.Remove(removedFilePath);
this.logger.LogInformation(
"Removed stale embeddings for deleted file '{FilePath}' from data source '{DataSourceName}' ({DataSourceId}).",
removedFilePath,
dataSource.Name,
dataSource.Id);
}
collectionName);
await this.RemoveMissingFileEmbeddingsAsync(dataSource, collectionName, manifest, indexedFiles, token);
await this.SaveStateAsync(token);
// Keep the UI status in sync with the long-running file loop below.
this.UpsertStatus(new DataSourceEmbeddingStatus(
dataSource.Id,
dataSource.Name,
dataSource.Type,
this.UpsertStatus(this.CreateStatus(
dataSource,
DataSourceEmbeddingState.RUNNING,
totalFiles,
0,
inputFiles.FailedFiles,
string.Empty,
inputFiles.LastError));
lastError: inputFiles.LastError));
var provider = embeddingProvider.CreateProvider();
var skippedFiles = 0;
@ -287,29 +241,11 @@ public sealed class DataSourceEmbeddingService : BackgroundService
dataSource.Name,
dataSource.Id);
skippedFiles++;
this.UpsertStatus(new DataSourceEmbeddingStatus(
dataSource.Id,
dataSource.Name,
dataSource.Type,
DataSourceEmbeddingState.RUNNING,
totalFiles,
skippedFiles + completedFiles,
failedFiles,
string.Empty,
lastError));
this.UpsertStatus(this.CreateStatus(dataSource, DataSourceEmbeddingState.RUNNING, totalFiles, skippedFiles + completedFiles, failedFiles, lastError: lastError));
continue;
}
this.UpsertStatus(new DataSourceEmbeddingStatus(
dataSource.Id,
dataSource.Name,
dataSource.Type,
DataSourceEmbeddingState.RUNNING,
totalFiles,
skippedFiles + completedFiles,
failedFiles,
file.Name,
lastError));
this.UpsertStatus(this.CreateStatus(dataSource, DataSourceEmbeddingState.RUNNING, totalFiles, skippedFiles + completedFiles, failedFiles, file.Name, lastError));
try
{
@ -343,37 +279,15 @@ public sealed class DataSourceEmbeddingService : BackgroundService
failedFiles++;
lastError = exception.Message;
manifest.Files.Remove(file.FullName);
await this.DeleteFilePointsAsync(this.GetCollectionName(dataSource.Id), file.FullName, token);
await this.DeleteFilePointsAsync(collectionName, file.FullName, token);
await this.SaveStateAsync(token);
this.logger.LogWarning(exception, "Failed to embed file '{FilePath}' for data source '{DataSourceName}'.", file.FullName, dataSource.Name);
this.UpsertStatus(new DataSourceEmbeddingStatus(
dataSource.Id,
dataSource.Name,
dataSource.Type,
DataSourceEmbeddingState.RUNNING,
totalFiles,
skippedFiles + completedFiles,
failedFiles,
file.Name,
exception.Message));
this.UpsertStatus(this.CreateStatus(dataSource, DataSourceEmbeddingState.RUNNING, totalFiles, skippedFiles + completedFiles, failedFiles, file.Name, exception.Message));
}
}
this.UpsertStatus(new DataSourceEmbeddingStatus(
dataSource.Id,
dataSource.Name,
dataSource.Type,
failedFiles > 0 ? DataSourceEmbeddingState.FAILED : DataSourceEmbeddingState.COMPLETED,
totalFiles,
skippedFiles + completedFiles,
failedFiles,
string.Empty,
failedFiles > 0
? string.IsNullOrWhiteSpace(lastError)
? "Some files could not be embedded. See the logs for details."
: lastError
: string.Empty));
this.UpsertStatus(this.CreateCompletedStatus(dataSource, totalFiles, skippedFiles + completedFiles, failedFiles, lastError));
this.logger.LogInformation(
"Finished background embeddings for data source '{DataSourceName}' ({DataSourceId}). Indexed={IndexedFiles}, Failed={FailedFiles}, Total={TotalFiles}.",
dataSource.Name,
@ -459,7 +373,6 @@ public sealed class DataSourceEmbeddingService : BackgroundService
if (manifest.VectorSize == 0)
{
// The first successful batch defines the vector size for the whole collection.
manifest.VectorSize = vectorSize;
await this.EnsureCollectionExistsAsync(collectionName, vectorSize, token);
await this.SaveStateAsync(token);
@ -490,53 +403,6 @@ public sealed class DataSourceEmbeddingService : BackgroundService
batch.Clear();
}
private async IAsyncEnumerable<string> StreamEmbeddingChunksAsync(string filePath, [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken token)
{
if (this.IsImageFilePath(filePath))
{
yield return this.BuildImageIndexText(filePath);
yield break;
}
var currentChunk = new StringBuilder();
await foreach (var segment in this.rustService.StreamArbitraryFileData(filePath, token: token))
{
var normalized = NormalizeChunkSegment(segment);
if (string.IsNullOrWhiteSpace(normalized))
continue;
if (currentChunk.Length > 0 && currentChunk.Length + normalized.Length + Environment.NewLine.Length > MAX_CHUNK_LENGTH)
{
if (currentChunk.Length >= MIN_CHUNK_LENGTH)
{
var chunk = currentChunk.ToString().Trim();
if (!string.IsNullOrWhiteSpace(chunk))
yield return chunk;
var overlap = chunk.Length > CHUNK_OVERLAP_LENGTH
? chunk[^CHUNK_OVERLAP_LENGTH..]
: chunk;
currentChunk.Clear();
currentChunk.Append(overlap);
currentChunk.AppendLine();
}
else
{
currentChunk.AppendLine();
}
}
currentChunk.Append(normalized);
currentChunk.AppendLine();
}
var finalChunk = currentChunk.ToString().Trim();
if (!string.IsNullOrWhiteSpace(finalChunk))
yield return finalChunk;
}
private async Task EnsureCollectionExistsAsync(string collectionName, int vectorSize, CancellationToken token)
{
await this.embeddingStore.EnsureEmbeddingStoreExists(collectionName, vectorSize, token);
@ -581,72 +447,6 @@ public sealed class DataSourceEmbeddingService : BackgroundService
await this.embeddingStore.DeleteEmbeddingStore(collectionName, CancellationToken.None);
}
private async Task EnsureStateLoadedAsync(CancellationToken token)
{
if (this.stateLoaded)
return;
await this.stateLock.WaitAsync(token);
try
{
if (this.stateLoaded)
return;
var statePath = this.GetStatePath();
if (!string.IsNullOrWhiteSpace(statePath) && File.Exists(statePath))
{
var json = await File.ReadAllTextAsync(statePath, token);
var persistedState = JsonSerializer.Deserialize<PersistedEmbeddingState>(json, this.jsonOptions);
this.manifests = persistedState?.DataSources ?? new Dictionary<string, DataSourceEmbeddingManifest>(StringComparer.OrdinalIgnoreCase);
}
this.stateLoaded = true;
}
finally
{
this.stateLock.Release();
}
}
private async Task<DataSourceEmbeddingManifest> GetManifestAsync(string dataSourceId, CancellationToken token)
{
await this.EnsureStateLoadedAsync(token);
if (this.manifests.TryGetValue(dataSourceId, out var manifest))
return manifest;
manifest = new DataSourceEmbeddingManifest();
this.manifests[dataSourceId] = manifest;
return manifest;
}
private async Task SaveStateAsync(CancellationToken token)
{
var statePath = this.GetStatePath();
if (string.IsNullOrWhiteSpace(statePath))
return;
var directory = Path.GetDirectoryName(statePath);
if (!string.IsNullOrWhiteSpace(directory))
Directory.CreateDirectory(directory);
var persistedState = new PersistedEmbeddingState
{
DataSources = this.manifests
};
var json = JsonSerializer.Serialize(persistedState, this.jsonOptions);
await File.WriteAllTextAsync(statePath, json, token);
}
private async Task ResetPersistedStateAsync(string dataSourceId)
{
await this.EnsureStateLoadedAsync(CancellationToken.None);
this.manifests.Remove(dataSourceId);
await this.DeleteCollectionAsync(this.GetCollectionName(dataSourceId));
await this.SaveStateAsync(CancellationToken.None);
this.logger.LogInformation("Reset persisted embedding state for data source '{DataSourceId}'.", dataSourceId);
}
private async Task WaitForInitialSettingsAndBootstrapAsync(CancellationToken token)
{
while (!token.IsCancellationRequested)
@ -663,280 +463,8 @@ public sealed class DataSourceEmbeddingService : BackgroundService
token.ThrowIfCancellationRequested();
this.logger.LogInformation("Embedding background service is ready. Queueing all internal data sources.");
await this.QueueAllInternalDataSourcesAsync();
}
private void RefreshWatchers()
{
var supportedSources = this.settingsManager.ConfigurationData.DataSources
.Where(this.IsSupportedInternalDataSource)
.ToDictionary(source => source.Id, StringComparer.OrdinalIgnoreCase);
foreach (var existingWatcherId in this.watchers.Keys.Except(supportedSources.Keys, StringComparer.OrdinalIgnoreCase).ToList())
this.RemoveWatcher(existingWatcherId);
foreach (var dataSource in supportedSources.Values)
this.EnsureWatcher(dataSource);
}
private void EnsureWatcher(IDataSource dataSource)
{
if (this.watchers.TryGetValue(dataSource.Id, out var existingWatcher))
{
var existingTarget = $"{existingWatcher.Path}|{existingWatcher.Filter}|{existingWatcher.IncludeSubdirectories}";
var desiredTarget = this.GetWatcherTarget(dataSource);
if (string.Equals(existingTarget, desiredTarget, StringComparison.OrdinalIgnoreCase))
return;
this.RemoveWatcher(dataSource.Id);
}
var watcher = this.CreateWatcher(dataSource);
if (watcher is null)
return;
if (!this.watchers.TryAdd(dataSource.Id, watcher))
watcher.Dispose();
}
private FileSystemWatcher? CreateWatcher(IDataSource dataSource)
{
FileSystemWatcher? watcher = dataSource switch
{
DataSourceLocalDirectory localDirectory when Directory.Exists(localDirectory.Path) => new FileSystemWatcher(localDirectory.Path)
{
IncludeSubdirectories = true,
NotifyFilter = NotifyFilters.FileName | NotifyFilters.DirectoryName | NotifyFilters.LastWrite | NotifyFilters.CreationTime | NotifyFilters.Size,
},
DataSourceLocalFile localFile when File.Exists(localFile.FilePath) && !string.IsNullOrWhiteSpace(Path.GetDirectoryName(localFile.FilePath)) => new FileSystemWatcher(Path.GetDirectoryName(localFile.FilePath)!)
{
Filter = Path.GetFileName(localFile.FilePath),
NotifyFilter = NotifyFilters.FileName | NotifyFilters.LastWrite | NotifyFilters.CreationTime | NotifyFilters.Size,
},
_ => null,
};
if (watcher is null)
return null;
FileSystemEventHandler onChanged = (_, _) => this.OnWatchedDataSourceChanged(dataSource.Id);
RenamedEventHandler onRenamed = (_, _) => this.OnWatchedDataSourceChanged(dataSource.Id);
ErrorEventHandler onError = (_, errorArgs) =>
{
this.logger.LogWarning(errorArgs.GetException(), "The file watcher for data source '{DataSourceId}' failed. Recreating it.", dataSource.Id);
this.RemoveWatcher(dataSource.Id);
this.EnsureWatcher(dataSource);
this.OnWatchedDataSourceChanged(dataSource.Id);
};
watcher.Created += onChanged;
watcher.Changed += onChanged;
watcher.Deleted += onChanged;
watcher.Renamed += onRenamed;
watcher.Error += onError;
watcher.EnableRaisingEvents = true;
return watcher;
}
private string GetWatcherTarget(IDataSource dataSource) => dataSource switch
{
DataSourceLocalDirectory localDirectory => $"{localDirectory.Path}|*.*|True",
DataSourceLocalFile localFile => $"{Path.GetDirectoryName(localFile.FilePath)}|{Path.GetFileName(localFile.FilePath)}|False",
_ => string.Empty
};
private void RemoveWatcher(string dataSourceId)
{
if (this.watchers.TryRemove(dataSourceId, out var watcher))
watcher.Dispose();
}
private void OnWatchedDataSourceChanged(string dataSourceId)
{
this.logger.LogInformation("Detected file system change for data source '{DataSourceId}'. Queueing a fresh embedding run.", dataSourceId);
_ = Task.Run(async () =>
{
try
{
var dataSource = this.settingsManager.ConfigurationData.DataSources
.FirstOrDefault(source => source.Id.Equals(dataSourceId, StringComparison.OrdinalIgnoreCase));
if (dataSource is not null)
await this.QueueDataSourceAsync(dataSource);
}
catch (Exception exception)
{
this.logger.LogWarning(exception, "Failed to queue watched data source '{DataSourceId}' after a file system change.", dataSourceId);
}
});
}
private string GetStatePath()
{
if (string.IsNullOrWhiteSpace(SettingsManager.ConfigDirectory))
return string.Empty;
return Path.Combine(SettingsManager.ConfigDirectory, STATE_FILENAME);
}
private FileEnumerationResult GetInputFiles(IDataSource dataSource)
{
var result = new FileEnumerationResult();
switch (dataSource)
{
case DataSourceLocalFile localFile when File.Exists(localFile.FilePath):
result.Files.Add(new FileInfo(localFile.FilePath));
return result;
case DataSourceLocalDirectory localDirectory when Directory.Exists(localDirectory.Path):
this.EnumerateAccessibleFiles(localDirectory.Path, result);
return result;
}
switch (dataSource)
{
case DataSourceLocalFile localFile:
result.FailedFiles = 1;
result.LastError = $"The selected file '{localFile.FilePath}' does not exist.";
break;
case DataSourceLocalDirectory localDirectory:
result.FailedFiles = 1;
result.LastError = $"The selected directory '{localDirectory.Path}' does not exist.";
break;
}
return result;
}
private void EnumerateAccessibleFiles(string rootPath, FileEnumerationResult result)
{
var pendingDirectories = new Stack<string>();
pendingDirectories.Push(rootPath);
while (pendingDirectories.Count > 0)
{
var currentPath = pendingDirectories.Pop();
IEnumerable<string> subDirectories;
IEnumerable<string> files;
try
{
subDirectories = Directory.EnumerateDirectories(currentPath);
files = Directory.EnumerateFiles(currentPath);
}
catch (Exception exception)
{
this.logger.LogWarning(exception, "Cannot access directory '{DirectoryPath}' while indexing.", currentPath);
result.FailedFiles++;
result.LastError = $"The directory '{currentPath}' could not be accessed.";
continue;
}
foreach (var filePath in files)
{
FileInfo fileInfo;
try
{
fileInfo = new FileInfo(filePath);
if (!fileInfo.Exists)
continue;
}
catch (Exception exception)
{
this.logger.LogWarning(exception, "Cannot inspect file '{FilePath}' while indexing.", filePath);
result.FailedFiles++;
result.LastError = $"The file '{filePath}' could not be inspected.";
continue;
}
result.Files.Add(fileInfo);
}
foreach (var subDirectory in subDirectories)
pendingDirectories.Push(subDirectory);
}
}
private string TryGetRelativePath(IDataSource dataSource, FileInfo file) => dataSource switch
{
DataSourceLocalDirectory localDirectory => Path.GetRelativePath(localDirectory.Path, file.FullName),
_ => file.Name
};
private static string NormalizeChunkSegment(string input)
{
return input
.Replace("\r\n", "\n", StringComparison.Ordinal)
.Replace('\r', '\n')
.Trim();
}
private bool IsImageFilePath(string filePath)
{
return FileTypes.IsAllowedPath(filePath, FileTypes.IMAGE);
}
private string BuildImageIndexText(string filePath)
{
var fileName = Path.GetFileName(filePath);
var fileNameWithoutExtension = Path.GetFileNameWithoutExtension(filePath);
var extension = Path.GetExtension(filePath).TrimStart('.');
var normalizedName = fileNameWithoutExtension
.Replace('_', ' ')
.Replace('-', ' ')
.Trim();
return $$"""
Image asset
File name: {{fileName}}
Type: {{extension}}
Search terms: {{normalizedName}}
Path: {{filePath}}
Note: The current RAG embedding pipeline stores image files by metadata only. Visual content is not OCRed or captioned yet.
""";
}
private string BuildEmbeddingSignature(EmbeddingProvider embeddingProvider)
{
return string.Join('|',
embeddingProvider.Id,
embeddingProvider.UsedLLMProvider,
embeddingProvider.Model.Id,
embeddingProvider.Host,
embeddingProvider.Hostname,
embeddingProvider.TokenizerPath);
}
private string BuildFingerprint(FileInfo file)
{
var fingerprintSource = $"{file.FullName}|{file.Length}|{file.LastWriteTimeUtc.Ticks}";
var bytes = SHA256.HashData(Encoding.UTF8.GetBytes(fingerprintSource));
return Convert.ToHexString(bytes);
}
private string GetCollectionName(string dataSourceId)
{
var safeId = dataSourceId
.ToLowerInvariant()
.Replace("-", string.Empty, StringComparison.Ordinal);
return $"rag_{safeId}";
}
private string CreatePointId(string dataSourceId, string fingerprint, int chunkIndex)
{
var source = $"{dataSourceId}:{fingerprint}:{chunkIndex}";
var hash = SHA256.HashData(Encoding.UTF8.GetBytes(source));
var guidBytes = hash[..16].ToArray();
// Mark the bytes as an RFC 4122 version 4 UUID so Qdrant accepts the ID format.
guidBytes[6] = (byte)((guidBytes[6] & 0x0F) | 0x40);
guidBytes[8] = (byte)((guidBytes[8] & 0x3F) | 0x80);
return new Guid(guidBytes).ToString();
this.logger.LogInformation("Embedding background service is ready. Checking whether automatic data source refresh is enabled.");
await this.QueueAllInternalDataSourcesIfAutomaticRefreshAsync();
}
private bool IsSupportedInternalDataSource(IDataSource dataSource)
@ -944,18 +472,104 @@ public sealed class DataSourceEmbeddingService : BackgroundService
return dataSource is DataSourceLocalDirectory or DataSourceLocalFile;
}
private DataSourceEmbeddingStatus GetFallbackStatus(IDataSource dataSource, string errorMessage)
private bool TryResolveEmbeddingProvider(IDataSource dataSource, [NotNullWhen(true)] out EmbeddingProvider? embeddingProvider)
{
embeddingProvider = this.settingsManager.ConfigurationData.EmbeddingProviders.FirstOrDefault(provider =>
dataSource is IInternalDataSource internalDataSource &&
provider.Id.Equals(internalDataSource.EmbeddingId, StringComparison.OrdinalIgnoreCase));
return embeddingProvider != default && embeddingProvider.UsedLLMProvider is not LLMProviders.NONE;
}
private async Task<DataSourceEmbeddingManifest> EnsureCompatibleManifestAsync(IDataSource dataSource, EmbeddingProvider embeddingProvider, string collectionName, CancellationToken token)
{
var embeddingSignature = this.BuildEmbeddingSignature(embeddingProvider);
var manifest = await this.GetManifestAsync(dataSource.Id, token);
if (!string.Equals(manifest.EmbeddingSignature, embeddingSignature, StringComparison.Ordinal))
{
this.logger.LogInformation(
"Embedding configuration changed for data source '{DataSourceName}' ({DataSourceId}). Resetting persisted state and collection '{CollectionName}'.",
dataSource.Name,
dataSource.Id,
collectionName);
await this.ResetPersistedStateAsync(dataSource.Id);
manifest = await this.GetManifestAsync(dataSource.Id, token);
}
if (!string.Equals(manifest.EmbeddingProviderId, embeddingProvider.Id, StringComparison.OrdinalIgnoreCase) ||
!string.Equals(manifest.EmbeddingSignature, embeddingSignature, StringComparison.Ordinal))
{
manifest.EmbeddingProviderId = embeddingProvider.Id;
manifest.EmbeddingSignature = embeddingSignature;
await this.SaveStateAsync(token);
}
return manifest;
}
private async Task RemoveMissingFileEmbeddingsAsync(
IDataSource dataSource,
string collectionName,
DataSourceEmbeddingManifest manifest,
IReadOnlyCollection<FileInfo> indexedFiles,
CancellationToken token)
{
var existingPaths = indexedFiles
.Select(file => file.FullName)
.ToHashSet(StringComparer.OrdinalIgnoreCase);
foreach (var removedFilePath in manifest.Files.Keys.Except(existingPaths, StringComparer.OrdinalIgnoreCase).ToList())
{
await this.DeleteFilePointsAsync(collectionName, removedFilePath, token);
manifest.Files.Remove(removedFilePath);
this.logger.LogInformation(
"Removed stale embeddings for deleted file '{FilePath}' from data source '{DataSourceName}' ({DataSourceId}).",
removedFilePath,
dataSource.Name,
dataSource.Id);
}
}
private DataSourceEmbeddingStatus CreateStatus(
IDataSource dataSource,
DataSourceEmbeddingState state,
int totalFiles,
int indexedFiles,
int failedFiles,
string currentFile = "",
string lastError = "")
{
return new DataSourceEmbeddingStatus(
dataSource.Id,
dataSource.Name,
dataSource.Type,
DataSourceEmbeddingState.FAILED,
0,
0,
1,
string.Empty,
errorMessage);
state,
totalFiles,
indexedFiles,
failedFiles,
currentFile,
lastError);
}
private DataSourceEmbeddingStatus CreateCompletedStatus(IDataSource dataSource, int totalFiles, int indexedFiles, int failedFiles, string lastError)
{
return this.CreateStatus(
dataSource,
failedFiles > 0 ? DataSourceEmbeddingState.FAILED : DataSourceEmbeddingState.COMPLETED,
totalFiles,
indexedFiles,
failedFiles,
lastError: failedFiles > 0
? string.IsNullOrWhiteSpace(lastError)
? "Some files could not be embedded. See the logs for details."
: lastError
: string.Empty);
}
private DataSourceEmbeddingStatus GetFallbackStatus(IDataSource dataSource, string errorMessage)
{
return this.CreateStatus(dataSource, DataSourceEmbeddingState.FAILED, 0, 0, 1, lastError: errorMessage);
}
private void UpsertStatus(DataSourceEmbeddingStatus status)
@ -969,87 +583,3 @@ public sealed class DataSourceEmbeddingService : BackgroundService
_ = MessageBus.INSTANCE.SendMessage(null, Event.RAG_EMBEDDING_STATUS_CHANGED, true);
}
}
public sealed record DataSourceEmbeddingOverview(
bool IsVisible,
bool ShowIconOnly,
DataSourceEmbeddingState State,
int IndexedFiles,
int TotalFiles,
int FailedFiles,
string NavLabel);
public enum DataSourceEmbeddingState
{
IDLE,
QUEUED,
RUNNING,
COMPLETED,
FAILED,
}
public sealed record DataSourceEmbeddingStatus(
string DataSourceId,
string DataSourceName,
DataSourceType DataSourceType,
DataSourceEmbeddingState State,
int TotalFiles,
int IndexedFiles,
int FailedFiles,
string CurrentFile,
string LastError)
{
private static string TB(string fallbackEN) => I18N.I.T(fallbackEN, typeof(DataSourceEmbeddingService).Namespace, nameof(DataSourceEmbeddingService));
public int ProgressPercent => this.TotalFiles <= 0 ? 0 : Math.Clamp((int)Math.Round(this.IndexedFiles * 100d / this.TotalFiles), 0, 100);
public string StateLabel => this.State switch
{
DataSourceEmbeddingState.QUEUED => TB("Queued"),
DataSourceEmbeddingState.RUNNING => TB("Running"),
DataSourceEmbeddingState.COMPLETED => TB("Completed"),
DataSourceEmbeddingState.FAILED => TB("Needs attention"),
_ => TB("Idle")
};
public int SortOrder => this.State switch
{
DataSourceEmbeddingState.RUNNING => 0,
DataSourceEmbeddingState.QUEUED => 1,
DataSourceEmbeddingState.FAILED => 2,
DataSourceEmbeddingState.COMPLETED => 3,
_ => 4,
};
}
public sealed class FileEnumerationResult
{
public List<FileInfo> Files { get; } = [];
public int FailedFiles { get; set; }
public string LastError { get; set; } = string.Empty;
}
public sealed class PersistedEmbeddingState
{
public Dictionary<string, DataSourceEmbeddingManifest> DataSources { get; init; } = new(StringComparer.OrdinalIgnoreCase);
}
public sealed class DataSourceEmbeddingManifest
{
public string EmbeddingProviderId { get; set; } = string.Empty;
public string EmbeddingSignature { get; set; } = string.Empty;
public int VectorSize { get; set; }
public Dictionary<string, EmbeddedFileRecord> Files { get; init; } = new(StringComparer.OrdinalIgnoreCase);
}
public sealed record EmbeddedFileRecord(
string Fingerprint,
long FileSize,
DateTime LastWriteUtc,
DateTime EmbeddedAtUtc,
int ChunkCount);