mirror of
https://github.com/MindWorkAI/AI-Studio.git
synced 2026-05-20 09:52:14 +00:00
added manual refresh and cleaned up code
This commit is contained in:
parent
ac677c5ac7
commit
637bf9701b
@ -4837,15 +4837,24 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::SETTINGS::SETTINGSDIALOGDATASOURCES::T108494
|
||||
-- Are you sure you want to delete the data source '{0}' of type {1}?
|
||||
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::SETTINGS::SETTINGSDIALOGDATASOURCES::T1096979935"] = "Are you sure you want to delete the data source '{0}' of type {1}?"
|
||||
|
||||
-- Automatic local data source refresh
|
||||
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::SETTINGS::SETTINGSDIALOGDATASOURCES::T1208397349"] = "Automatic local data source refresh"
|
||||
|
||||
-- Edit Local Directory Data Source
|
||||
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::SETTINGS::SETTINGSDIALOGDATASOURCES::T1215599168"] = "Edit Local Directory Data Source"
|
||||
|
||||
-- Refresh
|
||||
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::SETTINGS::SETTINGSDIALOGDATASOURCES::T135637716"] = "Refresh"
|
||||
|
||||
-- Add Local Directory as Data Source
|
||||
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::SETTINGS::SETTINGSDIALOGDATASOURCES::T1454193397"] = "Add Local Directory as Data Source"
|
||||
|
||||
-- Delete
|
||||
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::SETTINGS::SETTINGSDIALOGDATASOURCES::T1469573738"] = "Delete"
|
||||
|
||||
-- Refresh all
|
||||
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::SETTINGS::SETTINGSDIALOGDATASOURCES::T1503082343"] = "Refresh all"
|
||||
|
||||
-- External (ERI)
|
||||
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::SETTINGS::SETTINGSDIALOGDATASOURCES::T1652430727"] = "External (ERI)"
|
||||
|
||||
@ -4900,6 +4909,9 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::SETTINGS::SETTINGSDIALOGDATASOURCES::T352566
|
||||
-- No data sources configured yet.
|
||||
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::SETTINGS::SETTINGSDIALOGDATASOURCES::T3549650120"] = "No data sources configured yet."
|
||||
|
||||
-- Local data sources refresh when files change.
|
||||
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::SETTINGS::SETTINGSDIALOGDATASOURCES::T3687976654"] = "Local data sources refresh when files change."
|
||||
|
||||
-- Actions
|
||||
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::SETTINGS::SETTINGSDIALOGDATASOURCES::T3865031940"] = "Actions"
|
||||
|
||||
@ -4912,6 +4924,9 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::SETTINGS::SETTINGSDIALOGDATASOURCES::T590005
|
||||
-- External Data (ERI-Server v1)
|
||||
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::SETTINGS::SETTINGSDIALOGDATASOURCES::T774473996"] = "External Data (ERI-Server v1)"
|
||||
|
||||
-- Local data sources refresh only when triggered manually.
|
||||
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::SETTINGS::SETTINGSDIALOGDATASOURCES::T854231603"] = "Local data sources refresh only when triggered manually."
|
||||
|
||||
-- Local Directory
|
||||
UI_TEXT_CONTENT["AISTUDIO::DIALOGS::SETTINGS::SETTINGSDIALOGDATASOURCES::T926703547"] = "Local Directory"
|
||||
|
||||
@ -7571,25 +7586,19 @@ UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T378481461"] = "Source like p
|
||||
UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPES::T4165204724"] = "Document"
|
||||
|
||||
-- Running
|
||||
UI_TEXT_CONTENT["AISTUDIO::TOOLS::SERVICES::DATASOURCEEMBEDDINGSERVICE::T1160324588"] = "Running"
|
||||
UI_TEXT_CONTENT["AISTUDIO::TOOLS::SERVICES::DATASOURCEEMBEDDINGMODELS::T1160324588"] = "Running"
|
||||
|
||||
-- Idle
|
||||
UI_TEXT_CONTENT["AISTUDIO::TOOLS::SERVICES::DATASOURCEEMBEDDINGSERVICE::T1168775091"] = "Idle"
|
||||
UI_TEXT_CONTENT["AISTUDIO::TOOLS::SERVICES::DATASOURCEEMBEDDINGMODELS::T1168775091"] = "Idle"
|
||||
|
||||
-- Needs attention
|
||||
UI_TEXT_CONTENT["AISTUDIO::TOOLS::SERVICES::DATASOURCEEMBEDDINGSERVICE::T1566837660"] = "Needs attention"
|
||||
UI_TEXT_CONTENT["AISTUDIO::TOOLS::SERVICES::DATASOURCEEMBEDDINGMODELS::T1566837660"] = "Needs attention"
|
||||
|
||||
-- Queued
|
||||
UI_TEXT_CONTENT["AISTUDIO::TOOLS::SERVICES::DATASOURCEEMBEDDINGSERVICE::T2655222900"] = "Queued"
|
||||
|
||||
-- Embedding
|
||||
UI_TEXT_CONTENT["AISTUDIO::TOOLS::SERVICES::DATASOURCEEMBEDDINGSERVICE::T2838542994"] = "Embedding"
|
||||
UI_TEXT_CONTENT["AISTUDIO::TOOLS::SERVICES::DATASOURCEEMBEDDINGMODELS::T2655222900"] = "Queued"
|
||||
|
||||
-- Completed
|
||||
UI_TEXT_CONTENT["AISTUDIO::TOOLS::SERVICES::DATASOURCEEMBEDDINGSERVICE::T3968379570"] = "Completed"
|
||||
|
||||
-- Embeddings
|
||||
UI_TEXT_CONTENT["AISTUDIO::TOOLS::SERVICES::DATASOURCEEMBEDDINGSERVICE::T951463987"] = "Embeddings"
|
||||
UI_TEXT_CONTENT["AISTUDIO::TOOLS::SERVICES::DATASOURCEEMBEDDINGMODELS::T3968379570"] = "Completed"
|
||||
|
||||
-- Pandoc Installation
|
||||
UI_TEXT_CONTENT["AISTUDIO::TOOLS::SERVICES::PANDOCAVAILABILITYSERVICE::T185447014"] = "Pandoc Installation"
|
||||
|
||||
@ -123,7 +123,8 @@
|
||||
AdornmentColor="Color.Info"
|
||||
Validation="@this.providerValidation.ValidatingInstanceName"
|
||||
UserAttributes="@SPELLCHECK_ATTRIBUTES"/>
|
||||
@if (this.DataModel != default){
|
||||
@if (this.DataLLMProvider != LLMProviders.NONE)
|
||||
{
|
||||
<MudJustifiedText Typo="Typo.body1" Class="mb-3">
|
||||
@T("For better embeddings and less storage usage, it's recommended to use a custom tokenizer to enable a more accurate token count.")
|
||||
</MudJustifiedText>
|
||||
@ -137,8 +138,7 @@
|
||||
Error="@(!string.IsNullOrWhiteSpace(this.dataCustomTokenizerValidationIssue))"
|
||||
ErrorText="@(this.dataCustomTokenizerValidationIssue)"
|
||||
Validation="@this.providerValidation.ValidatingCustomTokenizer"
|
||||
OnClear = "@this.ClearPathTokenizer"
|
||||
/>
|
||||
OnClear="@this.ClearPathTokenizer" />
|
||||
}
|
||||
</MudForm>
|
||||
@if (this.dataStoreWasAttempted)
|
||||
|
||||
@ -2,6 +2,7 @@ using AIStudio.Chat;
|
||||
using AIStudio.Components;
|
||||
using AIStudio.Provider;
|
||||
using AIStudio.Settings;
|
||||
using AIStudio.Tools.Rust;
|
||||
using AIStudio.Tools.Services;
|
||||
using AIStudio.Tools.Validation;
|
||||
|
||||
@ -115,7 +116,7 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId
|
||||
GetPreviousInstanceName = () => this.dataEditingPreviousInstanceName,
|
||||
GetUsedInstanceNames = () => this.UsedInstanceNames,
|
||||
GetHost = () => this.DataHost,
|
||||
IsModelProvidedManually = () => this.DataLLMProvider is LLMProviders.SELF_HOSTED && this.DataHost is Host.OLLAMA,
|
||||
IsModelProvidedManually = () => this.DataLLMProvider.IsEmbeddingModelProvidedManually(this.DataHost),
|
||||
GetCustomTokenizerValidationIssue = () => this.dataCustomTokenizerValidationIssue,
|
||||
};
|
||||
}
|
||||
@ -126,7 +127,7 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId
|
||||
Model model = default;
|
||||
if(this.DataLLMProvider is LLMProviders.SELF_HOSTED)
|
||||
{
|
||||
if (this.DataHost is Host.OLLAMA)
|
||||
if (this.DataLLMProvider.IsEmbeddingModelProvidedManually(this.DataHost))
|
||||
model = new Model(this.dataManuallyModel, null);
|
||||
else if (this.DataHost is Host.LM_STUDIO)
|
||||
model = this.DataModel;
|
||||
@ -176,7 +177,7 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId
|
||||
//
|
||||
// We cannot load the API key for self-hosted providers:
|
||||
//
|
||||
if (this.DataLLMProvider is LLMProviders.SELF_HOSTED && this.DataHost is not Host.OLLAMA)
|
||||
if (this.DataLLMProvider is LLMProviders.SELF_HOSTED && this.DataHost is not Host.OLLAMA && this.DataHost is not Host.VLLM)
|
||||
{
|
||||
await this.ReloadModels();
|
||||
await base.OnInitializedAsync();
|
||||
@ -241,10 +242,10 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId
|
||||
if (!this.dataIsValid)
|
||||
return;
|
||||
|
||||
var response = await this.RustService.StoreTokenizer(TokenizerModelId.ForEmbeddingProviderId(this.DataId), this.dataFilePath);
|
||||
var response = await this.StoreOrDeleteTokenizerAsync();
|
||||
if (!response.Success)
|
||||
{
|
||||
this.dataCustomTokenizerValidationIssue = response.Message;
|
||||
this.dataCustomTokenizerValidationIssue = string.IsNullOrWhiteSpace(response.Message) ? string.Empty : response.Message;
|
||||
await this.form.Validate();
|
||||
return;
|
||||
}
|
||||
@ -340,6 +341,15 @@ public partial class EmbeddingProviderDialog : MSGComponentBase, ISecretId
|
||||
}
|
||||
}
|
||||
|
||||
private Task<TokenizerResponse> StoreOrDeleteTokenizerAsync()
|
||||
{
|
||||
var tokenizerId = TokenizerModelId.ForEmbeddingProviderId(this.DataId);
|
||||
if (string.IsNullOrWhiteSpace(this.dataFilePath))
|
||||
return this.RustService.DeleteTokenizer(tokenizerId);
|
||||
|
||||
return this.RustService.StoreTokenizer(tokenizerId, this.dataFilePath);
|
||||
}
|
||||
|
||||
private void OnHostChanged(Host selectedHost)
|
||||
{
|
||||
// When the host changes, reset the model selection state:
|
||||
|
||||
@ -4,6 +4,7 @@ using System.Text.Json;
|
||||
using AIStudio.Components;
|
||||
using AIStudio.Provider;
|
||||
using AIStudio.Provider.HuggingFace;
|
||||
using AIStudio.Tools.Rust;
|
||||
using AIStudio.Tools.Services;
|
||||
using AIStudio.Tools.Validation;
|
||||
|
||||
@ -268,7 +269,7 @@ public partial class ProviderDialog : MSGComponentBase, ISecretId
|
||||
if (!this.dataIsValid)
|
||||
return;
|
||||
|
||||
var tokenizerResponse = await this.RustService.StoreTokenizer(TokenizerModelId.ForProviderId(this.DataId), this.dataFilePath);
|
||||
var tokenizerResponse = await this.StoreOrDeleteTokenizerAsync();
|
||||
if (!tokenizerResponse.Success)
|
||||
{
|
||||
this.dataCustomTokenizerValidationIssue = tokenizerResponse.Message;
|
||||
@ -367,6 +368,15 @@ public partial class ProviderDialog : MSGComponentBase, ISecretId
|
||||
}
|
||||
}
|
||||
|
||||
private Task<TokenizerResponse> StoreOrDeleteTokenizerAsync()
|
||||
{
|
||||
var tokenizerId = TokenizerModelId.ForProviderId(this.DataId);
|
||||
if (string.IsNullOrWhiteSpace(this.dataFilePath))
|
||||
return this.RustService.DeleteTokenizer(tokenizerId);
|
||||
|
||||
return this.RustService.StoreTokenizer(tokenizerId, this.dataFilePath);
|
||||
}
|
||||
|
||||
private void OnHostChanged(Host selectedHost)
|
||||
{
|
||||
// When the host changes, reset the model selection state:
|
||||
|
||||
@ -14,6 +14,13 @@
|
||||
@T("You might configure different data sources. A data source can include one file, all files in a directory, or data from your company. Later, you can incorporate these data sources as needed when the AI requires this data to complete a certain task.")
|
||||
</MudJustifiedText>
|
||||
|
||||
<MudStack Row="true" AlignItems="AlignItems.Center" Justify="Justify.SpaceBetween" Class="mb-3" Wrap="Wrap.Wrap">
|
||||
<MudTextSwitch Label="@T("Automatic local data source refresh")" Value="@this.SettingsManager.ConfigurationData.DataSourceIndexing.AutomaticRefresh" LabelOn="@T("Local data sources refresh when files change.")" LabelOff="@T("Local data sources refresh only when triggered manually.")" ValueChanged="@this.AutomaticRefreshChanged"/>
|
||||
<MudButton Variant="Variant.Filled" Color="Color.Primary" StartIcon="@Icons.Material.Filled.Sync" Disabled="@(!this.HasRefreshableDataSources())" OnClick="@this.RefreshAllDataSources">
|
||||
@T("Refresh all")
|
||||
</MudButton>
|
||||
</MudStack>
|
||||
|
||||
<MudTable Items="@this.SettingsManager.ConfigurationData.DataSources" Hover="@true" Class="border-dashed border rounded-lg">
|
||||
<ColGroup>
|
||||
<col style="width: 3em;"/>
|
||||
@ -38,6 +45,9 @@
|
||||
<MudTd>
|
||||
<MudStack Row="true" Class="mb-2 mt-2" Wrap="Wrap.Wrap">
|
||||
<MudIconButton Variant="Variant.Filled" Color="Color.Info" Icon="@Icons.Material.Filled.Info" OnClick="() => this.ShowInformation(context)"/>
|
||||
<MudButton Variant="Variant.Filled" Color="Color.Primary" StartIcon="@Icons.Material.Filled.Sync" Disabled="@(!this.CanRefreshDataSource(context))" OnClick="() => this.RefreshDataSource(context)">
|
||||
@T("Refresh")
|
||||
</MudButton>
|
||||
<MudButton Variant="Variant.Filled" Color="Color.Info" StartIcon="@Icons.Material.Filled.Edit" OnClick="() => this.EditDataSource(context)">
|
||||
@T("Edit")
|
||||
</MudButton>
|
||||
@ -73,4 +83,4 @@
|
||||
@T("Close")
|
||||
</MudButton>
|
||||
</DialogActions>
|
||||
</MudDialog>
|
||||
</MudDialog>
|
||||
|
||||
@ -28,6 +28,39 @@ public partial class SettingsDialogDataSources : SettingsDialogBase
|
||||
|
||||
return T("Unknown");
|
||||
}
|
||||
|
||||
private bool CanRefreshDataSource(IDataSource dataSource)
|
||||
{
|
||||
return dataSource is DataSourceLocalDirectory or DataSourceLocalFile;
|
||||
}
|
||||
|
||||
private bool HasRefreshableDataSources()
|
||||
{
|
||||
return this.SettingsManager.ConfigurationData.DataSources.Any(this.CanRefreshDataSource);
|
||||
}
|
||||
|
||||
private async Task AutomaticRefreshChanged(bool enabled)
|
||||
{
|
||||
this.SettingsManager.ConfigurationData.DataSourceIndexing.AutomaticRefresh = enabled;
|
||||
await this.SettingsManager.StoreSettings();
|
||||
this.DataSourceEmbeddingService.RefreshAutomaticWatchers();
|
||||
await this.MessageBus.SendMessage<bool>(this, Event.CONFIGURATION_CHANGED);
|
||||
}
|
||||
|
||||
private async Task RefreshAllDataSources()
|
||||
{
|
||||
await this.DataSourceEmbeddingService.QueueAllInternalDataSourcesAsync();
|
||||
await this.MessageBus.SendMessage<bool>(this, Event.CONFIGURATION_CHANGED);
|
||||
}
|
||||
|
||||
private async Task RefreshDataSource(IDataSource dataSource)
|
||||
{
|
||||
if (!this.CanRefreshDataSource(dataSource))
|
||||
return;
|
||||
|
||||
await this.DataSourceEmbeddingService.QueueDataSourceAsync(dataSource);
|
||||
await this.MessageBus.SendMessage<bool>(this, Event.CONFIGURATION_CHANGED);
|
||||
}
|
||||
|
||||
private async Task AddDataSource(DataSourceType type)
|
||||
{
|
||||
|
||||
@ -25,7 +25,7 @@
|
||||
|
||||
<MudSpacer/>
|
||||
|
||||
@if (this.embeddingOverview.IsVisible)
|
||||
@if (this.showEmbeddingStatusIcon)
|
||||
{
|
||||
<MudNavMenu>
|
||||
<MudTooltip Text="@this.EmbeddingNavigationTooltip" Placement="Placement.Right">
|
||||
@ -64,7 +64,7 @@
|
||||
|
||||
<MudSpacer/>
|
||||
|
||||
@if (this.embeddingOverview.IsVisible)
|
||||
@if (this.showEmbeddingStatusIcon)
|
||||
{
|
||||
<MudNavMenu>
|
||||
@if (this.SettingsManager.ConfigurationData.App.NavigationBehavior is NavBehavior.NEVER_EXPAND_USE_TOOLTIPS)
|
||||
|
||||
@ -60,9 +60,10 @@ public partial class MainLayout : LayoutComponentBase, IMessageBusReceiver, ILan
|
||||
private bool startupCompleted;
|
||||
private readonly SemaphoreSlim mandatoryInfoDialogSemaphore = new(1, 1);
|
||||
|
||||
private DataSourceEmbeddingOverview embeddingOverview = new(false, true, DataSourceEmbeddingState.COMPLETED, 0, 0, 0, string.Empty);
|
||||
private DataSourceEmbeddingOverview embeddingOverview = new(false, DataSourceEmbeddingState.COMPLETED, 0, 0, 0);
|
||||
private IReadOnlyCollection<NavBarItem> navItems = [];
|
||||
private NavBarItem embeddingItem = new NavBarItem(string.Empty, string.Empty, string.Empty, string.Empty, string.Empty, false);
|
||||
private NavBarItem embeddingItem = new (string.Empty, string.Empty, string.Empty, string.Empty, string.Empty, false);
|
||||
private bool showEmbeddingStatusIcon = false;
|
||||
|
||||
#region Overrides of ComponentBase
|
||||
|
||||
@ -93,7 +94,7 @@ public partial class MainLayout : LayoutComponentBase, IMessageBusReceiver, ILan
|
||||
|
||||
// Ensure that all settings are loaded:
|
||||
await this.SettingsManager.LoadSettings();
|
||||
await this.DataSourceEmbeddingService.QueueAllInternalDataSourcesAsync();
|
||||
await this.DataSourceEmbeddingService.QueueAllInternalDataSourcesIfAutomaticRefreshAsync();
|
||||
|
||||
// Register this component with the message bus:
|
||||
this.MessageBus.RegisterComponent(this);
|
||||
@ -325,6 +326,7 @@ public partial class MainLayout : LayoutComponentBase, IMessageBusReceiver, ILan
|
||||
private void LoadEmbeddingItem()
|
||||
{
|
||||
this.embeddingOverview = this.DataSourceEmbeddingService.GetOverview();
|
||||
this.showEmbeddingStatusIcon = this.embeddingOverview.IsVisible;
|
||||
var palette = this.ColorTheme.GetCurrentPalette(this.SettingsManager);
|
||||
(string icon, string lightcolor, string darkcolor) embeddingIcon = this.embeddingOverview.State switch
|
||||
{
|
||||
@ -345,7 +347,7 @@ public partial class MainLayout : LayoutComponentBase, IMessageBusReceiver, ILan
|
||||
DataSourceEmbeddingState.FAILED => this.embeddingOverview.FailedFiles > 0
|
||||
? string.Format(T("Some embeddings failed. {0} file(s) need attention."), this.embeddingOverview.FailedFiles)
|
||||
: T("Some embeddings failed and need attention."),
|
||||
_ => this.embeddingOverview.NavLabel,
|
||||
_ => string.Empty
|
||||
};
|
||||
|
||||
private async Task ShowUpdateDialog()
|
||||
@ -508,4 +510,4 @@ public partial class MainLayout : LayoutComponentBase, IMessageBusReceiver, ILan
|
||||
}
|
||||
|
||||
#endregion
|
||||
}
|
||||
}
|
||||
|
||||
@ -88,7 +88,7 @@ internal sealed class Program
|
||||
|
||||
var embeddingStoreConfig = await rust.GetEmbeddingStoreConfiguration(EmbeddingStoreKind.QDRANT_REMOTE);
|
||||
|
||||
EmbeddingStore embeddingStore = EmbeddingStoreFactory.Create(embeddingStoreConfig);
|
||||
var embeddingStore = EmbeddingStoreFactory.Create(embeddingStoreConfig);
|
||||
|
||||
var builder = WebApplication.CreateBuilder();
|
||||
builder.WebHost.ConfigureKestrel(kestrelServerOptions =>
|
||||
|
||||
@ -124,6 +124,8 @@ public sealed class Data
|
||||
|
||||
public DataAgentRetrievalContextValidation AgentRetrievalContextValidation { get; init; } = new();
|
||||
|
||||
public DataDataSourceIndexing DataSourceIndexing { get; init; } = new();
|
||||
|
||||
public DataAssistantPluginAudit AssistantPluginAudit { get; init; } = new(x => x.AssistantPluginAudit);
|
||||
|
||||
public DataAgenda Agenda { get; init; } = new();
|
||||
|
||||
@ -0,0 +1,9 @@
|
||||
namespace AIStudio.Settings.DataModel;
|
||||
|
||||
public sealed class DataDataSourceIndexing
|
||||
{
|
||||
/// <summary>
|
||||
/// Whether local data source embeddings should refresh automatically when files change.
|
||||
/// </summary>
|
||||
public bool AutomaticRefresh { get; set; } = true;
|
||||
}
|
||||
@ -35,14 +35,15 @@ public abstract class EmbeddingStore(string name, string path)
|
||||
{
|
||||
string[] suffixes = { "B", "KB", "MB", "GB", "TB", "PB" };
|
||||
int suffixIndex = 0;
|
||||
double convertedSize = size;
|
||||
|
||||
while (size >= 1024 && suffixIndex < suffixes.Length - 1)
|
||||
while (convertedSize >= 1024 && suffixIndex < suffixes.Length - 1)
|
||||
{
|
||||
size /= 1024;
|
||||
convertedSize /= 1024;
|
||||
suffixIndex++;
|
||||
}
|
||||
|
||||
return $"{size:0##} {suffixes[suffixIndex]}";
|
||||
return $"{convertedSize:0.##} {suffixes[suffixIndex]}";
|
||||
}
|
||||
|
||||
public void SetLogger(ILogger<EmbeddingStore> logService)
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
using Qdrant.Client;
|
||||
using Qdrant.Client.Grpc;
|
||||
using Grpc.Core;
|
||||
using AIStudio.Tools.PluginSystem;
|
||||
using static Qdrant.Client.Grpc.Conditions;
|
||||
|
||||
@ -104,14 +105,32 @@ public class QdrantClientImplementation : EmbeddingStore
|
||||
return this.GrpcClient.UpsertAsync(collectionName, qdrantPoints, true, null, null, token);
|
||||
}
|
||||
|
||||
public override Task DeleteEmbeddingByFile(string collectionName, string filePath, CancellationToken token)
|
||||
public override async Task DeleteEmbeddingByFile(string collectionName, string filePath, CancellationToken token)
|
||||
{
|
||||
return this.GrpcClient.DeleteAsync(collectionName, MatchKeyword("file_path", filePath), true, null, null, token);
|
||||
try
|
||||
{
|
||||
await this.GrpcClient.DeleteAsync(collectionName, MatchKeyword("file_path", filePath), true, null, null, token);
|
||||
}
|
||||
catch (RpcException exception) when (exception.StatusCode is StatusCode.NotFound)
|
||||
{
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
public override Task DeleteEmbeddingStore(string collectionName, CancellationToken token)
|
||||
public override async Task DeleteEmbeddingStore(string collectionName, CancellationToken token)
|
||||
{
|
||||
return this.GrpcClient.DeleteCollectionAsync(collectionName, cancellationToken: token);
|
||||
var exists = await this.GrpcClient.CollectionExistsAsync(collectionName, token);
|
||||
if (!exists)
|
||||
return;
|
||||
|
||||
try
|
||||
{
|
||||
await this.GrpcClient.DeleteCollectionAsync(collectionName, cancellationToken: token);
|
||||
}
|
||||
catch (RpcException exception) when (exception.StatusCode is StatusCode.NotFound)
|
||||
{
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
public override void Dispose() => this.GrpcClient.Dispose();
|
||||
|
||||
@ -59,7 +59,7 @@ public static class FileTypes
|
||||
|
||||
// Media hierarchy
|
||||
public static readonly FileTypeFilter IMAGE = FileTypeFilter.Leaf(TB("Image"),
|
||||
"jpg", "jpeg", "png", "gif", "bmp", "tiff", "svg", "webp", "heic");
|
||||
"jpg", "jpeg", "png", "gif", "bmp", "tiff", "svg", "webp", "heic", "avif");
|
||||
public static readonly FileTypeFilter AUDIO = FileTypeFilter.Leaf(TB("Audio"),
|
||||
"mp3", "wav", "wave", "aac", "flac", "ogg", "m4a", "wma", "alac", "aiff", "m4b");
|
||||
public static readonly FileTypeFilter VIDEO = FileTypeFilter.Leaf(TB("Video"),
|
||||
|
||||
@ -0,0 +1,86 @@
|
||||
using AIStudio.Settings.DataModel;
|
||||
using AIStudio.Tools.PluginSystem;
|
||||
|
||||
namespace AIStudio.Tools.Services;
|
||||
|
||||
public sealed record DataSourceEmbeddingOverview(
|
||||
bool IsVisible,
|
||||
DataSourceEmbeddingState State,
|
||||
int IndexedFiles,
|
||||
int TotalFiles,
|
||||
int FailedFiles);
|
||||
|
||||
public enum DataSourceEmbeddingState
|
||||
{
|
||||
IDLE,
|
||||
QUEUED,
|
||||
RUNNING,
|
||||
COMPLETED,
|
||||
FAILED,
|
||||
}
|
||||
|
||||
public sealed record DataSourceEmbeddingStatus(
|
||||
string DataSourceId,
|
||||
string DataSourceName,
|
||||
DataSourceType DataSourceType,
|
||||
DataSourceEmbeddingState State,
|
||||
int TotalFiles,
|
||||
int IndexedFiles,
|
||||
int FailedFiles,
|
||||
string CurrentFile,
|
||||
string LastError)
|
||||
{
|
||||
private static string TB(string fallbackEN) => I18N.I.T(fallbackEN, typeof(DataSourceEmbeddingService).Namespace, nameof(DataSourceEmbeddingService));
|
||||
|
||||
public int ProgressPercent => this.TotalFiles <= 0 ? 0 : Math.Clamp((int)Math.Round(this.IndexedFiles * 100d / this.TotalFiles), 0, 100);
|
||||
|
||||
public string StateLabel => this.State switch
|
||||
{
|
||||
DataSourceEmbeddingState.QUEUED => TB("Queued"),
|
||||
DataSourceEmbeddingState.RUNNING => TB("Running"),
|
||||
DataSourceEmbeddingState.COMPLETED => TB("Completed"),
|
||||
DataSourceEmbeddingState.FAILED => TB("Needs attention"),
|
||||
_ => TB("Idle")
|
||||
};
|
||||
|
||||
public int SortOrder => this.State switch
|
||||
{
|
||||
DataSourceEmbeddingState.RUNNING => 0,
|
||||
DataSourceEmbeddingState.QUEUED => 1,
|
||||
DataSourceEmbeddingState.FAILED => 2,
|
||||
DataSourceEmbeddingState.COMPLETED => 3,
|
||||
_ => 4,
|
||||
};
|
||||
}
|
||||
|
||||
public sealed class FileEnumerationResult
|
||||
{
|
||||
public List<FileInfo> Files { get; } = [];
|
||||
|
||||
public int FailedFiles { get; set; }
|
||||
|
||||
public string LastError { get; set; } = string.Empty;
|
||||
}
|
||||
|
||||
public sealed class PersistedEmbeddingState
|
||||
{
|
||||
public Dictionary<string, DataSourceEmbeddingManifest> DataSources { get; init; } = new(StringComparer.OrdinalIgnoreCase);
|
||||
}
|
||||
|
||||
public sealed class DataSourceEmbeddingManifest
|
||||
{
|
||||
public string EmbeddingProviderId { get; set; } = string.Empty;
|
||||
|
||||
public string EmbeddingSignature { get; set; } = string.Empty;
|
||||
|
||||
public int VectorSize { get; set; }
|
||||
|
||||
public Dictionary<string, EmbeddedFileRecord> Files { get; init; } = new(StringComparer.OrdinalIgnoreCase);
|
||||
}
|
||||
|
||||
public sealed record EmbeddedFileRecord(
|
||||
string Fingerprint,
|
||||
long FileSize,
|
||||
DateTime LastWriteUtc,
|
||||
DateTime EmbeddedAtUtc,
|
||||
int ChunkCount);
|
||||
@ -0,0 +1,242 @@
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
|
||||
using AIStudio.Settings;
|
||||
using AIStudio.Settings.DataModel;
|
||||
using AIStudio.Tools.PluginSystem;
|
||||
using AIStudio.Tools.Rust;
|
||||
|
||||
namespace AIStudio.Tools.Services;
|
||||
|
||||
public sealed partial class DataSourceEmbeddingService
|
||||
{
|
||||
private static readonly string[] ADDITIONAL_RAG_FILE_EXTENSIONS = ["csv", "tsv", "ods", "xlsm", "xlsb", "xla", "xlam"];
|
||||
private static readonly string[] SKIPPED_RAG_FILE_EXTENSIONS = ["lnk"];
|
||||
|
||||
private async IAsyncEnumerable<string> StreamEmbeddingChunksAsync(string filePath, [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken token)
|
||||
{
|
||||
if (this.IsImageFilePath(filePath))
|
||||
{
|
||||
yield return this.BuildImageIndexText(filePath);
|
||||
yield break;
|
||||
}
|
||||
|
||||
var currentChunk = new StringBuilder();
|
||||
|
||||
await foreach (var segment in this.rustService.StreamArbitraryFileData(filePath, token: token))
|
||||
{
|
||||
var normalized = NormalizeChunkSegment(segment);
|
||||
if (string.IsNullOrWhiteSpace(normalized))
|
||||
continue;
|
||||
|
||||
if (currentChunk.Length > 0 && currentChunk.Length + normalized.Length + Environment.NewLine.Length > MAX_CHUNK_LENGTH)
|
||||
{
|
||||
if (currentChunk.Length >= MIN_CHUNK_LENGTH)
|
||||
{
|
||||
var chunk = currentChunk.ToString().Trim();
|
||||
if (!string.IsNullOrWhiteSpace(chunk))
|
||||
yield return chunk;
|
||||
|
||||
var overlap = chunk.Length > CHUNK_OVERLAP_LENGTH
|
||||
? chunk[^CHUNK_OVERLAP_LENGTH..]
|
||||
: chunk;
|
||||
|
||||
currentChunk.Clear();
|
||||
currentChunk.Append(overlap);
|
||||
currentChunk.AppendLine();
|
||||
}
|
||||
else
|
||||
{
|
||||
currentChunk.AppendLine();
|
||||
}
|
||||
}
|
||||
|
||||
currentChunk.Append(normalized);
|
||||
currentChunk.AppendLine();
|
||||
}
|
||||
|
||||
var finalChunk = currentChunk.ToString().Trim();
|
||||
if (!string.IsNullOrWhiteSpace(finalChunk))
|
||||
yield return finalChunk;
|
||||
}
|
||||
|
||||
private FileEnumerationResult GetInputFiles(IDataSource dataSource)
|
||||
{
|
||||
var result = new FileEnumerationResult();
|
||||
|
||||
switch (dataSource)
|
||||
{
|
||||
case DataSourceLocalFile localFile when File.Exists(localFile.FilePath):
|
||||
if (this.IsSupportedRagFilePath(localFile.FilePath))
|
||||
{
|
||||
result.Files.Add(new FileInfo(localFile.FilePath));
|
||||
}
|
||||
else
|
||||
{
|
||||
result.FailedFiles = 1;
|
||||
result.LastError = $"The selected file '{localFile.FilePath}' is not supported for background embeddings.";
|
||||
}
|
||||
|
||||
return result;
|
||||
|
||||
case DataSourceLocalDirectory localDirectory when Directory.Exists(localDirectory.Path):
|
||||
this.EnumerateAccessibleFiles(localDirectory.Path, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
switch (dataSource)
|
||||
{
|
||||
case DataSourceLocalFile localFile:
|
||||
result.FailedFiles = 1;
|
||||
result.LastError = $"The selected file '{localFile.FilePath}' does not exist.";
|
||||
break;
|
||||
|
||||
case DataSourceLocalDirectory localDirectory:
|
||||
result.FailedFiles = 1;
|
||||
result.LastError = $"The selected directory '{localDirectory.Path}' does not exist.";
|
||||
break;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private void EnumerateAccessibleFiles(string rootPath, FileEnumerationResult result)
|
||||
{
|
||||
var pendingDirectories = new Stack<string>();
|
||||
pendingDirectories.Push(rootPath);
|
||||
|
||||
while (pendingDirectories.Count > 0)
|
||||
{
|
||||
var currentPath = pendingDirectories.Pop();
|
||||
IEnumerable<string> subDirectories;
|
||||
IEnumerable<string> files;
|
||||
|
||||
try
|
||||
{
|
||||
subDirectories = Directory.EnumerateDirectories(currentPath);
|
||||
files = Directory.EnumerateFiles(currentPath);
|
||||
}
|
||||
catch (Exception exception)
|
||||
{
|
||||
this.logger.LogWarning(exception, "Cannot access directory '{DirectoryPath}' while indexing.", currentPath);
|
||||
result.FailedFiles++;
|
||||
result.LastError = $"The directory '{currentPath}' could not be accessed.";
|
||||
continue;
|
||||
}
|
||||
|
||||
foreach (var filePath in files)
|
||||
{
|
||||
FileInfo fileInfo;
|
||||
try
|
||||
{
|
||||
fileInfo = new FileInfo(filePath);
|
||||
if (!fileInfo.Exists)
|
||||
continue;
|
||||
}
|
||||
catch (Exception exception)
|
||||
{
|
||||
this.logger.LogWarning(exception, "Cannot inspect file '{FilePath}' while indexing.", filePath);
|
||||
result.FailedFiles++;
|
||||
result.LastError = $"The file '{filePath}' could not be inspected.";
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!this.IsSupportedRagFilePath(fileInfo.FullName))
|
||||
continue;
|
||||
|
||||
result.Files.Add(fileInfo);
|
||||
}
|
||||
|
||||
foreach (var subDirectory in subDirectories)
|
||||
pendingDirectories.Push(subDirectory);
|
||||
}
|
||||
}
|
||||
|
||||
private string TryGetRelativePath(IDataSource dataSource, FileInfo file) => dataSource switch
|
||||
{
|
||||
DataSourceLocalDirectory localDirectory => Path.GetRelativePath(localDirectory.Path, file.FullName),
|
||||
_ => file.Name
|
||||
};
|
||||
|
||||
private static string NormalizeChunkSegment(string input)
|
||||
{
|
||||
return input
|
||||
.Replace("\r\n", "\n", StringComparison.Ordinal)
|
||||
.Replace('\r', '\n')
|
||||
.Trim();
|
||||
}
|
||||
|
||||
private bool IsImageFilePath(string filePath)
|
||||
{
|
||||
return FileTypes.IsAllowedPath(filePath, FileTypes.IMAGE);
|
||||
}
|
||||
|
||||
private bool IsSupportedRagFilePath(string filePath)
|
||||
{
|
||||
var extension = Path.GetExtension(filePath).TrimStart('.');
|
||||
if (SKIPPED_RAG_FILE_EXTENSIONS.Contains(extension, StringComparer.OrdinalIgnoreCase))
|
||||
return false;
|
||||
|
||||
return FileTypes.IsAllowedPath(filePath, FileTypes.DOCUMENT, FileTypes.IMAGE)
|
||||
|| ADDITIONAL_RAG_FILE_EXTENSIONS.Contains(extension, StringComparer.OrdinalIgnoreCase);
|
||||
}
|
||||
|
||||
private string BuildImageIndexText(string filePath)
|
||||
{
|
||||
var fileName = Path.GetFileName(filePath);
|
||||
var fileNameWithoutExtension = Path.GetFileNameWithoutExtension(filePath);
|
||||
var extension = Path.GetExtension(filePath).TrimStart('.');
|
||||
var normalizedName = fileNameWithoutExtension
|
||||
.Replace('_', ' ')
|
||||
.Replace('-', ' ')
|
||||
.Trim();
|
||||
|
||||
return $$"""
|
||||
Image asset
|
||||
File name: {{fileName}}
|
||||
Type: {{extension}}
|
||||
Search terms: {{normalizedName}}
|
||||
Path: {{filePath}}
|
||||
Note: The current RAG embedding pipeline stores image files by metadata only. Visual content is not OCRed or captioned yet.
|
||||
""";
|
||||
}
|
||||
|
||||
private string BuildEmbeddingSignature(EmbeddingProvider embeddingProvider)
|
||||
{
|
||||
return string.Join('|',
|
||||
embeddingProvider.Id,
|
||||
embeddingProvider.UsedLLMProvider,
|
||||
embeddingProvider.Model.Id,
|
||||
embeddingProvider.Host,
|
||||
embeddingProvider.Hostname,
|
||||
embeddingProvider.TokenizerPath);
|
||||
}
|
||||
|
||||
private string BuildFingerprint(FileInfo file)
|
||||
{
|
||||
var fingerprintSource = $"{file.FullName}|{file.Length}|{file.LastWriteTimeUtc.Ticks}";
|
||||
var bytes = SHA256.HashData(Encoding.UTF8.GetBytes(fingerprintSource));
|
||||
return Convert.ToHexString(bytes);
|
||||
}
|
||||
|
||||
private string GetCollectionName(string dataSourceId)
|
||||
{
|
||||
var safeId = dataSourceId
|
||||
.ToLowerInvariant()
|
||||
.Replace("-", string.Empty, StringComparison.Ordinal);
|
||||
|
||||
return $"rag_{safeId}";
|
||||
}
|
||||
|
||||
private string CreatePointId(string dataSourceId, string fingerprint, int chunkIndex)
|
||||
{
|
||||
var source = $"{dataSourceId}:{fingerprint}:{chunkIndex}";
|
||||
var hash = SHA256.HashData(Encoding.UTF8.GetBytes(source));
|
||||
var guidBytes = hash[..16].ToArray();
|
||||
|
||||
guidBytes[6] = (byte)((guidBytes[6] & 0x0F) | 0x40);
|
||||
guidBytes[8] = (byte)((guidBytes[8] & 0x3F) | 0x80);
|
||||
|
||||
return new Guid(guidBytes).ToString();
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,89 @@
|
||||
using System.Text.Json;
|
||||
|
||||
using AIStudio.Settings;
|
||||
|
||||
namespace AIStudio.Tools.Services;
|
||||
|
||||
public sealed partial class DataSourceEmbeddingService
|
||||
{
|
||||
private const string STATE_FILENAME = "rag-embedding-state.json";
|
||||
|
||||
private readonly JsonSerializerOptions jsonOptions = new()
|
||||
{
|
||||
WriteIndented = true,
|
||||
};
|
||||
|
||||
private async Task EnsureStateLoadedAsync(CancellationToken token)
|
||||
{
|
||||
if (this.stateLoaded)
|
||||
return;
|
||||
|
||||
await this.stateLock.WaitAsync(token);
|
||||
try
|
||||
{
|
||||
if (this.stateLoaded)
|
||||
return;
|
||||
|
||||
var statePath = this.GetStatePath();
|
||||
if (!string.IsNullOrWhiteSpace(statePath) && File.Exists(statePath))
|
||||
{
|
||||
var json = await File.ReadAllTextAsync(statePath, token);
|
||||
var persistedState = JsonSerializer.Deserialize<PersistedEmbeddingState>(json, this.jsonOptions);
|
||||
this.manifests = persistedState?.DataSources ?? new Dictionary<string, DataSourceEmbeddingManifest>(StringComparer.OrdinalIgnoreCase);
|
||||
}
|
||||
|
||||
this.stateLoaded = true;
|
||||
}
|
||||
finally
|
||||
{
|
||||
this.stateLock.Release();
|
||||
}
|
||||
}
|
||||
|
||||
private async Task<DataSourceEmbeddingManifest> GetManifestAsync(string dataSourceId, CancellationToken token)
|
||||
{
|
||||
await this.EnsureStateLoadedAsync(token);
|
||||
if (this.manifests.TryGetValue(dataSourceId, out var manifest))
|
||||
return manifest;
|
||||
|
||||
manifest = new DataSourceEmbeddingManifest();
|
||||
this.manifests[dataSourceId] = manifest;
|
||||
return manifest;
|
||||
}
|
||||
|
||||
private async Task SaveStateAsync(CancellationToken token)
|
||||
{
|
||||
var statePath = this.GetStatePath();
|
||||
if (string.IsNullOrWhiteSpace(statePath))
|
||||
return;
|
||||
|
||||
var directory = Path.GetDirectoryName(statePath);
|
||||
if (!string.IsNullOrWhiteSpace(directory))
|
||||
Directory.CreateDirectory(directory);
|
||||
|
||||
var persistedState = new PersistedEmbeddingState
|
||||
{
|
||||
DataSources = this.manifests
|
||||
};
|
||||
|
||||
var json = JsonSerializer.Serialize(persistedState, this.jsonOptions);
|
||||
await File.WriteAllTextAsync(statePath, json, token);
|
||||
}
|
||||
|
||||
private async Task ResetPersistedStateAsync(string dataSourceId)
|
||||
{
|
||||
await this.EnsureStateLoadedAsync(CancellationToken.None);
|
||||
this.manifests.Remove(dataSourceId);
|
||||
await this.DeleteCollectionAsync(this.GetCollectionName(dataSourceId));
|
||||
await this.SaveStateAsync(CancellationToken.None);
|
||||
this.logger.LogInformation("Reset persisted embedding state for data source '{DataSourceId}'.", dataSourceId);
|
||||
}
|
||||
|
||||
private string GetStatePath()
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(SettingsManager.ConfigDirectory))
|
||||
return string.Empty;
|
||||
|
||||
return Path.Combine(SettingsManager.ConfigDirectory, STATE_FILENAME);
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,226 @@
|
||||
using System.Collections.Concurrent;
|
||||
using AIStudio.Settings;
|
||||
using AIStudio.Settings.DataModel;
|
||||
|
||||
namespace AIStudio.Tools.Services;
|
||||
|
||||
public sealed partial class DataSourceEmbeddingService
|
||||
{
|
||||
private const int WATCHER_DEBOUNCE_SECONDS = 2;
|
||||
|
||||
private readonly ConcurrentDictionary<string, DataSourceWatcherRegistration> watchers = new(StringComparer.OrdinalIgnoreCase);
|
||||
private readonly Dictionary<string, CancellationTokenSource> watcherDebounceTokens = new(StringComparer.OrdinalIgnoreCase);
|
||||
private readonly object watcherDebounceLock = new();
|
||||
|
||||
private void RefreshWatchers()
|
||||
{
|
||||
if (!this.settingsManager.ConfigurationData.DataSourceIndexing.AutomaticRefresh)
|
||||
{
|
||||
this.RemoveAllWatchers();
|
||||
return;
|
||||
}
|
||||
|
||||
var supportedSources = this.settingsManager.ConfigurationData.DataSources
|
||||
.Where(this.IsSupportedInternalDataSource)
|
||||
.ToDictionary(source => source.Id, StringComparer.OrdinalIgnoreCase);
|
||||
|
||||
foreach (var existingWatcherId in this.watchers.Keys.Except(supportedSources.Keys, StringComparer.OrdinalIgnoreCase).ToList())
|
||||
this.RemoveWatcher(existingWatcherId);
|
||||
|
||||
foreach (var dataSource in supportedSources.Values)
|
||||
this.EnsureWatcher(dataSource);
|
||||
}
|
||||
|
||||
private void EnsureWatcher(IDataSource dataSource)
|
||||
{
|
||||
if (!this.settingsManager.ConfigurationData.DataSourceIndexing.AutomaticRefresh)
|
||||
return;
|
||||
|
||||
var configuration = GetWatchConfiguration(dataSource);
|
||||
if (configuration is null)
|
||||
return;
|
||||
|
||||
if (this.watchers.TryGetValue(dataSource.Id, out var existingRegistration))
|
||||
{
|
||||
if (IsSameWatchConfiguration(existingRegistration.Configuration, configuration))
|
||||
return;
|
||||
|
||||
this.RemoveWatcher(dataSource.Id);
|
||||
}
|
||||
|
||||
var watcher = this.CreateWatcher(dataSource.Id, configuration);
|
||||
if (watcher is null)
|
||||
return;
|
||||
|
||||
if (!this.watchers.TryAdd(dataSource.Id, new DataSourceWatcherRegistration(watcher, configuration)))
|
||||
watcher.Dispose();
|
||||
}
|
||||
|
||||
private FileSystemWatcher? CreateWatcher(string dataSourceId, DataSourceWatcherConfiguration configuration)
|
||||
{
|
||||
try
|
||||
{
|
||||
var watcher = new FileSystemWatcher(configuration.RootPath)
|
||||
{
|
||||
Filter = configuration.Filter,
|
||||
IncludeSubdirectories = configuration.IncludeSubdirectories,
|
||||
NotifyFilter = NotifyFilters.FileName | NotifyFilters.DirectoryName | NotifyFilters.LastWrite | NotifyFilters.CreationTime | NotifyFilters.Size,
|
||||
};
|
||||
|
||||
watcher.Changed += (_, _) => this.OnWatchedDataSourceChanged(dataSourceId);
|
||||
watcher.Deleted += (_, _) => this.OnWatchedDataSourceChanged(dataSourceId);
|
||||
watcher.Created += (_, _) => this.OnWatchedDataSourceChanged(dataSourceId);
|
||||
watcher.Renamed += (_, _) => this.OnWatchedDataSourceChanged(dataSourceId);
|
||||
watcher.Error += (_, args) =>
|
||||
{
|
||||
this.logger.LogWarning(args.GetException(), "The file watcher for data source '{DataSourceId}' failed. Recreating it.", dataSourceId);
|
||||
this.RemoveWatcher(dataSourceId);
|
||||
this.EnsureWatcher(dataSourceId);
|
||||
this.OnWatchedDataSourceChanged(dataSourceId);
|
||||
};
|
||||
watcher.EnableRaisingEvents = true;
|
||||
return watcher;
|
||||
}
|
||||
catch (Exception exception)
|
||||
{
|
||||
this.logger.LogWarning(exception, "Failed to create file watcher for data source '{DataSourceId}' at '{RootPath}'.", dataSourceId, configuration.RootPath);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private void RemoveWatcher(string dataSourceId)
|
||||
{
|
||||
this.CancelPendingWatcherRefresh(dataSourceId);
|
||||
|
||||
if (this.watchers.TryRemove(dataSourceId, out var registration))
|
||||
registration.Watcher.Dispose();
|
||||
}
|
||||
|
||||
private void RemoveAllWatchers()
|
||||
{
|
||||
foreach (var watcherId in this.watchers.Keys.ToList())
|
||||
this.RemoveWatcher(watcherId);
|
||||
}
|
||||
|
||||
private void DisposeWatchers()
|
||||
{
|
||||
this.CancelAllPendingWatcherRefreshes();
|
||||
|
||||
foreach (var registration in this.watchers.Values)
|
||||
registration.Watcher.Dispose();
|
||||
|
||||
this.watchers.Clear();
|
||||
}
|
||||
|
||||
private void OnWatchedDataSourceChanged(string dataSourceId)
|
||||
{
|
||||
if (!this.settingsManager.ConfigurationData.DataSourceIndexing.AutomaticRefresh)
|
||||
return;
|
||||
|
||||
this.logger.LogDebug("Detected file system change for data source '{DataSourceId}'. Scheduling a debounced embedding run.", dataSourceId);
|
||||
var debounceToken = new CancellationTokenSource();
|
||||
|
||||
lock (this.watcherDebounceLock)
|
||||
{
|
||||
if (this.watcherDebounceTokens.Remove(dataSourceId, out var existingToken))
|
||||
existingToken.Cancel();
|
||||
|
||||
this.watcherDebounceTokens[dataSourceId] = debounceToken;
|
||||
}
|
||||
|
||||
_ = Task.Run(async () =>
|
||||
{
|
||||
try
|
||||
{
|
||||
await Task.Delay(TimeSpan.FromSeconds(WATCHER_DEBOUNCE_SECONDS), debounceToken.Token);
|
||||
if (!this.TryCompletePendingWatcherRefresh(dataSourceId, debounceToken))
|
||||
return;
|
||||
|
||||
var dataSource = this.settingsManager.ConfigurationData.DataSources
|
||||
.FirstOrDefault(source => source.Id.Equals(dataSourceId, StringComparison.OrdinalIgnoreCase));
|
||||
|
||||
if (dataSource is not null)
|
||||
{
|
||||
this.logger.LogInformation("Queueing data source '{DataSourceName}' ({DataSourceId}) after file system changes settled.", dataSource.Name, dataSource.Id);
|
||||
await this.QueueDataSourceAsync(dataSource);
|
||||
}
|
||||
}
|
||||
catch (OperationCanceledException)
|
||||
{
|
||||
}
|
||||
catch (Exception exception)
|
||||
{
|
||||
this.logger.LogWarning(exception, "Failed to queue watched data source '{DataSourceId}' after a file system change.", dataSourceId);
|
||||
}
|
||||
finally
|
||||
{
|
||||
debounceToken.Dispose();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
private void EnsureWatcher(string dataSourceId)
|
||||
{
|
||||
var dataSource = this.settingsManager.ConfigurationData.DataSources
|
||||
.FirstOrDefault(source => source.Id.Equals(dataSourceId, StringComparison.OrdinalIgnoreCase));
|
||||
|
||||
if (dataSource is not null)
|
||||
this.EnsureWatcher(dataSource);
|
||||
}
|
||||
|
||||
private void CancelPendingWatcherRefresh(string dataSourceId)
|
||||
{
|
||||
lock (this.watcherDebounceLock)
|
||||
{
|
||||
if (this.watcherDebounceTokens.Remove(dataSourceId, out var token))
|
||||
token.Cancel();
|
||||
}
|
||||
}
|
||||
|
||||
private void CancelAllPendingWatcherRefreshes()
|
||||
{
|
||||
lock (this.watcherDebounceLock)
|
||||
{
|
||||
foreach (var token in this.watcherDebounceTokens.Values)
|
||||
token.Cancel();
|
||||
|
||||
this.watcherDebounceTokens.Clear();
|
||||
}
|
||||
}
|
||||
|
||||
private bool TryCompletePendingWatcherRefresh(string dataSourceId, CancellationTokenSource debounceToken)
|
||||
{
|
||||
lock (this.watcherDebounceLock)
|
||||
{
|
||||
if (!this.watcherDebounceTokens.TryGetValue(dataSourceId, out var currentToken) || !ReferenceEquals(currentToken, debounceToken))
|
||||
return false;
|
||||
|
||||
this.watcherDebounceTokens.Remove(dataSourceId);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
private static DataSourceWatcherConfiguration? GetWatchConfiguration(IDataSource dataSource) => dataSource switch
|
||||
{
|
||||
DataSourceLocalDirectory localDirectory when Directory.Exists(localDirectory.Path) => new DataSourceWatcherConfiguration(
|
||||
localDirectory.Path,
|
||||
"*.*",
|
||||
true),
|
||||
DataSourceLocalFile localFile when File.Exists(localFile.FilePath) && !string.IsNullOrWhiteSpace(Path.GetDirectoryName(localFile.FilePath)) => new DataSourceWatcherConfiguration(
|
||||
Path.GetDirectoryName(localFile.FilePath)!,
|
||||
Path.GetFileName(localFile.FilePath),
|
||||
false),
|
||||
_ => null,
|
||||
};
|
||||
|
||||
private static bool IsSameWatchConfiguration(DataSourceWatcherConfiguration left, DataSourceWatcherConfiguration right)
|
||||
{
|
||||
return left.IncludeSubdirectories == right.IncludeSubdirectories
|
||||
&& string.Equals(left.RootPath, right.RootPath, StringComparison.OrdinalIgnoreCase)
|
||||
&& string.Equals(left.Filter, right.Filter, StringComparison.OrdinalIgnoreCase);
|
||||
}
|
||||
|
||||
private sealed record DataSourceWatcherConfiguration(string RootPath, string Filter, bool IncludeSubdirectories);
|
||||
|
||||
private sealed record DataSourceWatcherRegistration(FileSystemWatcher Watcher, DataSourceWatcherConfiguration Configuration);
|
||||
}
|
||||
@ -1,7 +1,5 @@
|
||||
using System.Collections.Concurrent;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using System.Threading.Channels;
|
||||
|
||||
using AIStudio.Provider;
|
||||
@ -13,9 +11,8 @@ using AIStudio.Tools.Rust;
|
||||
|
||||
namespace AIStudio.Tools.Services;
|
||||
|
||||
public sealed class DataSourceEmbeddingService : BackgroundService
|
||||
public sealed partial class DataSourceEmbeddingService : BackgroundService
|
||||
{
|
||||
private const string STATE_FILENAME = "rag-embedding-state.json";
|
||||
private const int MAX_CHUNK_LENGTH = 3_200;
|
||||
private const int MIN_CHUNK_LENGTH = 800;
|
||||
private const int CHUNK_OVERLAP_LENGTH = 320;
|
||||
@ -28,12 +25,7 @@ public sealed class DataSourceEmbeddingService : BackgroundService
|
||||
private readonly Channel<string> queue = Channel.CreateUnbounded<string>();
|
||||
private readonly ConcurrentDictionary<string, byte> queuedIds = new(StringComparer.OrdinalIgnoreCase);
|
||||
private readonly ConcurrentDictionary<string, DataSourceEmbeddingStatus> statuses = new(StringComparer.OrdinalIgnoreCase);
|
||||
private readonly ConcurrentDictionary<string, FileSystemWatcher> watchers = new(StringComparer.OrdinalIgnoreCase);
|
||||
private readonly SemaphoreSlim stateLock = new(1, 1);
|
||||
private readonly JsonSerializerOptions jsonOptions = new()
|
||||
{
|
||||
WriteIndented = true,
|
||||
};
|
||||
|
||||
private static string TB(string fallbackEN) => I18N.I.T(fallbackEN, typeof(DataSourceEmbeddingService).Namespace, nameof(DataSourceEmbeddingService));
|
||||
|
||||
@ -58,11 +50,7 @@ public sealed class DataSourceEmbeddingService : BackgroundService
|
||||
|
||||
public DataSourceEmbeddingOverview GetOverview()
|
||||
{
|
||||
var orderedStatuses = this.statuses.Values
|
||||
.OrderBy(status => status.SortOrder)
|
||||
.ThenBy(status => status.DataSourceName, StringComparer.OrdinalIgnoreCase)
|
||||
.ToList();
|
||||
|
||||
var orderedStatuses = this.GetStatuses();
|
||||
var activeStatus = orderedStatuses
|
||||
.FirstOrDefault(status => status.State is DataSourceEmbeddingState.QUEUED or DataSourceEmbeddingState.RUNNING);
|
||||
|
||||
@ -71,21 +59,19 @@ public sealed class DataSourceEmbeddingService : BackgroundService
|
||||
var total = Math.Max(activeStatus.TotalFiles, 1);
|
||||
return new(
|
||||
true,
|
||||
false,
|
||||
activeStatus.State,
|
||||
activeStatus.IndexedFiles,
|
||||
total,
|
||||
activeStatus.FailedFiles,
|
||||
$"{TB("Embedding")} {activeStatus.IndexedFiles}/{total}");
|
||||
activeStatus.FailedFiles);
|
||||
}
|
||||
|
||||
var failedStatus = orderedStatuses
|
||||
.FirstOrDefault(status => status.State is DataSourceEmbeddingState.FAILED || status.FailedFiles > 0);
|
||||
|
||||
if (failedStatus is not null)
|
||||
return new(true, true, DataSourceEmbeddingState.FAILED, failedStatus.IndexedFiles, failedStatus.TotalFiles, failedStatus.FailedFiles, TB("Embeddings"));
|
||||
return new(true, DataSourceEmbeddingState.FAILED, failedStatus.IndexedFiles, failedStatus.TotalFiles, failedStatus.FailedFiles);
|
||||
|
||||
return new(false, true, DataSourceEmbeddingState.COMPLETED, 0, 0, 0, TB("Embeddings"));
|
||||
return new(false, DataSourceEmbeddingState.COMPLETED, 0, 0, 0);
|
||||
}
|
||||
|
||||
public Task QueueAllInternalDataSourcesAsync()
|
||||
@ -99,6 +85,22 @@ public sealed class DataSourceEmbeddingService : BackgroundService
|
||||
return Task.WhenAll(tasks);
|
||||
}
|
||||
|
||||
public Task QueueAllInternalDataSourcesIfAutomaticRefreshAsync()
|
||||
{
|
||||
if (!this.settingsManager.ConfigurationData.DataSourceIndexing.AutomaticRefresh)
|
||||
{
|
||||
this.RefreshWatchers();
|
||||
return Task.CompletedTask;
|
||||
}
|
||||
|
||||
return this.QueueAllInternalDataSourcesAsync();
|
||||
}
|
||||
|
||||
public void RefreshAutomaticWatchers()
|
||||
{
|
||||
this.RefreshWatchers();
|
||||
}
|
||||
|
||||
public async Task QueueDataSourceAsync(IDataSource dataSource)
|
||||
{
|
||||
if (!this.IsSupportedInternalDataSource(dataSource))
|
||||
@ -106,23 +108,14 @@ public sealed class DataSourceEmbeddingService : BackgroundService
|
||||
|
||||
this.logger.LogInformation("Queueing data source '{DataSourceName}' ({DataSourceId}) for background embeddings.", dataSource.Name, dataSource.Id);
|
||||
this.RefreshWatchers();
|
||||
this.logger.LogDebug("Adding watcher for data source '{DataSourceName}' ({DataSourceId}).", dataSource.Name, dataSource.Id);
|
||||
|
||||
if (!this.statuses.TryGetValue(dataSource.Id, out var currentStatus) || currentStatus.State is not DataSourceEmbeddingState.RUNNING)
|
||||
{
|
||||
this.UpsertStatus(new DataSourceEmbeddingStatus(
|
||||
dataSource.Id,
|
||||
dataSource.Name,
|
||||
dataSource.Type,
|
||||
DataSourceEmbeddingState.QUEUED,
|
||||
currentStatus?.TotalFiles ?? 0,
|
||||
currentStatus?.IndexedFiles ?? 0,
|
||||
currentStatus?.FailedFiles ?? 0,
|
||||
string.Empty,
|
||||
string.Empty));
|
||||
}
|
||||
|
||||
this.UpsertStatus(this.CreateStatus(dataSource, DataSourceEmbeddingState.QUEUED, currentStatus?.TotalFiles ?? 0, currentStatus?.IndexedFiles ?? 0, currentStatus?.FailedFiles ?? 0));
|
||||
this.logger.LogDebug("Upserting status for data source '{DataSourceName}' ({DataSourceId}).", dataSource.Name, dataSource.Id);
|
||||
if (this.queuedIds.TryAdd(dataSource.Id, 0))
|
||||
await this.queue.Writer.WriteAsync(dataSource.Id);
|
||||
this.logger.LogDebug("Queued data source '{DataSourceName}' ({DataSourceId}).", dataSource.Name, dataSource.Id);
|
||||
}
|
||||
|
||||
public async Task RemoveDataSourceAsync(IDataSource dataSource)
|
||||
@ -169,10 +162,7 @@ public sealed class DataSourceEmbeddingService : BackgroundService
|
||||
|
||||
public override void Dispose()
|
||||
{
|
||||
foreach (var watcher in this.watchers.Values)
|
||||
watcher.Dispose();
|
||||
|
||||
this.watchers.Clear();
|
||||
this.DisposeWatchers();
|
||||
this.stateLock.Dispose();
|
||||
base.Dispose();
|
||||
}
|
||||
@ -193,11 +183,7 @@ public sealed class DataSourceEmbeddingService : BackgroundService
|
||||
return;
|
||||
}
|
||||
|
||||
var embeddingProvider = this.settingsManager.ConfigurationData.EmbeddingProviders.FirstOrDefault(provider =>
|
||||
dataSource is IInternalDataSource internalDataSource &&
|
||||
provider.Id.Equals(internalDataSource.EmbeddingId, StringComparison.OrdinalIgnoreCase));
|
||||
|
||||
if (embeddingProvider == default || embeddingProvider.UsedLLMProvider is LLMProviders.NONE)
|
||||
if (!this.TryResolveEmbeddingProvider(dataSource, out var embeddingProvider))
|
||||
{
|
||||
this.UpsertStatus(this.GetFallbackStatus(dataSource, "The selected embedding provider is not available."));
|
||||
return;
|
||||
@ -210,29 +196,11 @@ public sealed class DataSourceEmbeddingService : BackgroundService
|
||||
dataSource.Name,
|
||||
dataSource.Id);
|
||||
|
||||
var embeddingSignature = this.BuildEmbeddingSignature(embeddingProvider);
|
||||
var manifest = await this.GetManifestAsync(dataSource.Id, token);
|
||||
// A provider/model change invalidates the existing collection because the vectors are no longer comparable.
|
||||
if (!string.Equals(manifest.EmbeddingSignature, embeddingSignature, StringComparison.Ordinal))
|
||||
{
|
||||
this.logger.LogInformation(
|
||||
"Embedding configuration changed for data source '{DataSourceName}' ({DataSourceId}). Resetting persisted state and collection '{CollectionName}'.",
|
||||
dataSource.Name,
|
||||
dataSource.Id,
|
||||
this.GetCollectionName(dataSource.Id));
|
||||
await this.ResetPersistedStateAsync(dataSource.Id);
|
||||
manifest = await this.GetManifestAsync(dataSource.Id, token);
|
||||
manifest.EmbeddingProviderId = embeddingProvider.Id;
|
||||
manifest.EmbeddingSignature = embeddingSignature;
|
||||
await this.SaveStateAsync(token);
|
||||
}
|
||||
|
||||
var collectionName = this.GetCollectionName(dataSource.Id);
|
||||
var manifest = await this.EnsureCompatibleManifestAsync(dataSource, embeddingProvider, collectionName, token);
|
||||
var inputFiles = this.GetInputFiles(dataSource);
|
||||
var indexedFiles = inputFiles.Files;
|
||||
var totalFiles = indexedFiles.Count + inputFiles.FailedFiles;
|
||||
var existingPaths = indexedFiles
|
||||
.Select(file => file.FullName)
|
||||
.ToHashSet(StringComparer.OrdinalIgnoreCase);
|
||||
|
||||
this.logger.LogInformation(
|
||||
"Prepared data source '{DataSourceName}' ({DataSourceId}) for embedding. AccessibleFiles={AccessibleFiles}, FailedFiles={FailedFiles}, Collection='{CollectionName}'.",
|
||||
@ -240,32 +208,18 @@ public sealed class DataSourceEmbeddingService : BackgroundService
|
||||
dataSource.Id,
|
||||
indexedFiles.Count,
|
||||
inputFiles.FailedFiles,
|
||||
this.GetCollectionName(dataSource.Id));
|
||||
|
||||
foreach (var removedFilePath in manifest.Files.Keys.Except(existingPaths, StringComparer.OrdinalIgnoreCase).ToList())
|
||||
{
|
||||
await this.DeleteFilePointsAsync(this.GetCollectionName(dataSource.Id), removedFilePath, token);
|
||||
manifest.Files.Remove(removedFilePath);
|
||||
this.logger.LogInformation(
|
||||
"Removed stale embeddings for deleted file '{FilePath}' from data source '{DataSourceName}' ({DataSourceId}).",
|
||||
removedFilePath,
|
||||
dataSource.Name,
|
||||
dataSource.Id);
|
||||
}
|
||||
collectionName);
|
||||
|
||||
await this.RemoveMissingFileEmbeddingsAsync(dataSource, collectionName, manifest, indexedFiles, token);
|
||||
await this.SaveStateAsync(token);
|
||||
|
||||
// Keep the UI status in sync with the long-running file loop below.
|
||||
this.UpsertStatus(new DataSourceEmbeddingStatus(
|
||||
dataSource.Id,
|
||||
dataSource.Name,
|
||||
dataSource.Type,
|
||||
this.UpsertStatus(this.CreateStatus(
|
||||
dataSource,
|
||||
DataSourceEmbeddingState.RUNNING,
|
||||
totalFiles,
|
||||
0,
|
||||
inputFiles.FailedFiles,
|
||||
string.Empty,
|
||||
inputFiles.LastError));
|
||||
lastError: inputFiles.LastError));
|
||||
|
||||
var provider = embeddingProvider.CreateProvider();
|
||||
var skippedFiles = 0;
|
||||
@ -287,29 +241,11 @@ public sealed class DataSourceEmbeddingService : BackgroundService
|
||||
dataSource.Name,
|
||||
dataSource.Id);
|
||||
skippedFiles++;
|
||||
this.UpsertStatus(new DataSourceEmbeddingStatus(
|
||||
dataSource.Id,
|
||||
dataSource.Name,
|
||||
dataSource.Type,
|
||||
DataSourceEmbeddingState.RUNNING,
|
||||
totalFiles,
|
||||
skippedFiles + completedFiles,
|
||||
failedFiles,
|
||||
string.Empty,
|
||||
lastError));
|
||||
this.UpsertStatus(this.CreateStatus(dataSource, DataSourceEmbeddingState.RUNNING, totalFiles, skippedFiles + completedFiles, failedFiles, lastError: lastError));
|
||||
continue;
|
||||
}
|
||||
|
||||
this.UpsertStatus(new DataSourceEmbeddingStatus(
|
||||
dataSource.Id,
|
||||
dataSource.Name,
|
||||
dataSource.Type,
|
||||
DataSourceEmbeddingState.RUNNING,
|
||||
totalFiles,
|
||||
skippedFiles + completedFiles,
|
||||
failedFiles,
|
||||
file.Name,
|
||||
lastError));
|
||||
this.UpsertStatus(this.CreateStatus(dataSource, DataSourceEmbeddingState.RUNNING, totalFiles, skippedFiles + completedFiles, failedFiles, file.Name, lastError));
|
||||
|
||||
try
|
||||
{
|
||||
@ -343,37 +279,15 @@ public sealed class DataSourceEmbeddingService : BackgroundService
|
||||
failedFiles++;
|
||||
lastError = exception.Message;
|
||||
manifest.Files.Remove(file.FullName);
|
||||
await this.DeleteFilePointsAsync(this.GetCollectionName(dataSource.Id), file.FullName, token);
|
||||
await this.DeleteFilePointsAsync(collectionName, file.FullName, token);
|
||||
await this.SaveStateAsync(token);
|
||||
|
||||
this.logger.LogWarning(exception, "Failed to embed file '{FilePath}' for data source '{DataSourceName}'.", file.FullName, dataSource.Name);
|
||||
this.UpsertStatus(new DataSourceEmbeddingStatus(
|
||||
dataSource.Id,
|
||||
dataSource.Name,
|
||||
dataSource.Type,
|
||||
DataSourceEmbeddingState.RUNNING,
|
||||
totalFiles,
|
||||
skippedFiles + completedFiles,
|
||||
failedFiles,
|
||||
file.Name,
|
||||
exception.Message));
|
||||
this.UpsertStatus(this.CreateStatus(dataSource, DataSourceEmbeddingState.RUNNING, totalFiles, skippedFiles + completedFiles, failedFiles, file.Name, exception.Message));
|
||||
}
|
||||
}
|
||||
|
||||
this.UpsertStatus(new DataSourceEmbeddingStatus(
|
||||
dataSource.Id,
|
||||
dataSource.Name,
|
||||
dataSource.Type,
|
||||
failedFiles > 0 ? DataSourceEmbeddingState.FAILED : DataSourceEmbeddingState.COMPLETED,
|
||||
totalFiles,
|
||||
skippedFiles + completedFiles,
|
||||
failedFiles,
|
||||
string.Empty,
|
||||
failedFiles > 0
|
||||
? string.IsNullOrWhiteSpace(lastError)
|
||||
? "Some files could not be embedded. See the logs for details."
|
||||
: lastError
|
||||
: string.Empty));
|
||||
this.UpsertStatus(this.CreateCompletedStatus(dataSource, totalFiles, skippedFiles + completedFiles, failedFiles, lastError));
|
||||
this.logger.LogInformation(
|
||||
"Finished background embeddings for data source '{DataSourceName}' ({DataSourceId}). Indexed={IndexedFiles}, Failed={FailedFiles}, Total={TotalFiles}.",
|
||||
dataSource.Name,
|
||||
@ -459,7 +373,6 @@ public sealed class DataSourceEmbeddingService : BackgroundService
|
||||
|
||||
if (manifest.VectorSize == 0)
|
||||
{
|
||||
// The first successful batch defines the vector size for the whole collection.
|
||||
manifest.VectorSize = vectorSize;
|
||||
await this.EnsureCollectionExistsAsync(collectionName, vectorSize, token);
|
||||
await this.SaveStateAsync(token);
|
||||
@ -490,53 +403,6 @@ public sealed class DataSourceEmbeddingService : BackgroundService
|
||||
batch.Clear();
|
||||
}
|
||||
|
||||
private async IAsyncEnumerable<string> StreamEmbeddingChunksAsync(string filePath, [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken token)
|
||||
{
|
||||
if (this.IsImageFilePath(filePath))
|
||||
{
|
||||
yield return this.BuildImageIndexText(filePath);
|
||||
yield break;
|
||||
}
|
||||
|
||||
var currentChunk = new StringBuilder();
|
||||
|
||||
await foreach (var segment in this.rustService.StreamArbitraryFileData(filePath, token: token))
|
||||
{
|
||||
var normalized = NormalizeChunkSegment(segment);
|
||||
if (string.IsNullOrWhiteSpace(normalized))
|
||||
continue;
|
||||
|
||||
if (currentChunk.Length > 0 && currentChunk.Length + normalized.Length + Environment.NewLine.Length > MAX_CHUNK_LENGTH)
|
||||
{
|
||||
if (currentChunk.Length >= MIN_CHUNK_LENGTH)
|
||||
{
|
||||
var chunk = currentChunk.ToString().Trim();
|
||||
if (!string.IsNullOrWhiteSpace(chunk))
|
||||
yield return chunk;
|
||||
|
||||
var overlap = chunk.Length > CHUNK_OVERLAP_LENGTH
|
||||
? chunk[^CHUNK_OVERLAP_LENGTH..]
|
||||
: chunk;
|
||||
|
||||
currentChunk.Clear();
|
||||
currentChunk.Append(overlap);
|
||||
currentChunk.AppendLine();
|
||||
}
|
||||
else
|
||||
{
|
||||
currentChunk.AppendLine();
|
||||
}
|
||||
}
|
||||
|
||||
currentChunk.Append(normalized);
|
||||
currentChunk.AppendLine();
|
||||
}
|
||||
|
||||
var finalChunk = currentChunk.ToString().Trim();
|
||||
if (!string.IsNullOrWhiteSpace(finalChunk))
|
||||
yield return finalChunk;
|
||||
}
|
||||
|
||||
private async Task EnsureCollectionExistsAsync(string collectionName, int vectorSize, CancellationToken token)
|
||||
{
|
||||
await this.embeddingStore.EnsureEmbeddingStoreExists(collectionName, vectorSize, token);
|
||||
@ -581,72 +447,6 @@ public sealed class DataSourceEmbeddingService : BackgroundService
|
||||
await this.embeddingStore.DeleteEmbeddingStore(collectionName, CancellationToken.None);
|
||||
}
|
||||
|
||||
private async Task EnsureStateLoadedAsync(CancellationToken token)
|
||||
{
|
||||
if (this.stateLoaded)
|
||||
return;
|
||||
|
||||
await this.stateLock.WaitAsync(token);
|
||||
try
|
||||
{
|
||||
if (this.stateLoaded)
|
||||
return;
|
||||
|
||||
var statePath = this.GetStatePath();
|
||||
if (!string.IsNullOrWhiteSpace(statePath) && File.Exists(statePath))
|
||||
{
|
||||
var json = await File.ReadAllTextAsync(statePath, token);
|
||||
var persistedState = JsonSerializer.Deserialize<PersistedEmbeddingState>(json, this.jsonOptions);
|
||||
this.manifests = persistedState?.DataSources ?? new Dictionary<string, DataSourceEmbeddingManifest>(StringComparer.OrdinalIgnoreCase);
|
||||
}
|
||||
|
||||
this.stateLoaded = true;
|
||||
}
|
||||
finally
|
||||
{
|
||||
this.stateLock.Release();
|
||||
}
|
||||
}
|
||||
|
||||
private async Task<DataSourceEmbeddingManifest> GetManifestAsync(string dataSourceId, CancellationToken token)
|
||||
{
|
||||
await this.EnsureStateLoadedAsync(token);
|
||||
if (this.manifests.TryGetValue(dataSourceId, out var manifest))
|
||||
return manifest;
|
||||
|
||||
manifest = new DataSourceEmbeddingManifest();
|
||||
this.manifests[dataSourceId] = manifest;
|
||||
return manifest;
|
||||
}
|
||||
|
||||
private async Task SaveStateAsync(CancellationToken token)
|
||||
{
|
||||
var statePath = this.GetStatePath();
|
||||
if (string.IsNullOrWhiteSpace(statePath))
|
||||
return;
|
||||
|
||||
var directory = Path.GetDirectoryName(statePath);
|
||||
if (!string.IsNullOrWhiteSpace(directory))
|
||||
Directory.CreateDirectory(directory);
|
||||
|
||||
var persistedState = new PersistedEmbeddingState
|
||||
{
|
||||
DataSources = this.manifests
|
||||
};
|
||||
|
||||
var json = JsonSerializer.Serialize(persistedState, this.jsonOptions);
|
||||
await File.WriteAllTextAsync(statePath, json, token);
|
||||
}
|
||||
|
||||
private async Task ResetPersistedStateAsync(string dataSourceId)
|
||||
{
|
||||
await this.EnsureStateLoadedAsync(CancellationToken.None);
|
||||
this.manifests.Remove(dataSourceId);
|
||||
await this.DeleteCollectionAsync(this.GetCollectionName(dataSourceId));
|
||||
await this.SaveStateAsync(CancellationToken.None);
|
||||
this.logger.LogInformation("Reset persisted embedding state for data source '{DataSourceId}'.", dataSourceId);
|
||||
}
|
||||
|
||||
private async Task WaitForInitialSettingsAndBootstrapAsync(CancellationToken token)
|
||||
{
|
||||
while (!token.IsCancellationRequested)
|
||||
@ -663,280 +463,8 @@ public sealed class DataSourceEmbeddingService : BackgroundService
|
||||
|
||||
token.ThrowIfCancellationRequested();
|
||||
|
||||
this.logger.LogInformation("Embedding background service is ready. Queueing all internal data sources.");
|
||||
await this.QueueAllInternalDataSourcesAsync();
|
||||
}
|
||||
|
||||
private void RefreshWatchers()
|
||||
{
|
||||
var supportedSources = this.settingsManager.ConfigurationData.DataSources
|
||||
.Where(this.IsSupportedInternalDataSource)
|
||||
.ToDictionary(source => source.Id, StringComparer.OrdinalIgnoreCase);
|
||||
|
||||
foreach (var existingWatcherId in this.watchers.Keys.Except(supportedSources.Keys, StringComparer.OrdinalIgnoreCase).ToList())
|
||||
this.RemoveWatcher(existingWatcherId);
|
||||
|
||||
foreach (var dataSource in supportedSources.Values)
|
||||
this.EnsureWatcher(dataSource);
|
||||
}
|
||||
|
||||
private void EnsureWatcher(IDataSource dataSource)
|
||||
{
|
||||
if (this.watchers.TryGetValue(dataSource.Id, out var existingWatcher))
|
||||
{
|
||||
var existingTarget = $"{existingWatcher.Path}|{existingWatcher.Filter}|{existingWatcher.IncludeSubdirectories}";
|
||||
var desiredTarget = this.GetWatcherTarget(dataSource);
|
||||
if (string.Equals(existingTarget, desiredTarget, StringComparison.OrdinalIgnoreCase))
|
||||
return;
|
||||
|
||||
this.RemoveWatcher(dataSource.Id);
|
||||
}
|
||||
|
||||
var watcher = this.CreateWatcher(dataSource);
|
||||
if (watcher is null)
|
||||
return;
|
||||
|
||||
if (!this.watchers.TryAdd(dataSource.Id, watcher))
|
||||
watcher.Dispose();
|
||||
}
|
||||
|
||||
private FileSystemWatcher? CreateWatcher(IDataSource dataSource)
|
||||
{
|
||||
FileSystemWatcher? watcher = dataSource switch
|
||||
{
|
||||
DataSourceLocalDirectory localDirectory when Directory.Exists(localDirectory.Path) => new FileSystemWatcher(localDirectory.Path)
|
||||
{
|
||||
IncludeSubdirectories = true,
|
||||
NotifyFilter = NotifyFilters.FileName | NotifyFilters.DirectoryName | NotifyFilters.LastWrite | NotifyFilters.CreationTime | NotifyFilters.Size,
|
||||
},
|
||||
DataSourceLocalFile localFile when File.Exists(localFile.FilePath) && !string.IsNullOrWhiteSpace(Path.GetDirectoryName(localFile.FilePath)) => new FileSystemWatcher(Path.GetDirectoryName(localFile.FilePath)!)
|
||||
{
|
||||
Filter = Path.GetFileName(localFile.FilePath),
|
||||
NotifyFilter = NotifyFilters.FileName | NotifyFilters.LastWrite | NotifyFilters.CreationTime | NotifyFilters.Size,
|
||||
},
|
||||
_ => null,
|
||||
};
|
||||
|
||||
if (watcher is null)
|
||||
return null;
|
||||
|
||||
FileSystemEventHandler onChanged = (_, _) => this.OnWatchedDataSourceChanged(dataSource.Id);
|
||||
RenamedEventHandler onRenamed = (_, _) => this.OnWatchedDataSourceChanged(dataSource.Id);
|
||||
ErrorEventHandler onError = (_, errorArgs) =>
|
||||
{
|
||||
this.logger.LogWarning(errorArgs.GetException(), "The file watcher for data source '{DataSourceId}' failed. Recreating it.", dataSource.Id);
|
||||
this.RemoveWatcher(dataSource.Id);
|
||||
this.EnsureWatcher(dataSource);
|
||||
this.OnWatchedDataSourceChanged(dataSource.Id);
|
||||
};
|
||||
|
||||
watcher.Created += onChanged;
|
||||
watcher.Changed += onChanged;
|
||||
watcher.Deleted += onChanged;
|
||||
watcher.Renamed += onRenamed;
|
||||
watcher.Error += onError;
|
||||
watcher.EnableRaisingEvents = true;
|
||||
return watcher;
|
||||
}
|
||||
|
||||
private string GetWatcherTarget(IDataSource dataSource) => dataSource switch
|
||||
{
|
||||
DataSourceLocalDirectory localDirectory => $"{localDirectory.Path}|*.*|True",
|
||||
DataSourceLocalFile localFile => $"{Path.GetDirectoryName(localFile.FilePath)}|{Path.GetFileName(localFile.FilePath)}|False",
|
||||
_ => string.Empty
|
||||
};
|
||||
|
||||
private void RemoveWatcher(string dataSourceId)
|
||||
{
|
||||
if (this.watchers.TryRemove(dataSourceId, out var watcher))
|
||||
watcher.Dispose();
|
||||
}
|
||||
|
||||
private void OnWatchedDataSourceChanged(string dataSourceId)
|
||||
{
|
||||
this.logger.LogInformation("Detected file system change for data source '{DataSourceId}'. Queueing a fresh embedding run.", dataSourceId);
|
||||
_ = Task.Run(async () =>
|
||||
{
|
||||
try
|
||||
{
|
||||
var dataSource = this.settingsManager.ConfigurationData.DataSources
|
||||
.FirstOrDefault(source => source.Id.Equals(dataSourceId, StringComparison.OrdinalIgnoreCase));
|
||||
|
||||
if (dataSource is not null)
|
||||
await this.QueueDataSourceAsync(dataSource);
|
||||
}
|
||||
catch (Exception exception)
|
||||
{
|
||||
this.logger.LogWarning(exception, "Failed to queue watched data source '{DataSourceId}' after a file system change.", dataSourceId);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
private string GetStatePath()
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(SettingsManager.ConfigDirectory))
|
||||
return string.Empty;
|
||||
|
||||
return Path.Combine(SettingsManager.ConfigDirectory, STATE_FILENAME);
|
||||
}
|
||||
|
||||
private FileEnumerationResult GetInputFiles(IDataSource dataSource)
|
||||
{
|
||||
var result = new FileEnumerationResult();
|
||||
|
||||
switch (dataSource)
|
||||
{
|
||||
case DataSourceLocalFile localFile when File.Exists(localFile.FilePath):
|
||||
result.Files.Add(new FileInfo(localFile.FilePath));
|
||||
return result;
|
||||
|
||||
case DataSourceLocalDirectory localDirectory when Directory.Exists(localDirectory.Path):
|
||||
this.EnumerateAccessibleFiles(localDirectory.Path, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
switch (dataSource)
|
||||
{
|
||||
case DataSourceLocalFile localFile:
|
||||
result.FailedFiles = 1;
|
||||
result.LastError = $"The selected file '{localFile.FilePath}' does not exist.";
|
||||
break;
|
||||
|
||||
case DataSourceLocalDirectory localDirectory:
|
||||
result.FailedFiles = 1;
|
||||
result.LastError = $"The selected directory '{localDirectory.Path}' does not exist.";
|
||||
break;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private void EnumerateAccessibleFiles(string rootPath, FileEnumerationResult result)
|
||||
{
|
||||
var pendingDirectories = new Stack<string>();
|
||||
pendingDirectories.Push(rootPath);
|
||||
|
||||
while (pendingDirectories.Count > 0)
|
||||
{
|
||||
var currentPath = pendingDirectories.Pop();
|
||||
IEnumerable<string> subDirectories;
|
||||
IEnumerable<string> files;
|
||||
|
||||
try
|
||||
{
|
||||
subDirectories = Directory.EnumerateDirectories(currentPath);
|
||||
files = Directory.EnumerateFiles(currentPath);
|
||||
}
|
||||
catch (Exception exception)
|
||||
{
|
||||
this.logger.LogWarning(exception, "Cannot access directory '{DirectoryPath}' while indexing.", currentPath);
|
||||
result.FailedFiles++;
|
||||
result.LastError = $"The directory '{currentPath}' could not be accessed.";
|
||||
continue;
|
||||
}
|
||||
|
||||
foreach (var filePath in files)
|
||||
{
|
||||
FileInfo fileInfo;
|
||||
try
|
||||
{
|
||||
fileInfo = new FileInfo(filePath);
|
||||
if (!fileInfo.Exists)
|
||||
continue;
|
||||
}
|
||||
catch (Exception exception)
|
||||
{
|
||||
this.logger.LogWarning(exception, "Cannot inspect file '{FilePath}' while indexing.", filePath);
|
||||
result.FailedFiles++;
|
||||
result.LastError = $"The file '{filePath}' could not be inspected.";
|
||||
continue;
|
||||
}
|
||||
|
||||
result.Files.Add(fileInfo);
|
||||
}
|
||||
|
||||
foreach (var subDirectory in subDirectories)
|
||||
pendingDirectories.Push(subDirectory);
|
||||
}
|
||||
}
|
||||
|
||||
private string TryGetRelativePath(IDataSource dataSource, FileInfo file) => dataSource switch
|
||||
{
|
||||
DataSourceLocalDirectory localDirectory => Path.GetRelativePath(localDirectory.Path, file.FullName),
|
||||
_ => file.Name
|
||||
};
|
||||
|
||||
private static string NormalizeChunkSegment(string input)
|
||||
{
|
||||
return input
|
||||
.Replace("\r\n", "\n", StringComparison.Ordinal)
|
||||
.Replace('\r', '\n')
|
||||
.Trim();
|
||||
}
|
||||
|
||||
private bool IsImageFilePath(string filePath)
|
||||
{
|
||||
return FileTypes.IsAllowedPath(filePath, FileTypes.IMAGE);
|
||||
}
|
||||
|
||||
private string BuildImageIndexText(string filePath)
|
||||
{
|
||||
var fileName = Path.GetFileName(filePath);
|
||||
var fileNameWithoutExtension = Path.GetFileNameWithoutExtension(filePath);
|
||||
var extension = Path.GetExtension(filePath).TrimStart('.');
|
||||
var normalizedName = fileNameWithoutExtension
|
||||
.Replace('_', ' ')
|
||||
.Replace('-', ' ')
|
||||
.Trim();
|
||||
|
||||
return $$"""
|
||||
Image asset
|
||||
File name: {{fileName}}
|
||||
Type: {{extension}}
|
||||
Search terms: {{normalizedName}}
|
||||
Path: {{filePath}}
|
||||
Note: The current RAG embedding pipeline stores image files by metadata only. Visual content is not OCRed or captioned yet.
|
||||
""";
|
||||
}
|
||||
|
||||
private string BuildEmbeddingSignature(EmbeddingProvider embeddingProvider)
|
||||
{
|
||||
return string.Join('|',
|
||||
embeddingProvider.Id,
|
||||
embeddingProvider.UsedLLMProvider,
|
||||
embeddingProvider.Model.Id,
|
||||
embeddingProvider.Host,
|
||||
embeddingProvider.Hostname,
|
||||
embeddingProvider.TokenizerPath);
|
||||
}
|
||||
|
||||
private string BuildFingerprint(FileInfo file)
|
||||
{
|
||||
var fingerprintSource = $"{file.FullName}|{file.Length}|{file.LastWriteTimeUtc.Ticks}";
|
||||
var bytes = SHA256.HashData(Encoding.UTF8.GetBytes(fingerprintSource));
|
||||
return Convert.ToHexString(bytes);
|
||||
}
|
||||
|
||||
private string GetCollectionName(string dataSourceId)
|
||||
{
|
||||
var safeId = dataSourceId
|
||||
.ToLowerInvariant()
|
||||
.Replace("-", string.Empty, StringComparison.Ordinal);
|
||||
|
||||
return $"rag_{safeId}";
|
||||
}
|
||||
|
||||
private string CreatePointId(string dataSourceId, string fingerprint, int chunkIndex)
|
||||
{
|
||||
var source = $"{dataSourceId}:{fingerprint}:{chunkIndex}";
|
||||
var hash = SHA256.HashData(Encoding.UTF8.GetBytes(source));
|
||||
var guidBytes = hash[..16].ToArray();
|
||||
|
||||
// Mark the bytes as an RFC 4122 version 4 UUID so Qdrant accepts the ID format.
|
||||
guidBytes[6] = (byte)((guidBytes[6] & 0x0F) | 0x40);
|
||||
guidBytes[8] = (byte)((guidBytes[8] & 0x3F) | 0x80);
|
||||
|
||||
return new Guid(guidBytes).ToString();
|
||||
this.logger.LogInformation("Embedding background service is ready. Checking whether automatic data source refresh is enabled.");
|
||||
await this.QueueAllInternalDataSourcesIfAutomaticRefreshAsync();
|
||||
}
|
||||
|
||||
private bool IsSupportedInternalDataSource(IDataSource dataSource)
|
||||
@ -944,18 +472,104 @@ public sealed class DataSourceEmbeddingService : BackgroundService
|
||||
return dataSource is DataSourceLocalDirectory or DataSourceLocalFile;
|
||||
}
|
||||
|
||||
private DataSourceEmbeddingStatus GetFallbackStatus(IDataSource dataSource, string errorMessage)
|
||||
private bool TryResolveEmbeddingProvider(IDataSource dataSource, [NotNullWhen(true)] out EmbeddingProvider? embeddingProvider)
|
||||
{
|
||||
embeddingProvider = this.settingsManager.ConfigurationData.EmbeddingProviders.FirstOrDefault(provider =>
|
||||
dataSource is IInternalDataSource internalDataSource &&
|
||||
provider.Id.Equals(internalDataSource.EmbeddingId, StringComparison.OrdinalIgnoreCase));
|
||||
|
||||
return embeddingProvider != default && embeddingProvider.UsedLLMProvider is not LLMProviders.NONE;
|
||||
}
|
||||
|
||||
private async Task<DataSourceEmbeddingManifest> EnsureCompatibleManifestAsync(IDataSource dataSource, EmbeddingProvider embeddingProvider, string collectionName, CancellationToken token)
|
||||
{
|
||||
var embeddingSignature = this.BuildEmbeddingSignature(embeddingProvider);
|
||||
var manifest = await this.GetManifestAsync(dataSource.Id, token);
|
||||
|
||||
if (!string.Equals(manifest.EmbeddingSignature, embeddingSignature, StringComparison.Ordinal))
|
||||
{
|
||||
this.logger.LogInformation(
|
||||
"Embedding configuration changed for data source '{DataSourceName}' ({DataSourceId}). Resetting persisted state and collection '{CollectionName}'.",
|
||||
dataSource.Name,
|
||||
dataSource.Id,
|
||||
collectionName);
|
||||
await this.ResetPersistedStateAsync(dataSource.Id);
|
||||
manifest = await this.GetManifestAsync(dataSource.Id, token);
|
||||
}
|
||||
|
||||
if (!string.Equals(manifest.EmbeddingProviderId, embeddingProvider.Id, StringComparison.OrdinalIgnoreCase) ||
|
||||
!string.Equals(manifest.EmbeddingSignature, embeddingSignature, StringComparison.Ordinal))
|
||||
{
|
||||
manifest.EmbeddingProviderId = embeddingProvider.Id;
|
||||
manifest.EmbeddingSignature = embeddingSignature;
|
||||
await this.SaveStateAsync(token);
|
||||
}
|
||||
|
||||
return manifest;
|
||||
}
|
||||
|
||||
private async Task RemoveMissingFileEmbeddingsAsync(
|
||||
IDataSource dataSource,
|
||||
string collectionName,
|
||||
DataSourceEmbeddingManifest manifest,
|
||||
IReadOnlyCollection<FileInfo> indexedFiles,
|
||||
CancellationToken token)
|
||||
{
|
||||
var existingPaths = indexedFiles
|
||||
.Select(file => file.FullName)
|
||||
.ToHashSet(StringComparer.OrdinalIgnoreCase);
|
||||
|
||||
foreach (var removedFilePath in manifest.Files.Keys.Except(existingPaths, StringComparer.OrdinalIgnoreCase).ToList())
|
||||
{
|
||||
await this.DeleteFilePointsAsync(collectionName, removedFilePath, token);
|
||||
manifest.Files.Remove(removedFilePath);
|
||||
this.logger.LogInformation(
|
||||
"Removed stale embeddings for deleted file '{FilePath}' from data source '{DataSourceName}' ({DataSourceId}).",
|
||||
removedFilePath,
|
||||
dataSource.Name,
|
||||
dataSource.Id);
|
||||
}
|
||||
}
|
||||
|
||||
private DataSourceEmbeddingStatus CreateStatus(
|
||||
IDataSource dataSource,
|
||||
DataSourceEmbeddingState state,
|
||||
int totalFiles,
|
||||
int indexedFiles,
|
||||
int failedFiles,
|
||||
string currentFile = "",
|
||||
string lastError = "")
|
||||
{
|
||||
return new DataSourceEmbeddingStatus(
|
||||
dataSource.Id,
|
||||
dataSource.Name,
|
||||
dataSource.Type,
|
||||
DataSourceEmbeddingState.FAILED,
|
||||
0,
|
||||
0,
|
||||
1,
|
||||
string.Empty,
|
||||
errorMessage);
|
||||
state,
|
||||
totalFiles,
|
||||
indexedFiles,
|
||||
failedFiles,
|
||||
currentFile,
|
||||
lastError);
|
||||
}
|
||||
|
||||
private DataSourceEmbeddingStatus CreateCompletedStatus(IDataSource dataSource, int totalFiles, int indexedFiles, int failedFiles, string lastError)
|
||||
{
|
||||
return this.CreateStatus(
|
||||
dataSource,
|
||||
failedFiles > 0 ? DataSourceEmbeddingState.FAILED : DataSourceEmbeddingState.COMPLETED,
|
||||
totalFiles,
|
||||
indexedFiles,
|
||||
failedFiles,
|
||||
lastError: failedFiles > 0
|
||||
? string.IsNullOrWhiteSpace(lastError)
|
||||
? "Some files could not be embedded. See the logs for details."
|
||||
: lastError
|
||||
: string.Empty);
|
||||
}
|
||||
|
||||
private DataSourceEmbeddingStatus GetFallbackStatus(IDataSource dataSource, string errorMessage)
|
||||
{
|
||||
return this.CreateStatus(dataSource, DataSourceEmbeddingState.FAILED, 0, 0, 1, lastError: errorMessage);
|
||||
}
|
||||
|
||||
private void UpsertStatus(DataSourceEmbeddingStatus status)
|
||||
@ -969,87 +583,3 @@ public sealed class DataSourceEmbeddingService : BackgroundService
|
||||
_ = MessageBus.INSTANCE.SendMessage(null, Event.RAG_EMBEDDING_STATUS_CHANGED, true);
|
||||
}
|
||||
}
|
||||
|
||||
public sealed record DataSourceEmbeddingOverview(
|
||||
bool IsVisible,
|
||||
bool ShowIconOnly,
|
||||
DataSourceEmbeddingState State,
|
||||
int IndexedFiles,
|
||||
int TotalFiles,
|
||||
int FailedFiles,
|
||||
string NavLabel);
|
||||
|
||||
public enum DataSourceEmbeddingState
|
||||
{
|
||||
IDLE,
|
||||
QUEUED,
|
||||
RUNNING,
|
||||
COMPLETED,
|
||||
FAILED,
|
||||
}
|
||||
|
||||
public sealed record DataSourceEmbeddingStatus(
|
||||
string DataSourceId,
|
||||
string DataSourceName,
|
||||
DataSourceType DataSourceType,
|
||||
DataSourceEmbeddingState State,
|
||||
int TotalFiles,
|
||||
int IndexedFiles,
|
||||
int FailedFiles,
|
||||
string CurrentFile,
|
||||
string LastError)
|
||||
{
|
||||
private static string TB(string fallbackEN) => I18N.I.T(fallbackEN, typeof(DataSourceEmbeddingService).Namespace, nameof(DataSourceEmbeddingService));
|
||||
|
||||
public int ProgressPercent => this.TotalFiles <= 0 ? 0 : Math.Clamp((int)Math.Round(this.IndexedFiles * 100d / this.TotalFiles), 0, 100);
|
||||
|
||||
public string StateLabel => this.State switch
|
||||
{
|
||||
DataSourceEmbeddingState.QUEUED => TB("Queued"),
|
||||
DataSourceEmbeddingState.RUNNING => TB("Running"),
|
||||
DataSourceEmbeddingState.COMPLETED => TB("Completed"),
|
||||
DataSourceEmbeddingState.FAILED => TB("Needs attention"),
|
||||
_ => TB("Idle")
|
||||
};
|
||||
|
||||
public int SortOrder => this.State switch
|
||||
{
|
||||
DataSourceEmbeddingState.RUNNING => 0,
|
||||
DataSourceEmbeddingState.QUEUED => 1,
|
||||
DataSourceEmbeddingState.FAILED => 2,
|
||||
DataSourceEmbeddingState.COMPLETED => 3,
|
||||
_ => 4,
|
||||
};
|
||||
}
|
||||
|
||||
public sealed class FileEnumerationResult
|
||||
{
|
||||
public List<FileInfo> Files { get; } = [];
|
||||
|
||||
public int FailedFiles { get; set; }
|
||||
|
||||
public string LastError { get; set; } = string.Empty;
|
||||
}
|
||||
|
||||
public sealed class PersistedEmbeddingState
|
||||
{
|
||||
public Dictionary<string, DataSourceEmbeddingManifest> DataSources { get; init; } = new(StringComparer.OrdinalIgnoreCase);
|
||||
}
|
||||
|
||||
public sealed class DataSourceEmbeddingManifest
|
||||
{
|
||||
public string EmbeddingProviderId { get; set; } = string.Empty;
|
||||
|
||||
public string EmbeddingSignature { get; set; } = string.Empty;
|
||||
|
||||
public int VectorSize { get; set; }
|
||||
|
||||
public Dictionary<string, EmbeddedFileRecord> Files { get; init; } = new(StringComparer.OrdinalIgnoreCase);
|
||||
}
|
||||
|
||||
public sealed record EmbeddedFileRecord(
|
||||
string Fingerprint,
|
||||
long FileSize,
|
||||
DateTime LastWriteUtc,
|
||||
DateTime EmbeddedAtUtc,
|
||||
int ChunkCount);
|
||||
|
||||
Loading…
Reference in New Issue
Block a user