mirror of
https://github.com/MindWorkAI/AI-Studio.git
synced 2025-10-20 09:40:21 +00:00
Some checks failed
Build and Release / Read metadata (push) Has been cancelled
Build and Release / Build app (${{ matrix.dotnet_runtime }}) (-x86_64-unknown-linux-gnu, linux-x64, ubuntu-22.04, x86_64-unknown-linux-gnu, appimage deb updater) (push) Has been cancelled
Build and Release / Prepare & create release (push) Has been cancelled
Build and Release / Publish release (push) Has been cancelled
Build and Release / Build app (${{ matrix.dotnet_runtime }}) (-aarch64-apple-darwin, osx-arm64, macos-latest, aarch64-apple-darwin, dmg updater) (push) Has been cancelled
Build and Release / Build app (${{ matrix.dotnet_runtime }}) (-aarch64-pc-windows-msvc.exe, win-arm64, windows-latest, aarch64-pc-windows-msvc, nsis updater) (push) Has been cancelled
Build and Release / Build app (${{ matrix.dotnet_runtime }}) (-aarch64-unknown-linux-gnu, linux-arm64, ubuntu-22.04-arm, aarch64-unknown-linux-gnu, appimage deb updater) (push) Has been cancelled
Build and Release / Build app (${{ matrix.dotnet_runtime }}) (-x86_64-apple-darwin, osx-x64, macos-latest, x86_64-apple-darwin, dmg updater) (push) Has been cancelled
Build and Release / Build app (${{ matrix.dotnet_runtime }}) (-x86_64-pc-windows-msvc.exe, win-x64, windows-latest, x86_64-pc-windows-msvc, nsis updater) (push) Has been cancelled
228 lines
10 KiB
C#
228 lines
10 KiB
C#
using AIStudio.Chat;
|
|
using AIStudio.Provider;
|
|
using AIStudio.Settings;
|
|
using AIStudio.Settings.DataModel;
|
|
using AIStudio.Tools.PluginSystem;
|
|
using AIStudio.Tools.RAG.AugmentationProcesses;
|
|
using AIStudio.Tools.RAG.DataSourceSelectionProcesses;
|
|
using AIStudio.Tools.Services;
|
|
|
|
namespace AIStudio.Tools.RAG.RAGProcesses;
|
|
|
|
/// <summary>
/// RAG process which, as implemented here, resolves the data sources bound to a chat,
/// either lets an agent select among them or keeps the user's manual selection,
/// updates the data security of the chat thread accordingly, retrieves data from the
/// selected sources, hands the retrieval contexts to the augmentation step, and
/// finally attaches web-linked sources to the AI answer block.
/// </summary>
public sealed class AISrcSelWithRetCtxVal : IRagProcess
{
    private static readonly ILogger<AISrcSelWithRetCtxVal> LOGGER = Program.LOGGER_FACTORY.CreateLogger<AISrcSelWithRetCtxVal>();

    // Looks up the localized text for the given English fallback, scoped to this type.
    private static string TB(string fallbackEN) => I18N.I.T(fallbackEN, typeof(AISrcSelWithRetCtxVal).Namespace, nameof(AISrcSelWithRetCtxVal));

    #region Implementation of IRagProcess

    /// <inheritdoc />
    public string TechnicalName => "AISrcSelWithRetCtxVal";

    /// <inheritdoc />
    public string UIName => TB("AI source selection with AI retrieval context validation");

    /// <inheritdoc />
    public string Description => TB("This RAG process filters data sources, automatically selects appropriate sources, optionally allows manual source selection, retrieves data, and automatically validates the retrieval context.");

    /// <inheritdoc />
    public async Task<ChatThread> ProcessAsync(IProvider provider, IContent lastUserPrompt, ChatThread chatThread, CancellationToken token = default)
    {
        var settings = Program.SERVICE_PROVIDER.GetService<SettingsManager>()!;
        var dataSourceService = Program.SERVICE_PROVIDER.GetService<DataSourceService>()!;

        //
        // 1. Check if the user wants to bind any data sources to the chat:
        //
        if (chatThread.DataSourceOptions.IsEnabled())
        {
            LOGGER.LogInformation("Data sources are enabled for this chat.");

            // Across the different code-branches, we keep track of whether it
            // makes sense to proceed with the RAG process:
            var proceedWithRAG = true;

            //
            // We read the last block in the chat thread. We need to re-arrange
            // the order of blocks later, after the augmentation process takes
            // place:
            //
            if (chatThread.Blocks.Count == 0)
            {
                LOGGER.LogError("The chat thread is empty. Skipping the RAG process.");
                return chatThread;
            }

            if (chatThread.Blocks.Last().Role != ChatRole.AI)
            {
                LOGGER.LogError("The last block in the chat thread is not the AI block. There is something wrong with the chat thread. Skipping the RAG process.");
                return chatThread;
            }

            //
            // At this point in time, the chat thread already contains the
            // last block, which is the waiting AI block. We need to remove
            // this block before we call some parts of the RAG process:
            //
            var chatThreadWithoutWaitingAIBlock = chatThread with { Blocks = chatThread.Blocks[..^1] };

            //
            // When the user wants to bind data sources to the chat, we
            // have to check if the data sources are available for the
            // selected provider. Also, we have to check if any ERI
            // data sources changed their security requirements.
            //
            // Preselected IDs that no longer match a configured data source
            // are dropped here (FirstOrDefault yields null, which is filtered out).
            //
            List<IDataSource> preselectedDataSources = chatThread.DataSourceOptions.PreselectedDataSourceIds.Select(id => settings.ConfigurationData.DataSources.FirstOrDefault(ds => ds.Id == id)).Where(ds => ds is not null).ToList()!;
            var dataSources = await dataSourceService.GetDataSources(provider, preselectedDataSources);
            var selectedDataSources = dataSources.SelectedDataSources;

            //
            // Should the AI select the data sources?
            //
            if (chatThread.DataSourceOptions.AutomaticDataSourceSelection)
            {
                var dataSourceSelectionProcess = new AgenticSrcSelWithDynHeur();
                var result = await dataSourceSelectionProcess.SelectDataSourcesAsync(provider, lastUserPrompt, chatThread, dataSources, token);
                proceedWithRAG = result.ProceedWithRAG;
                selectedDataSources = result.SelectedDataSources;
            }
            else
            {
                //
                // No, the user made the choice manually:
                //
                // We use string.Join instead of a seedless Aggregate: Aggregate throws
                // an InvalidOperationException for an empty selection (which must reach
                // the "no data sources" check below instead), and it would re-quote the
                // accumulated text on every step for three or more names.
                //
                var selectedDataSourceInfo = string.Join(", ", selectedDataSources.Select(ds => $"'{ds.Name}'"));
                LOGGER.LogInformation($"The user selected the data sources manually. {selectedDataSources.Count} data source(s) are selected: {selectedDataSourceInfo}.");
            }

            if (selectedDataSources.Count == 0)
            {
                LOGGER.LogWarning("No data sources are selected. The RAG process is skipped.");
                proceedWithRAG = false;
            }
            else
            {
                var previousDataSecurity = chatThread.DataSecurity;

                //
                // Update the data security of the chat thread. We consider the current data security
                // of the chat thread and the data security of the selected data sources:
                //
                var dataSecurityRestrictedToSelfHosted = selectedDataSources.Any(x => x.SecurityPolicy is DataSourceSecurity.SELF_HOSTED);
                chatThread.DataSecurity = dataSecurityRestrictedToSelfHosted switch
                {
                    //
                    // Case: the data sources which are selected have a security policy
                    // of SELF_HOSTED (at least one data source).
                    //
                    // When the policy was already set to ALLOW_ANY, we restrict it
                    // to SELF_HOSTED.
                    //
                    true => DataSourceSecurity.SELF_HOSTED,

                    //
                    // Case: the data sources which are selected have a security policy
                    // of ALLOW_ANY (none of the data sources has a SELF_HOSTED policy).
                    //
                    // When the policy was already set to SELF_HOSTED, we must keep that.
                    //
                    false => chatThread.DataSecurity switch
                    {
                        //
                        // When the policy was not specified yet, we set it to ALLOW_ANY.
                        //
                        DataSourceSecurity.NOT_SPECIFIED => DataSourceSecurity.ALLOW_ANY,
                        DataSourceSecurity.ALLOW_ANY => DataSourceSecurity.ALLOW_ANY,

                        //
                        // When the policy was already set to SELF_HOSTED, we must keep that.
                        // This is important since the thread might already contain data
                        // from a data source with a SELF_HOSTED policy.
                        //
                        DataSourceSecurity.SELF_HOSTED => DataSourceSecurity.SELF_HOSTED,

                        // Default case: we use the current data security of the chat thread.
                        _ => chatThread.DataSecurity,
                    }
                };

                if (previousDataSecurity != chatThread.DataSecurity)
                    LOGGER.LogInformation($"The data security of the chat thread was updated from '{previousDataSecurity}' to '{chatThread.DataSecurity}'.");
            }

            //
            // Trigger the retrieval part of the (R)AG process:
            //
            var dataContexts = new List<IRetrievalContext>();
            if (proceedWithRAG)
            {
                //
                // We kick off the retrieval process for each data source in parallel:
                //
                var retrievalTasks = new List<Task<IReadOnlyList<IRetrievalContext>>>(selectedDataSources.Count);
                foreach (var dataSource in selectedDataSources)
                    retrievalTasks.Add(dataSource.RetrieveDataAsync(lastUserPrompt, chatThreadWithoutWaitingAIBlock, token));

                //
                // Wait for all retrieval tasks to finish. Each task is awaited
                // on its own, so that a failing data source is logged and skipped
                // without discarding the results of the other data sources:
                //
                foreach (var retrievalTask in retrievalTasks)
                {
                    try
                    {
                        dataContexts.AddRange(await retrievalTask);
                    }
                    catch (Exception e)
                    {
                        LOGGER.LogError(e, "An error occurred during the retrieval process.");
                    }
                }
            }

            //
            // Perform the augmentation of the R(A)G process:
            //
            if (proceedWithRAG)
            {
                var augmentationProcess = new AugmentationOne();
                chatThread = await augmentationProcess.ProcessAsync(provider, lastUserPrompt, chatThread, dataContexts, token);
            }

            //
            // Add sources from the selected data
            //

            // We know that the last block is the AI answer block (cf. check above):
            var aiAnswerBlock = chatThread.Blocks.Last();
            var aiAnswerSources = aiAnswerBlock.Content?.Sources;

            // It should never happen that the AI answer block does not contain a content part.
            // Just in case, we check this:
            if (aiAnswerSources is null)
                return chatThread;

            var ragSources = new List<ISource>();
            foreach (var retrievalContext in dataContexts)
            {
                // Contexts without a data source name cannot be shown as a source:
                var title = retrievalContext.DataSourceName;
                if (string.IsNullOrWhiteSpace(title))
                    continue;

                // Only paths starting with "http" (case-insensitive) are listed as sources:
                var link = retrievalContext.Path;
                if (!link.StartsWith("http", StringComparison.OrdinalIgnoreCase))
                    continue;

                ragSources.Add(new Source(title, link, SourceOrigin.RAG));
            }

            // Merge the sources, avoiding duplicates:
            aiAnswerSources.MergeSources(ragSources);
        }

        return chatThread;
    }

    #endregion
}