diff --git a/README.md b/README.md
index 06469a0..f66f60a 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,7 @@ Things we are currently working on:
- [ ] App: Implement the process to vectorize one local file using embeddings
- [ ] Runtime: Integration of the vector database [LanceDB](https://github.com/lancedb/lancedb)
- [ ] App: Implement the continuous process of vectorizing data
- - [x] ~~App: Define a common retrieval context interface for the integration of RAG processes in chats (PR [#281](https://github.com/MindWorkAI/AI-Studio/pull/281))~~
+ - [x] ~~App: Define a common retrieval context interface for the integration of RAG processes in chats (PR [#281](https://github.com/MindWorkAI/AI-Studio/pull/281), [#284](https://github.com/MindWorkAI/AI-Studio/pull/284))~~
- [ ] App: Define a common augmentation interface for the integration of RAG processes in chats
- [x] ~~App: Integrate data sources in chats (PR [#282](https://github.com/MindWorkAI/AI-Studio/pull/282))~~
diff --git a/app/MindWork AI Studio.sln.DotSettings b/app/MindWork AI Studio.sln.DotSettings
index c9d147b..3f07930 100644
--- a/app/MindWork AI Studio.sln.DotSettings
+++ b/app/MindWork AI Studio.sln.DotSettings
@@ -5,6 +5,8 @@
LLM
LM
MSG
+ RAG
+ UI
True
True
True
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Agents/AgentBase.cs b/app/MindWork AI Studio/Agents/AgentBase.cs
index 35c4f39..53e4152 100644
--- a/app/MindWork AI Studio/Agents/AgentBase.cs
+++ b/app/MindWork AI Studio/Agents/AgentBase.cs
@@ -1,3 +1,5 @@
+using System.Text.Json;
+
using AIStudio.Chat;
using AIStudio.Provider;
using AIStudio.Settings;
@@ -9,6 +11,11 @@ namespace AIStudio.Agents;
public abstract class AgentBase(ILogger logger, SettingsManager settingsManager, DataSourceService dataSourceService, ThreadSafeRandom rng) : IAgent
{
+ protected static readonly JsonSerializerOptions JSON_SERIALIZER_OPTIONS = new()
+ {
+ PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower,
+ };
+
protected DataSourceService DataSourceService { get; init; } = dataSourceService;
protected SettingsManager SettingsManager { get; init; } = settingsManager;
diff --git a/app/MindWork AI Studio/Agents/AgentDataSourceSelection.cs b/app/MindWork AI Studio/Agents/AgentDataSourceSelection.cs
new file mode 100644
index 0000000..f42a476
--- /dev/null
+++ b/app/MindWork AI Studio/Agents/AgentDataSourceSelection.cs
@@ -0,0 +1,416 @@
+using System.Text;
+using System.Text.Json;
+
+using AIStudio.Chat;
+using AIStudio.Provider;
+using AIStudio.Settings;
+using AIStudio.Settings.DataModel;
+using AIStudio.Tools.ERIClient;
+using AIStudio.Tools.Services;
+
+namespace AIStudio.Agents;
+
+public sealed class AgentDataSourceSelection (ILogger logger, ILogger baseLogger, SettingsManager settingsManager, DataSourceService dataSourceService, ThreadSafeRandom rng) : AgentBase(baseLogger, settingsManager, dataSourceService, rng)
+{
+ private static readonly ContentBlock EMPTY_BLOCK = new()
+ {
+ Content = null,
+ ContentType = ContentType.NONE,
+ Role = ChatRole.AGENT,
+ Time = DateTimeOffset.UtcNow,
+ };
+
+ private readonly List answers = new();
+
+ #region Overrides of AgentBase
+
+ /// <inheritdoc />
+ protected override Type Type => Type.SYSTEM;
+
+ /// <inheritdoc />
+ public override string Id => "Data Source Selection";
+
+ /// <inheritdoc />
+ protected override string JobDescription =>
+ """
+ You receive a system and a user prompt, as well as a list of possible data sources as input.
+ Your task is to select the appropriate data sources for the given task. You may choose none,
+ one, or multiple sources, depending on what best fits the system and user prompt. You need
+ to estimate and assess which source, based on its description, might be helpful in
+ processing the prompts.
+
+ Your response is a JSON list in the following format:
+
+ ```
+ [
+ {"id": "The data source ID", "reason": "Why did you choose this source?", "confidence": 0.87},
+ {"id": "The data source ID", "reason": "Why did you choose this source?", "confidence": 0.54}
+ ]
+ ```
+
+ You express your confidence as a floating-point number between 0.0 (maximum uncertainty) and
+ 1.0 (you are absolutely certain that this source is needed).
+
+ The JSON schema is:
+
+ ```
+ {
+ "$schema": "http://json-schema.org/draft-04/schema#",
+ "type": "array",
+ "items": [
+ {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string"
+ },
+ "reason": {
+ "type": "string"
+ },
+ "confidence": {
+ "type": "number"
+ }
+ },
+ "required": [
+ "id",
+ "reason",
+ "confidence"
+ ]
+ }
+ ]
+ }
+ ```
+
+ When no data source is needed, you return an empty JSON list `[]`. You do not ask any
+ follow-up questions. You do not address the user. Your response consists solely of
+ the JSON list.
+ """;
+
+ /// <inheritdoc />
+ protected override string SystemPrompt(string availableDataSources) => $"""
+ {this.JobDescription}
+
+ {availableDataSources}
+ """;
+
+ /// <inheritdoc />
+ public override Settings.Provider? ProviderSettings { get; set; }
+
+ /// <summary>
+ /// The data source selection agent does not work with context. Use
+ /// the process input method instead.
+ /// </summary>
+ /// <returns>The chat thread without any changes.</returns>
+ public override Task ProcessContext(ChatThread chatThread, IDictionary additionalData) => Task.FromResult(chatThread);
+
+ /// <inheritdoc />
+ public override async Task ProcessInput(ContentBlock input, IDictionary additionalData)
+ {
+ if (input.Content is not ContentText text)
+ return EMPTY_BLOCK; // only text content is handled
+
+ if(text.InitialRemoteWait || text.IsStreaming)
+ return EMPTY_BLOCK;
+
+ if(string.IsNullOrWhiteSpace(text.Text))
+ return EMPTY_BLOCK;
+
+ if(!additionalData.TryGetValue("availableDataSources", out var availableDataSources) || string.IsNullOrWhiteSpace(availableDataSources))
+ return EMPTY_BLOCK; // without a list of data sources there is nothing to choose from
+
+ var thread = this.CreateChatThread(this.SystemPrompt(availableDataSources));
+ var time = this.AddUserRequest(thread, text.Text);
+ await this.AddAIResponseAsync(thread, time);
+
+ var answer = thread.Blocks[^1]; // the last block of the thread is taken as the agent's answer
+
+ this.answers.Add(answer); // kept so that GetAnswers() can return it later
+ return answer;
+ }
+
+ /// <inheritdoc />
+ public override Task MadeDecision(ContentBlock input) => Task.FromResult(true);
+
+ /// <inheritdoc />
+ public override IReadOnlyCollection GetContext() => [];
+
+ /// <inheritdoc />
+ public override IReadOnlyCollection GetAnswers() => this.answers;
+
+ #endregion
+
+    /// <summary>
+    /// Asks the LLM which of the allowed data sources fit the chat's system prompt and the last prompt.
+    /// </summary>
+    /// <returns>The data sources named by the LLM; empty when the prompt is empty or the answer is unusable.</returns>
+    public async Task<List<SelectedDataSource>> PerformSelectionAsync(IProvider provider, IContent lastPrompt, ChatThread chatThread, AllowedSelectedDataSources dataSources, CancellationToken token = default)
+    {
+        logger.LogInformation("The AI should select the appropriate data sources.");
+
+        //
+        // 1. Which LLM provider should the agent use?
+        //
+
+        // We start with the provider currently selected by the user:
+        var agentProvider = this.SettingsManager.ConfigurationData.Providers.FirstOrDefault(x => x.Id == provider.Id);
+
+        // If the user preselected an agent provider, we try to use this one:
+        if (this.SettingsManager.ConfigurationData.AgentDataSourceSelection.PreselectAgentOptions)
+        {
+            var configuredAgentProvider = this.SettingsManager.ConfigurationData.Providers.FirstOrDefault(x => x.Id == this.SettingsManager.ConfigurationData.AgentDataSourceSelection.PreselectedAgentProvider);
+
+            // If the configured agent provider is available, we use it:
+            if (configuredAgentProvider != default)
+                agentProvider = configuredAgentProvider;
+        }
+
+        // Assign the provider settings to the agent:
+        logger.LogInformation($"The agent for the data source selection uses the provider '{agentProvider.InstanceName}' ({agentProvider.UsedLLMProvider.ToName()}, confidence={agentProvider.UsedLLMProvider.GetConfidence(this.SettingsManager).Level.GetName()}).");
+        this.ProviderSettings = agentProvider;
+
+        //
+        // 2. Prepare the current system and user prompts as input for the agent:
+        //
+        var lastPromptContent = lastPrompt switch
+        {
+            ContentText text => text.Text,
+
+            // Image prompts may be empty, e.g., when the image is too large:
+            ContentImage image => await image.AsBase64(token),
+
+            // Other content types are not supported yet:
+            _ => string.Empty,
+        };
+
+        if (string.IsNullOrWhiteSpace(lastPromptContent))
+        {
+            logger.LogWarning("The last prompt is empty. The AI cannot select data sources.");
+            return [];
+        }
+
+        //
+        // 3. Prepare the allowed data sources as input for the agent:
+        //
+        var additionalData = new Dictionary<string, string>();
+        logger.LogInformation("Preparing the list of allowed data sources for the agent to choose from.");
+
+        // Notice: We do not dispose the Rust service here. The Rust service is a singleton
+        // and will be disposed when the application shuts down:
+        var rustService = Program.SERVICE_PROVIDER.GetService<RustService>()!;
+
+        var sb = new StringBuilder();
+        sb.AppendLine("The following data sources are available for selection:");
+        foreach (var ds in dataSources.AllowedDataSources)
+        {
+            switch (ds)
+            {
+                case DataSourceLocalDirectory localDirectory:
+                    sb.AppendLine($"- Id={ds.Id}, name='{localDirectory.Name}', type=local directory, path='{localDirectory.Path}'");
+                    break;
+
+                case DataSourceLocalFile localFile:
+                    sb.AppendLine($"- Id={ds.Id}, name='{localFile.Name}', type=local file, path='{localFile.FilePath}'");
+                    break;
+
+                case IERIDataSource eriDataSource:
+                    var eriServerDescription = string.Empty;
+
+                    try
+                    {
+                        // Call the ERI server to get the server description:
+                        using var eriClient = ERIClientFactory.Get(eriDataSource.Version, eriDataSource)!;
+                        var authResponse = await eriClient.AuthenticateAsync(eriDataSource, rustService, token);
+                        if (authResponse.Successful)
+                        {
+                            var serverDescriptionResponse = await eriClient.GetDataSourceInfoAsync(token);
+                            if (serverDescriptionResponse.Successful)
+                            {
+                                eriServerDescription = serverDescriptionResponse.Data.Description;
+
+                                // Remove all line breaks from the description:
+                                eriServerDescription = eriServerDescription.Replace("\n", " ").Replace("\r", " ");
+                            }
+                            else
+                                logger.LogWarning($"Was not able to retrieve the server description from the ERI data source '{eriDataSource.Name}'. Message: {serverDescriptionResponse.Message}");
+                        }
+                        else
+                            logger.LogWarning($"Was not able to authenticate with the ERI data source '{eriDataSource.Name}'. Message: {authResponse.Message}");
+                    }
+                    catch (Exception e)
+                    {
+                        logger.LogWarning($"The ERI data source '{eriDataSource.Name}' is not available. Thus, we cannot retrieve the server description. Error: {e.Message}");
+                    }
+
+                    // Append the ERI data source to the list. Use the server description if available:
+                    if (string.IsNullOrWhiteSpace(eriServerDescription))
+                        sb.AppendLine($"- Id={ds.Id}, name='{eriDataSource.Name}', type=external data source");
+                    else
+                        sb.AppendLine($"- Id={ds.Id}, name='{eriDataSource.Name}', type=external data source, description='{eriServerDescription}'");
+
+                    break;
+            }
+        }
+
+        logger.LogInformation("Prepared the list of allowed data sources for the agent.");
+        additionalData.Add("availableDataSources", sb.ToString());
+
+        //
+        // 4. Let the agent select the data sources:
+        //
+        var prompt = $"""
+                      The system prompt is:
+
+                      ```
+                      {chatThread.SystemPrompt}
+                      ```
+
+                      The user prompt is:
+
+                      ```
+                      {lastPromptContent}
+                      ```
+                      """;
+
+        // Call the agent:
+        var aiResponse = await this.ProcessInput(new ContentBlock
+        {
+            Time = DateTimeOffset.UtcNow,
+            ContentType = ContentType.TEXT,
+            Role = ChatRole.USER,
+            Content = new ContentText
+            {
+                Text = prompt,
+            },
+        }, additionalData);
+
+        if(aiResponse.Content is null)
+        {
+            logger.LogWarning("The agent did not return a response.");
+            return [];
+        }
+
+        switch (aiResponse)
+        {
+
+            //
+            // 5. Parse the agent response:
+            //
+            case { ContentType: ContentType.TEXT, Content: ContentText textContent }:
+            {
+                //
+                // What we expect is a JSON list of SelectedDataSource objects:
+                //
+                var selectedDataSourcesJson = textContent.Text;
+
+                //
+                // We know how bad LLM may be in generating JSON without surrounding text.
+                // Thus, we expect the worst and try to extract the JSON list from the text:
+                //
+                var json = this.ExtractJson(selectedDataSourcesJson);
+
+                try
+                {
+                    var aiSelectedDataSources = JsonSerializer.Deserialize<List<SelectedDataSource>>(json, JSON_SERIALIZER_OPTIONS);
+                    return aiSelectedDataSources ?? [];
+                }
+                catch (Exception e)
+                {
+                    logger.LogWarning(e, "The agent answered with an invalid or unexpected JSON format.");
+                    return [];
+                }
+            }
+
+            case { ContentType: ContentType.TEXT }:
+                logger.LogWarning("The agent answered with an unexpected inner content type.");
+                return [];
+
+            case { ContentType: ContentType.NONE }:
+                logger.LogWarning("The agent did not return a response.");
+                return [];
+
+            default:
+                logger.LogWarning($"The agent answered with an unexpected content type '{aiResponse.ContentType}'.");
+                return [];
+        }
+    }
+
+    /// <summary>
+    /// Extracts the JSON list from the given text. The text may contain additional
+    /// information around the JSON list. The method tries to extract the JSON list
+    /// from the text.
+    /// </summary>
+    /// <remarks>
+    /// Algorithm: The method searches for the first line that contains only a '[' character.
+    /// Then, it searches for the first line that contains only a ']' character. The method
+    /// returns the text between these two lines (including the brackets). When the method
+    /// cannot find the JSON list, it returns an empty string.
+    /// <br/>
+    /// This overload is using strings instead of spans. We can use this overload in any
+    /// async method. Thus, it is a wrapper around the span-based method. Yes, we are losing
+    /// the memory efficiency of the span-based method, but we still gain the performance
+    /// of the span-based method: the entire search algorithm is span-based.
+    /// </remarks>
+    /// <param name="text">The text that may contain the JSON list.</param>
+    /// <returns>The extracted JSON list.</returns>
+    private string ExtractJson(string text) => ExtractJson(text.AsSpan()).ToString();
+
+    /// <summary>
+    /// Extracts the JSON list from the given text. The text may contain additional
+    /// information around the JSON list, e.g., an introduction or Markdown code fences.
+    /// </summary>
+    /// <remarks>
+    /// Algorithm: The method searches for the first line that contains only '[' characters
+    /// and whitespace. Then, it searches for the next line that contains only ']' characters
+    /// and whitespace, and returns the text between these two lines (including the brackets).
+    /// Blank lines never count as a bracket line. When the method cannot find such a pair of
+    /// lines (e.g., the list is written on a single line), it returns an empty span.
+    /// </remarks>
+    /// <param name="text">The text that may contain the JSON list.</param>
+    /// <returns>The extracted JSON list.</returns>
+    private static ReadOnlySpan<char> ExtractJson(ReadOnlySpan<char> text)
+    {
+        var startIndex = -1;
+        var lineStart = 0;
+
+        for (var i = 0; i <= text.Length; i++)
+        {
+            // A line ends at a line feed or at the end of the text:
+            if (i < text.Length && text[i] != '\n')
+                continue;
+
+            if (startIndex < 0)
+            {
+                // Still looking for the opening bracket line:
+                if (IsCharacterAloneInLine(text, lineStart, i, '['))
+                    startIndex = lineStart;
+            }
+            else if (IsCharacterAloneInLine(text, lineStart, i, ']'))
+            {
+                // i is either the line feed after ']' or text.Length; keep the slice in bounds:
+                return text.Slice(startIndex, Math.Min(text.Length, i + 1) - startIndex);
+            }
+
+            lineStart = i + 1;
+        }
+
+        return ReadOnlySpan<char>.Empty;
+    }
+
+    /// <summary>
+    /// Checks whether the line [lineStart, lineEnd) contains the given character at least
+    /// once and nothing else but whitespace. Blank lines do not match; otherwise, an empty
+    /// line in front of the JSON list would be mistaken for the opening bracket line.
+    /// </summary>
+    private static bool IsCharacterAloneInLine(ReadOnlySpan<char> text, int lineStart, int lineEnd, char character)
+    {
+        var found = false;
+        for (var i = lineStart; i < lineEnd; i++)
+        {
+            if (text[i] == character)
+                found = true;
+            else if (!char.IsWhiteSpace(text[i]))
+                return false;
+        }
+        return found;
+    }
+}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Agents/SelectedDataSource.cs b/app/MindWork AI Studio/Agents/SelectedDataSource.cs
new file mode 100644
index 0000000..c8b7192
--- /dev/null
+++ b/app/MindWork AI Studio/Agents/SelectedDataSource.cs
@@ -0,0 +1,9 @@
+namespace AIStudio.Agents;
+
+/// <summary>
+/// Represents a selected data source, chosen by the agent.
+/// </summary>
+/// <param name="Id">The data source ID.</param>
+/// <param name="Reason">The reason for selecting the data source.</param>
+/// <param name="Confidence">The confidence of the agent in the selection.</param>
+public readonly record struct SelectedDataSource(string Id, string Reason, float Confidence);
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Chat/ChatThread.cs b/app/MindWork AI Studio/Chat/ChatThread.cs
index 3e41161..7709db7 100644
--- a/app/MindWork AI Studio/Chat/ChatThread.cs
+++ b/app/MindWork AI Studio/Chat/ChatThread.cs
@@ -1,3 +1,4 @@
+using AIStudio.Components;
using AIStudio.Settings;
using AIStudio.Settings.DataModel;
@@ -33,6 +34,11 @@ public sealed record ChatThread
///
public DataSourceOptions DataSourceOptions { get; set; } = new();
+ ///
+ /// The AI-selected data sources for this chat thread.
+ ///
+ public IReadOnlyList AISelectedDataSources { get; set; } = [];
+
///
/// The name of the chat thread. Usually generated by an AI model or manually edited by the user.
///
diff --git a/app/MindWork AI Studio/Chat/ContentImage.cs b/app/MindWork AI Studio/Chat/ContentImage.cs
index 0d83145..4e108df 100644
--- a/app/MindWork AI Studio/Chat/ContentImage.cs
+++ b/app/MindWork AI Studio/Chat/ContentImage.cs
@@ -49,4 +49,62 @@ public sealed class ContentImage : IContent
/// The image source.
///
public required string Source { get; set; }
+
+    /// <summary>
+    /// Read the image content as a base64 string.
+    /// </summary>
+    /// <remarks>
+    /// The images are directly converted to base64 strings. The maximum
+    /// size of the image is around 10 MB. If the image is larger, the method
+    /// returns an empty string. For URLs, the limit is also enforced when the
+    /// server does not announce a content length.
+    /// <br/>
+    /// As of now, this method does no sort of image processing. LLMs usually
+    /// do not work with arbitrary image sizes. In the future, we might have
+    /// to resize the images before sending them to the model.
+    /// </remarks>
+    /// <param name="token">The cancellation token.</param>
+    /// <returns>The image content as a base64 string; might be empty.</returns>
+    public async Task<string> AsBase64(CancellationToken token = default)
+    {
+        const long MAX_IMAGE_BYTES = 10_000_000;
+
+        switch (this.SourceType)
+        {
+            case ContentImageSource.BASE64:
+                return this.Source;
+
+            case ContentImageSource.URL:
+            {
+                using var httpClient = new HttpClient();
+                using var response = await httpClient.GetAsync(this.Source, HttpCompletionOption.ResponseHeadersRead, token);
+                if (!response.IsSuccessStatusCode)
+                    return string.Empty;
+
+                // Reject early when the server announces an oversized body:
+                if (response.Content.Headers.ContentLength > MAX_IMAGE_BYTES)
+                    return string.Empty;
+
+                // The content length is optional (e.g., chunked responses), so check the actual size as well:
+                var bytes = await response.Content.ReadAsByteArrayAsync(token);
+                return bytes.Length > MAX_IMAGE_BYTES ? string.Empty : Convert.ToBase64String(bytes);
+            }
+
+            case ContentImageSource.LOCAL_PATH:
+            {
+                if (!File.Exists(this.Source))
+                    return string.Empty;
+
+                // Check the size before reading the file into memory:
+                if (new FileInfo(this.Source).Length > MAX_IMAGE_BYTES)
+                    return string.Empty;
+
+                var bytes = await File.ReadAllBytesAsync(this.Source, token);
+                return Convert.ToBase64String(bytes);
+            }
+
+            default:
+                return string.Empty;
+        }
+    }
}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Chat/ContentText.cs b/app/MindWork AI Studio/Chat/ContentText.cs
index 19928b4..c3b4625 100644
--- a/app/MindWork AI Studio/Chat/ContentText.cs
+++ b/app/MindWork AI Studio/Chat/ContentText.cs
@@ -1,5 +1,7 @@
using System.Text.Json.Serialization;
+using AIStudio.Agents;
+using AIStudio.Components;
using AIStudio.Provider;
using AIStudio.Settings;
using AIStudio.Tools.Services;
@@ -41,11 +43,19 @@ public sealed class ContentText : IContent
if(chatThread is null)
return;
+ var logger = Program.SERVICE_PROVIDER.GetService>()!;
+
//
- // Check if the user wants to bind any data sources to the chat:
+ // 1. Check if the user wants to bind any data sources to the chat:
//
- if (chatThread.DataSourceOptions.IsEnabled())
+ if (chatThread.DataSourceOptions.IsEnabled() && lastPrompt is not null)
{
+ logger.LogInformation("Data sources are enabled for this chat.");
+
+ // Across the different code-branches, we keep track of whether it
+ // makes sense to proceed with the RAG process:
+ var proceedWithRAG = true;
+
//
// When the user wants to bind data sources to the chat, we
// have to check if the data sources are available for the
@@ -61,18 +71,146 @@ public sealed class ContentText : IContent
//
if (chatThread.DataSourceOptions.AutomaticDataSourceSelection)
{
- // TODO: Start agent based on allowed data sources.
+ // Get the agent for the data source selection:
+ var selectionAgent = Program.SERVICE_PROVIDER.GetService()!;
+
+ // Let the AI agent do its work:
+ IReadOnlyList finalAISelection = [];
+ var aiSelectedDataSources = await selectionAgent.PerformSelectionAsync(provider, lastPrompt, chatThread, dataSources, token);
+
+ // Check if the AI selected any data sources:
+ if(aiSelectedDataSources.Count is 0)
+ {
+ logger.LogWarning("The AI did not select any data sources. The RAG process is skipped.");
+ proceedWithRAG = false;
+
+ // Send the selected data sources to the data source selection component.
+ // Then, the user can see which data sources were selected by the AI.
+ await MessageBus.INSTANCE.SendMessage(null, Event.RAG_AUTO_DATA_SOURCES_SELECTED, finalAISelection);
+ chatThread.AISelectedDataSources = finalAISelection;
+ }
+ else
+ {
+ // Log the selected data sources:
+ var selectedDataSourceInfo = aiSelectedDataSources.Select(ds => $"[Id={ds.Id}, reason={ds.Reason}, confidence={ds.Confidence}]").Aggregate((a, b) => $"'{a}', '{b}'");
+ logger.LogInformation($"The AI selected the data sources automatically. {aiSelectedDataSources.Count} data source(s) are selected: {selectedDataSourceInfo}.");
+
+ //
+ // Check how many data sources were hallucinated by the AI:
+ //
+ var totalAISelectedDataSources = aiSelectedDataSources.Count;
+
+ // Filter out the data sources that are not available:
+ aiSelectedDataSources = aiSelectedDataSources.Where(x => settings.ConfigurationData.DataSources.FirstOrDefault(ds => ds.Id == x.Id) is not null).ToList();
+
+ // Store the real AI-selected data sources:
+ finalAISelection = aiSelectedDataSources.Select(x => new DataSourceAgentSelected { DataSource = settings.ConfigurationData.DataSources.First(ds => ds.Id == x.Id), AIDecision = x, Selected = false }).ToList();
+
+ var numHallucinatedSources = totalAISelectedDataSources - aiSelectedDataSources.Count;
+ if(numHallucinatedSources > 0)
+ logger.LogWarning($"The AI hallucinated {numHallucinatedSources} data source(s). We ignore them.");
+
+ if (aiSelectedDataSources.Count > 3)
+ {
+ //
+ // We have more than 3 data sources. Let's filter by confidence.
+ // In order to do that, we must identify the lower and upper
+ // bounds of the confidence interval:
+ //
+ var confidenceValues = aiSelectedDataSources.Select(x => x.Confidence).ToList();
+ var lowerBound = confidenceValues.Min();
+ var upperBound = confidenceValues.Max();
+
+ //
+ // Next, we search for a threshold so that we have between 2 and 3
+ // data sources. When not possible, we take all data sources.
+ //
+ var threshold = 0.0f;
+
+ // Check the case where the confidence values are too close:
+ if (upperBound - lowerBound >= 0.01)
+ {
+ var previousThreshold = 0.0f;
+ for (var i = 0; i < 10; i++)
+ {
+ threshold = lowerBound + (upperBound - lowerBound) * i / 10;
+ var numMatches = aiSelectedDataSources.Count(x => x.Confidence >= threshold);
+ if (numMatches <= 1)
+ {
+ threshold = previousThreshold;
+ break;
+ }
+
+ if (numMatches is <= 3 and >= 2)
+ break;
+
+ previousThreshold = threshold;
+ }
+ }
+
+ //
+ // Filter the data sources by the threshold:
+ //
+ aiSelectedDataSources = aiSelectedDataSources.Where(x => x.Confidence >= threshold).ToList();
+ foreach (var dataSource in finalAISelection)
+ if(aiSelectedDataSources.Any(x => x.Id == dataSource.DataSource.Id))
+ dataSource.Selected = true;
+
+ logger.LogInformation($"The AI selected {aiSelectedDataSources.Count} data source(s) with a confidence of at least {threshold}.");
+
+ // Transform the final data sources to the actual data sources:
+ selectedDataSources = aiSelectedDataSources.Select(x => settings.ConfigurationData.DataSources.FirstOrDefault(ds => ds.Id == x.Id)).Where(ds => ds is not null).ToList()!;
+ }
+
+ // We have max. 3 data sources. We take all of them:
+ else
+ {
+ // Transform the selected data sources to the actual data sources:
+ selectedDataSources = aiSelectedDataSources.Select(x => settings.ConfigurationData.DataSources.FirstOrDefault(ds => ds.Id == x.Id)).Where(ds => ds is not null).ToList()!;
+
+ // Mark the data sources as selected:
+ foreach (var dataSource in finalAISelection)
+ dataSource.Selected = true;
+ }
+
+ // Send the selected data sources to the data source selection component.
+ // Then, the user can see which data sources were selected by the AI.
+ await MessageBus.INSTANCE.SendMessage(null, Event.RAG_AUTO_DATA_SOURCES_SELECTED, finalAISelection);
+ chatThread.AISelectedDataSources = finalAISelection;
+ }
+ }
+ else
+ {
+ //
+ // No, the user made the choice manually. The selection may be empty, so we
+ // must not use Aggregate here; it throws for an empty sequence:
+ var selectedDataSourceInfo = string.Join(", ", selectedDataSources.Select(ds => $"'{ds.Name}'"));
+ logger.LogInformation($"The user selected the data sources manually. {selectedDataSources.Count} data source(s) are selected: {selectedDataSourceInfo}.");
 }
+ if(selectedDataSources.Count == 0)
+ {
+ logger.LogWarning("No data sources are selected. The RAG process is skipped.");
+ proceedWithRAG = false;
+ }
+
//
// Trigger the retrieval part of the (R)AG process:
//
+ if (proceedWithRAG)
+ {
+
+ }
//
// Perform the augmentation of the R(A)G process:
//
+ if (proceedWithRAG)
+ {
+
+ }
}
-
+
// Store the last time we got a response. We use this later
// to determine whether we should notify the UI about the
// new content or not. Depends on the energy saving mode
diff --git a/app/MindWork AI Studio/Components/ChatComponent.razor b/app/MindWork AI Studio/Components/ChatComponent.razor
index 2ae6eb2..06dd07b 100644
--- a/app/MindWork AI Studio/Components/ChatComponent.razor
+++ b/app/MindWork AI Studio/Components/ChatComponent.razor
@@ -111,7 +111,7 @@
@if (PreviewFeatures.PRE_RAG_2024.IsEnabled(this.SettingsManager))
{
-
+
}
diff --git a/app/MindWork AI Studio/Components/ChatComponent.razor.cs b/app/MindWork AI Studio/Components/ChatComponent.razor.cs
index 10b8bd7..7a25680 100644
--- a/app/MindWork AI Studio/Components/ChatComponent.razor.cs
+++ b/app/MindWork AI Studio/Components/ChatComponent.razor.cs
@@ -305,6 +305,14 @@ public partial class ChatComponent : MSGComponentBase, IAsyncDisposable
await this.ChatThreadChanged.InvokeAsync(this.ChatThread);
}
+ private IReadOnlyList GetAgentSelectedDataSources()
+ {
+ if (this.ChatThread is null)
+ return [];
+
+ return this.ChatThread.AISelectedDataSources;
+ }
+
private DataSourceOptions GetCurrentDataSourceOptions()
{
if (this.ChatThread is not null)
@@ -481,6 +489,8 @@ public partial class ChatComponent : MSGComponentBase, IAsyncDisposable
// Disable the stream state:
this.isStreaming = false;
+
+ // Update the UI:
this.StateHasChanged();
}
@@ -674,7 +684,7 @@ public partial class ChatComponent : MSGComponentBase, IAsyncDisposable
this.currentWorkspaceId = this.ChatThread.WorkspaceId;
this.currentWorkspaceName = await WorkspaceBehaviour.LoadWorkspaceName(this.ChatThread.WorkspaceId);
this.WorkspaceName(this.currentWorkspaceName);
- this.dataSourceSelectionComponent?.ChangeOptionWithoutSaving(this.ChatThread.DataSourceOptions);
+ this.dataSourceSelectionComponent?.ChangeOptionWithoutSaving(this.ChatThread.DataSourceOptions, this.ChatThread.AISelectedDataSources);
}
else
{
diff --git a/app/MindWork AI Studio/Components/DataSourceAgentSelected.cs b/app/MindWork AI Studio/Components/DataSourceAgentSelected.cs
new file mode 100644
index 0000000..5108596
--- /dev/null
+++ b/app/MindWork AI Studio/Components/DataSourceAgentSelected.cs
@@ -0,0 +1,25 @@
+using AIStudio.Agents;
+using AIStudio.Settings;
+
+namespace AIStudio.Components;
+
+/// <summary>
+/// A data structure to combine the data source and the underlying AI decision.
+/// </summary>
+public sealed class DataSourceAgentSelected
+{
+ /// <summary>
+ /// The data source.
+ /// </summary>
+ public required IDataSource DataSource { get; set; }
+
+ /// <summary>
+ /// The AI decision, which led to the selection of the data source.
+ /// </summary>
+ public required SelectedDataSource AIDecision { get; set; }
+
+ /// <summary>
+ /// Indicates whether the data source is part of the final selection for the RAG process.
+ /// </summary>
+ public bool Selected { get; set; }
+}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Components/DataSourceSelection.razor b/app/MindWork AI Studio/Components/DataSourceSelection.razor
index f9dff17..0816044 100644
--- a/app/MindWork AI Studio/Components/DataSourceSelection.razor
+++ b/app/MindWork AI Studio/Components/DataSourceSelection.razor
@@ -24,7 +24,7 @@
Data Source Selection
-
+
@if (this.waitingForDataSources)
{
@@ -38,16 +38,54 @@
{
-
- this.SelectionChanged(x))" Style="max-height: 14em;">
- @foreach (var source in this.availableDataSources)
- {
-
- @source.Name
-
- }
-
-
+
+ @if (this.aiBasedSourceSelection is false || this.DataSourcesAISelected.Count == 0)
+ {
+
+ this.SelectionChanged(x))" Style="max-height: 14em;">
+ @foreach (var source in this.availableDataSources)
+ {
+
+ @source.Name
+
+ }
+
+
+ }
+ else
+ {
+
+
+
+ @foreach (var source in this.availableDataSources)
+ {
+
+ @source.Name
+
+ }
+
+
+
+
+ @foreach (var source in this.DataSourcesAISelected)
+ {
+
+
+
+ @source.DataSource.Name
+
+
+
+
+ @this.GetAIReasoning(source)
+
+
+
+ }
+
+
+
+ }
}
}
@@ -79,7 +117,7 @@ else if (this.SelectionMode is DataSourceSelectionMode.CONFIGURATION_MODE)
- this.SelectionChanged(x))">
+ this.SelectionChanged(x))">
@foreach (var source in this.availableDataSources)
{
diff --git a/app/MindWork AI Studio/Components/DataSourceSelection.razor.cs b/app/MindWork AI Studio/Components/DataSourceSelection.razor.cs
index 7b75bbd..73796d6 100644
--- a/app/MindWork AI Studio/Components/DataSourceSelection.razor.cs
+++ b/app/MindWork AI Studio/Components/DataSourceSelection.razor.cs
@@ -25,6 +25,9 @@ public partial class DataSourceSelection : ComponentBase, IMessageBusReceiver, I
[Parameter]
public EventCallback DataSourceOptionsChanged { get; set; }
+
+ [Parameter]
+ public IReadOnlyList DataSourcesAISelected { get; set; } = [];
[Parameter]
public string ConfigurationHeaderMessage { get; set; } = string.Empty;
@@ -58,7 +61,7 @@ public partial class DataSourceSelection : ComponentBase, IMessageBusReceiver, I
protected override async Task OnInitializedAsync()
{
this.MessageBus.RegisterComponent(this);
- this.MessageBus.ApplyFilters(this, [], [ Event.COLOR_THEME_CHANGED ]);
+ this.MessageBus.ApplyFilters(this, [], [ Event.COLOR_THEME_CHANGED, Event.RAG_AUTO_DATA_SOURCES_SELECTED ]);
//
// Load the settings:
@@ -129,9 +132,17 @@ public partial class DataSourceSelection : ComponentBase, IMessageBusReceiver, I
#endregion
- public void ChangeOptionWithoutSaving(DataSourceOptions options)
+ private SelectionMode GetListSelectionMode() => this.aiBasedSourceSelection ? MudBlazor.SelectionMode.SingleSelection : MudBlazor.SelectionMode.MultiSelection;
+
+ private IReadOnlyCollection GetSelectedDataSourcesWithAI() => this.DataSourcesAISelected.Where(n => n.Selected).ToList();
+
+ private string GetAIReasoning(DataSourceAgentSelected source) => $"AI reasoning (confidence {source.AIDecision.Confidence:P0}): {source.AIDecision.Reason}";
+
+ public void ChangeOptionWithoutSaving(DataSourceOptions options, IReadOnlyList? aiSelectedDataSources = null)
{
this.DataSourceOptions = options;
+ this.DataSourcesAISelected = aiSelectedDataSources ?? [];
+
this.aiBasedSourceSelection = this.DataSourceOptions.AutomaticDataSourceSelection;
this.aiBasedValidation = this.DataSourceOptions.AutomaticValidation;
this.areDataSourcesEnabled = !this.DataSourceOptions.DisableDataSources;
@@ -237,6 +248,13 @@ public partial class DataSourceSelection : ComponentBase, IMessageBusReceiver, I
this.showDataSourceSelection = false;
this.StateHasChanged();
break;
+
+ case Event.RAG_AUTO_DATA_SOURCES_SELECTED:
+ if(data is IReadOnlyList aiSelectedDataSources)
+ this.DataSourcesAISelected = aiSelectedDataSources;
+
+ this.StateHasChanged();
+ break;
}
return Task.CompletedTask;
diff --git a/app/MindWork AI Studio/Components/Settings/SettingsPanelAgentDataSourceSelection.razor b/app/MindWork AI Studio/Components/Settings/SettingsPanelAgentDataSourceSelection.razor
new file mode 100644
index 0000000..da2a6af
--- /dev/null
+++ b/app/MindWork AI Studio/Components/Settings/SettingsPanelAgentDataSourceSelection.razor
@@ -0,0 +1,11 @@
+@inherits SettingsPanelBase
+
+
+
+
+ Use Case: this agent is used to select the appropriate data sources for the current prompt.
+
+
+
+
+
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Components/Settings/SettingsPanelAgentDataSourceSelection.razor.cs b/app/MindWork AI Studio/Components/Settings/SettingsPanelAgentDataSourceSelection.razor.cs
new file mode 100644
index 0000000..1c191a5
--- /dev/null
+++ b/app/MindWork AI Studio/Components/Settings/SettingsPanelAgentDataSourceSelection.razor.cs
@@ -0,0 +1,3 @@
+namespace AIStudio.Components.Settings;
+
+public partial class SettingsPanelAgentDataSourceSelection : SettingsPanelBase;
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Pages/Settings.razor b/app/MindWork AI Studio/Pages/Settings.razor
index 41c4298..f8d9050 100644
--- a/app/MindWork AI Studio/Pages/Settings.razor
+++ b/app/MindWork AI Studio/Pages/Settings.razor
@@ -27,6 +27,7 @@
+
diff --git a/app/MindWork AI Studio/Program.cs b/app/MindWork AI Studio/Program.cs
index b18bdd1..a9ae7a9 100644
--- a/app/MindWork AI Studio/Program.cs
+++ b/app/MindWork AI Studio/Program.cs
@@ -21,6 +21,7 @@ internal sealed class Program
public static RustService RUST_SERVICE = null!;
public static Encryption ENCRYPTION = null!;
public static string API_TOKEN = null!;
+ public static IServiceProvider SERVICE_PROVIDER = null!;
public static async Task Main(string[] args)
{
@@ -117,6 +118,7 @@ internal sealed class Program
builder.Services.AddSingleton();
builder.Services.AddSingleton();
builder.Services.AddTransient();
+ builder.Services.AddTransient();
builder.Services.AddTransient();
builder.Services.AddHostedService();
builder.Services.AddHostedService();
@@ -148,6 +150,10 @@ internal sealed class Program
var programLogger = app.Services.GetRequiredService>();
programLogger.LogInformation("Starting the AI Studio server.");
+ // Store the service provider (DI). We need it later for some classes,
+ // which are not part of the request pipeline:
+ SERVICE_PROVIDER = app.Services;
+
// Initialize the encryption service:
programLogger.LogInformation("Initializing the encryption service.");
var encryptionLogger = app.Services.GetRequiredService>();
@@ -196,5 +202,8 @@ internal sealed class Program
};
await serverTask;
+
+ RUST_SERVICE.Dispose();
+ programLogger.LogInformation("The AI Studio server was stopped.");
}
}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Settings/DataModel/Data.cs b/app/MindWork AI Studio/Settings/DataModel/Data.cs
index 2dd099e..729cfe4 100644
--- a/app/MindWork AI Studio/Settings/DataModel/Data.cs
+++ b/app/MindWork AI Studio/Settings/DataModel/Data.cs
@@ -74,6 +74,8 @@ public sealed class Data
public DataTextContentCleaner TextContentCleaner { get; init; } = new();
+ public DataAgentDataSourceSelection AgentDataSourceSelection { get; init; } = new();
+
public DataAgenda Agenda { get; init; } = new();
public DataGrammarSpelling GrammarSpelling { get; init; } = new();
diff --git a/app/MindWork AI Studio/Settings/DataModel/DataAgentDataSourceSelection.cs b/app/MindWork AI Studio/Settings/DataModel/DataAgentDataSourceSelection.cs
new file mode 100644
index 0000000..a0ae5fb
--- /dev/null
+++ b/app/MindWork AI Studio/Settings/DataModel/DataAgentDataSourceSelection.cs
@@ -0,0 +1,14 @@
+namespace AIStudio.Settings.DataModel;
+
+public sealed class DataAgentDataSourceSelection
+{
+ ///
+ /// Preselect any options for the data source selection agent?
+ ///
+ public bool PreselectAgentOptions { get; set; }
+
+ ///
+ /// Preselect a provider for the data source selection agent?
+ ///
+ public string PreselectedAgentProvider { get; set; } = string.Empty;
+}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Settings/DataModel/DataSourceERI_V1.cs b/app/MindWork AI Studio/Settings/DataModel/DataSourceERI_V1.cs
index a9931e3..328161e 100644
--- a/app/MindWork AI Studio/Settings/DataModel/DataSourceERI_V1.cs
+++ b/app/MindWork AI Studio/Settings/DataModel/DataSourceERI_V1.cs
@@ -1,5 +1,6 @@
// ReSharper disable InconsistentNaming
+using AIStudio.Assistants.ERI;
using AIStudio.Tools.ERIClient.DataModel;
namespace AIStudio.Settings.DataModel;
@@ -39,4 +40,7 @@ public readonly record struct DataSourceERI_V1 : IERIDataSource
///
public DataSourceSecurity SecurityPolicy { get; init; } = DataSourceSecurity.NOT_SPECIFIED;
+
+ ///
+ public ERIVersion Version { get; init; } = ERIVersion.V1;
}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Settings/IERIDataSource.cs b/app/MindWork AI Studio/Settings/IERIDataSource.cs
index 0d917d1..35e3779 100644
--- a/app/MindWork AI Studio/Settings/IERIDataSource.cs
+++ b/app/MindWork AI Studio/Settings/IERIDataSource.cs
@@ -1,3 +1,4 @@
+using AIStudio.Assistants.ERI;
using AIStudio.Tools.ERIClient.DataModel;
namespace AIStudio.Settings;
@@ -23,4 +24,9 @@ public interface IERIDataSource : IExternalDataSource
/// The username to use for authentication, when the auth. method is USERNAME_PASSWORD.
///
public string Username { get; init; }
+
+ ///
+ /// The ERI specification version to use.
+ ///
+ public ERIVersion Version { get; init; }
}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Tools/Event.cs b/app/MindWork AI Studio/Tools/Event.cs
index 37a855f..45005fd 100644
--- a/app/MindWork AI Studio/Tools/Event.cs
+++ b/app/MindWork AI Studio/Tools/Event.cs
@@ -23,6 +23,9 @@ public enum Event
WORKSPACE_LOADED_CHAT_CHANGED,
WORKSPACE_TOGGLE_OVERLAY,
+ // RAG events:
+ RAG_AUTO_DATA_SOURCES_SELECTED,
+
// Send events:
SEND_TO_GRAMMAR_SPELLING_ASSISTANT,
SEND_TO_ICON_FINDER_ASSISTANT,
diff --git a/app/MindWork AI Studio/wwwroot/changelog/v0.9.29.md b/app/MindWork AI Studio/wwwroot/changelog/v0.9.29.md
index ed82eea..358146c 100644
--- a/app/MindWork AI Studio/wwwroot/changelog/v0.9.29.md
+++ b/app/MindWork AI Studio/wwwroot/changelog/v0.9.29.md
@@ -2,5 +2,6 @@
- Added the possibility to select data sources for chats. This preview feature is hidden behind the RAG feature flag, check your app options in case you want to enable it.
- Added an option to all data sources to select a local security policy. This preview feature is hidden behind the RAG feature flag.
- Added an option to preselect data sources and options for new chats. This preview feature is hidden behind the RAG feature flag.
+- Added an agent to select the appropriate data sources for any prompt. This preview feature is hidden behind the RAG feature flag.
- Improved confidence card for small spaces.
- Fixed a bug in which 'APP_SETTINGS' appeared as a valid destination in the "send to" menu.
\ No newline at end of file