Added a common augmentation interface & default implementation (#288)

This commit is contained in:
Thorsten Sommer 2025-02-18 11:24:43 +01:00 committed by GitHub
parent f01cf498e2
commit 96e6372fcd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
19 changed files with 249 additions and 62 deletions

View File

@@ -20,7 +20,7 @@ Things we are currently working on:
- [ ] Runtime: Integration of the vector database [LanceDB](https://github.com/lancedb/lancedb)
- [ ] App: Implement the continuous process of vectorizing data
- [x] ~~App: Define a common retrieval context interface for the integration of RAG processes in chats (PR [#281](https://github.com/MindWorkAI/AI-Studio/pull/281), [#284](https://github.com/MindWorkAI/AI-Studio/pull/284), [#286](https://github.com/MindWorkAI/AI-Studio/pull/286), [#287](https://github.com/MindWorkAI/AI-Studio/pull/287))~~
- [ ] App: Define a common augmentation interface for the integration of RAG processes in chats
- [x] ~~App: Define a common augmentation interface for the integration of RAG processes in chats (PR [#288](https://github.com/MindWorkAI/AI-Studio/pull/288))~~
- [x] ~~App: Integrate data sources in chats (PR [#282](https://github.com/MindWorkAI/AI-Studio/pull/282))~~

View File

@@ -7,6 +7,7 @@
<s:String x:Key="/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=MSG/@EntryIndexedValue">MSG</s:String>
<s:String x:Key="/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=RAG/@EntryIndexedValue">RAG</s:String>
<s:String x:Key="/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=UI/@EntryIndexedValue">UI</s:String>
<s:Boolean x:Key="/Default/UserDictionary/Words/=agentic/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=groq/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=ollama/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=tauri_0027s/@EntryIndexedValue">True</s:Boolean></wpf:ResourceDictionary>

View File

@@ -12,6 +12,7 @@ public enum ChatRole
USER,
AI,
AGENT,
RAG,
}
/// <summary>

View File

@@ -7,7 +7,7 @@ namespace AIStudio.Chat;
/// <summary>
/// Represents an image inside the chat.
/// </summary>
public sealed class ContentImage : IContent
public sealed class ContentImage : IContent, IImageSource
{
#region Implementation of IContent
@@ -47,62 +47,4 @@ public sealed class ContentImage : IContent
/// The image source.
/// </summary>
public required string Source { get; set; }
/// <summary>
/// Read the image content as a base64 string.
/// </summary>
/// <remarks>
/// The images are directly converted to base64 strings. The maximum
/// size of the image is around 10 MB. If the image is larger, the method
/// returns an empty string.
///
/// As of now, this method does no sort of image processing. LLMs usually
/// do not work with arbitrary image sizes. In the future, we might have
/// to resize the images before sending them to the model.
/// </remarks>
/// <param name="token">The cancellation token.</param>
/// <returns>The image content as a base64 string; might be empty.</returns>
public async Task<string> AsBase64(CancellationToken token = default)
{
switch (this.SourceType)
{
case ContentImageSource.BASE64:
return this.Source;
case ContentImageSource.URL:
{
using var httpClient = new HttpClient();
using var response = await httpClient.GetAsync(this.Source, HttpCompletionOption.ResponseHeadersRead, token);
if(response.IsSuccessStatusCode)
{
// Read the length of the content:
var lengthBytes = response.Content.Headers.ContentLength;
if(lengthBytes > 10_000_000)
return string.Empty;
var bytes = await response.Content.ReadAsByteArrayAsync(token);
return Convert.ToBase64String(bytes);
}
return string.Empty;
}
case ContentImageSource.LOCAL_PATH:
if(File.Exists(this.Source))
{
// Read the content length:
var length = new FileInfo(this.Source).Length;
if(length > 10_000_000)
return string.Empty;
var bytes = await File.ReadAllBytesAsync(this.Source, token);
return Convert.ToBase64String(bytes);
}
return string.Empty;
default:
return string.Empty;
}
}
}

View File

@@ -0,0 +1,17 @@
namespace AIStudio.Chat;

public interface IImageSource
{
    /// <summary>
    /// The type of the image source.
    /// </summary>
    /// <remarks>
    /// Is the image source a URL, a local file path, a base64 string, etc.?
    /// </remarks>
    public ContentImageSource SourceType { get; init; }

    /// <summary>
    /// The image source.
    /// </summary>
    public string Source { get; set; }
}
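Since the actual conversion logic lives in an extension method on this interface (see the next file), any type that exposes these two members gets AsBase64 for free. A minimal sketch of a hypothetical implementing type, not part of this commit:

// Hypothetical example type, for illustration only:
public sealed class ExampleThumbnail : IImageSource
{
    /// <inheritdoc />
    public ContentImageSource SourceType { get; init; } = ContentImageSource.LOCAL_PATH;

    /// <inheritdoc />
    public string Source { get; set; } = string.Empty;
}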

View File

@@ -0,0 +1,63 @@
namespace AIStudio.Chat;

public static class IImageSourceExtensions
{
    /// <summary>
    /// Read the image content as a base64 string.
    /// </summary>
    /// <remarks>
    /// The images are directly converted to base64 strings. The maximum
    /// size of the image is around 10 MB. If the image is larger, the method
    /// returns an empty string.
    ///
    /// As of now, this method does not perform any image processing. LLMs usually
    /// do not work with arbitrary image sizes. In the future, we might have
    /// to resize the images before sending them to the model.
    /// </remarks>
    /// <param name="image">The image source.</param>
    /// <param name="token">The cancellation token.</param>
    /// <returns>The image content as a base64 string; might be empty.</returns>
    public static async Task<string> AsBase64(this IImageSource image, CancellationToken token = default)
    {
        switch (image.SourceType)
        {
            case ContentImageSource.BASE64:
                return image.Source;

            case ContentImageSource.URL:
            {
                using var httpClient = new HttpClient();
                using var response = await httpClient.GetAsync(image.Source, HttpCompletionOption.ResponseHeadersRead, token);
                if(response.IsSuccessStatusCode)
                {
                    // Read the length of the content:
                    var lengthBytes = response.Content.Headers.ContentLength;
                    if(lengthBytes > 10_000_000)
                        return string.Empty;

                    var bytes = await response.Content.ReadAsByteArrayAsync(token);
                    return Convert.ToBase64String(bytes);
                }

                return string.Empty;
            }

            case ContentImageSource.LOCAL_PATH:
                if(File.Exists(image.Source))
                {
                    // Read the content length:
                    var length = new FileInfo(image.Source).Length;
                    if(length > 10_000_000)
                        return string.Empty;

                    var bytes = await File.ReadAllBytesAsync(image.Source, token);
                    return Convert.ToBase64String(bytes);
                }

                return string.Empty;

            default:
                return string.Empty;
        }
    }
}
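A possible call site, e.g., inside an async method, using the hypothetical ExampleThumbnail sketched above; the file path is only an example:

// Hypothetical usage sketch, not part of this commit:
IImageSource image = new ExampleThumbnail { Source = "/tmp/example.png" };
var base64 = await image.AsBase64();
if(base64 == string.Empty)
{
    // The source was missing, unreachable, or larger than roughly 10 MB.
}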

View File

@@ -37,6 +37,7 @@ public sealed class ProviderAnthropic(ILogger logger) : BaseProvider("https://ap
ChatRole.USER => "user",
ChatRole.AI => "assistant",
ChatRole.AGENT => "assistant",
ChatRole.RAG => "assistant",
_ => "user",
},

View File

@@ -49,6 +49,7 @@ public class ProviderFireworks(ILogger logger) : BaseProvider("https://api.firew
ChatRole.AI => "assistant",
ChatRole.AGENT => "assistant",
ChatRole.SYSTEM => "system",
ChatRole.RAG => "assistant",
_ => "user",
},

View File

@@ -50,6 +50,7 @@ public class ProviderGoogle(ILogger logger) : BaseProvider("https://generativela
ChatRole.AI => "assistant",
ChatRole.AGENT => "assistant",
ChatRole.SYSTEM => "system",
ChatRole.RAG => "assistant",
_ => "user",
},

View File

@@ -50,6 +50,7 @@ public class ProviderGroq(ILogger logger) : BaseProvider("https://api.groq.com/o
ChatRole.AI => "assistant",
ChatRole.AGENT => "assistant",
ChatRole.SYSTEM => "system",
ChatRole.RAG => "assistant",
_ => "user",
},

View File

@@ -48,6 +48,7 @@ public sealed class ProviderMistral(ILogger logger) : BaseProvider("https://api.
ChatRole.AI => "assistant",
ChatRole.AGENT => "assistant",
ChatRole.SYSTEM => "system",
ChatRole.RAG => "assistant",
_ => "user",
},

View File

@@ -76,6 +76,7 @@ public sealed class ProviderOpenAI(ILogger logger) : BaseProvider("https://api.o
ChatRole.USER => "user",
ChatRole.AI => "assistant",
ChatRole.AGENT => "assistant",
ChatRole.RAG => "assistant",
ChatRole.SYSTEM => systemPromptRole,
_ => "user",

View File

@@ -46,6 +46,7 @@ public sealed class ProviderSelfHosted(ILogger logger, Host host, string hostnam
ChatRole.AI => "assistant",
ChatRole.AGENT => "assistant",
ChatRole.SYSTEM => "system",
ChatRole.RAG => "assistant",
_ => "user",
},

View File

@@ -50,6 +50,7 @@ public sealed class ProviderX(ILogger logger) : BaseProvider("https://api.x.ai/v
ChatRole.AI => "assistant",
ChatRole.AGENT => "assistant",
ChatRole.SYSTEM => "system",
ChatRole.RAG => "assistant",
_ => "user",
},

View File

@@ -0,0 +1,119 @@
using System.Text;

using AIStudio.Chat;
using AIStudio.Provider;

namespace AIStudio.Tools.RAG.AugmentationProcesses;

public sealed class AugmentationOne : IAugmentationProcess
{
    #region Implementation of IAugmentationProcess

    /// <inheritdoc />
    public string TechnicalName => "AugmentationOne";

    /// <inheritdoc />
    public string UIName => "Standard augmentation process";

    /// <inheritdoc />
    public string Description => "This is the standard augmentation process, which uses all retrieval contexts to augment the chat thread.";

    /// <inheritdoc />
    public async Task<ChatThread> ProcessAsync(IProvider provider, IContent lastPrompt, ChatThread chatThread, IReadOnlyList<IRetrievalContext> retrievalContexts, CancellationToken token = default)
    {
        var logger = Program.SERVICE_PROVIDER.GetService<ILogger<AugmentationOne>>()!;
        if(retrievalContexts.Count == 0)
        {
            logger.LogWarning("No retrieval contexts were issued. Skipping the augmentation process.");
            return chatThread;
        }

        var numTotalRetrievalContexts = retrievalContexts.Count;
        logger.LogInformation($"Starting the augmentation process over {numTotalRetrievalContexts:###,###,###,###} retrieval contexts.");

        //
        // We build a huge prompt from all retrieval contexts:
        //
        var sb = new StringBuilder();
        sb.AppendLine("The following useful information will help you in processing the user prompt:");
        sb.AppendLine();

        var index = 0;
        foreach(var retrievalContext in retrievalContexts)
        {
            index++;
            sb.AppendLine($"# Retrieval context {index} of {numTotalRetrievalContexts}");
            sb.AppendLine($"Data source name: {retrievalContext.DataSourceName}");
            sb.AppendLine($"Content category: {retrievalContext.Category}");
            sb.AppendLine($"Content type: {retrievalContext.Type}");
            sb.AppendLine($"Content path: {retrievalContext.Path}");

            if(retrievalContext.Links.Count > 0)
            {
                sb.AppendLine("Additional links:");
                foreach(var link in retrievalContext.Links)
                    sb.AppendLine($"- {link}");
            }

            switch(retrievalContext)
            {
                case RetrievalTextContext textContext:
                    sb.AppendLine();
                    sb.AppendLine("Matched text content:");
                    sb.AppendLine("````");
                    sb.AppendLine(textContext.MatchedText);
                    sb.AppendLine("````");

                    if(textContext.SurroundingContent.Count > 0)
                    {
                        sb.AppendLine();
                        sb.AppendLine("Surrounding text content:");
                        foreach(var surrounding in textContext.SurroundingContent)
                        {
                            sb.AppendLine();
                            sb.AppendLine("````");
                            sb.AppendLine(surrounding);
                            sb.AppendLine("````");
                        }
                    }

                    break;

                case RetrievalImageContext imageContext:
                    sb.AppendLine();
                    sb.AppendLine("Matched image content as base64-encoded data:");
                    sb.AppendLine("````");
                    sb.AppendLine(await imageContext.AsBase64(token));
                    sb.AppendLine("````");
                    break;

                default:
                    logger.LogWarning($"The retrieval content type '{retrievalContext.Type}' of data source '{retrievalContext.DataSourceName}' at location '{retrievalContext.Path}' is not supported yet.");
                    break;
            }

            sb.AppendLine();
        }

        //
        // Append the entire augmentation to the chat thread,
        // just before the user prompt:
        //
        chatThread.Blocks.Insert(chatThread.Blocks.Count - 1, new()
        {
            Role = ChatRole.RAG,
            Time = DateTimeOffset.UtcNow,
            ContentType = ContentType.TEXT,
            HideFromUser = true,
            Content = new ContentText
            {
                Text = sb.ToString(),
            }
        });

        return chatThread;
    }

    #endregion
}
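For orientation, the hidden RAG block that this process inserts just before the user prompt has roughly the following shape; the values in angle brackets are hypothetical placeholders, and the layout follows the AppendLine calls above:

The following useful information will help you in processing the user prompt:

# Retrieval context 1 of 2
Data source name: <data source name>
Content category: <category>
Content type: <type>
Content path: <path>
Additional links:
- <link>

Matched text content:
````
<matched text>
````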

View File

@@ -0,0 +1,33 @@
using AIStudio.Chat;
using AIStudio.Provider;

namespace AIStudio.Tools.RAG;

public interface IAugmentationProcess
{
    /// <summary>
    /// What is the technical name of the augmentation process?
    /// </summary>
    public string TechnicalName { get; }

    /// <summary>
    /// What is the augmentation process called in the UI?
    /// </summary>
    public string UIName { get; }

    /// <summary>
    /// How does the augmentation process work?
    /// </summary>
    public string Description { get; }

    /// <summary>
    /// Starts the augmentation process.
    /// </summary>
    /// <param name="provider">The LLM provider. It is used, e.g., for automatic retrieval context validation.</param>
    /// <param name="lastPrompt">The last prompt that was issued by the user.</param>
    /// <param name="chatThread">The chat thread.</param>
    /// <param name="retrievalContexts">The retrieval contexts that were issued by the retrieval process.</param>
    /// <param name="token">The cancellation token.</param>
    /// <returns>The altered chat thread.</returns>
    public Task<ChatThread> ProcessAsync(IProvider provider, IContent lastPrompt, ChatThread chatThread, IReadOnlyList<IRetrievalContext> retrievalContexts, CancellationToken token = default);
}
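AugmentationOne above is the default implementation of this contract. As a sketch under the same interface, a hypothetical alternative process only has to provide the three descriptive properties and ProcessAsync, nothing more:

// Hypothetical example, not part of this commit:
public sealed class PassthroughAugmentation : IAugmentationProcess
{
    /// <inheritdoc />
    public string TechnicalName => "Passthrough";

    /// <inheritdoc />
    public string UIName => "No augmentation";

    /// <inheritdoc />
    public string Description => "Returns the chat thread unchanged and ignores all retrieval contexts.";

    /// <inheritdoc />
    public Task<ChatThread> ProcessAsync(IProvider provider, IContent lastPrompt, ChatThread chatThread, IReadOnlyList<IRetrievalContext> retrievalContexts, CancellationToken token = default)
        => Task.FromResult(chatThread);
}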

View File

@ -1,6 +1,7 @@
using AIStudio.Chat;
using AIStudio.Provider;
using AIStudio.Settings;
using AIStudio.Tools.RAG.AugmentationProcesses;
using AIStudio.Tools.RAG.DataSourceSelectionProcesses;
using AIStudio.Tools.Services;
@@ -106,7 +107,8 @@ public sealed class AISrcSelWithRetCtxVal : IRagProcess
//
if (proceedWithRAG)
{
var augmentationProcess = new AugmentationOne();
chatThread = await augmentationProcess.ProcessAsync(provider, lastPrompt, chatThread, dataContexts, token);
}
}

View File

@@ -2,7 +2,7 @@ using AIStudio.Chat;
namespace AIStudio.Tools.RAG;
public sealed class RetrievalImageContext : IRetrievalContext
public sealed class RetrievalImageContext : IRetrievalContext, IImageSource
{
#region Implementation of IRetrievalContext

View File

@@ -3,5 +3,6 @@
- Added an option to all data sources to select a local security policy. This preview feature is hidden behind the RAG feature flag.
- Added an option to preselect data sources and options for new chats. This preview feature is hidden behind the RAG feature flag.
- Added an agent to select the appropriate data sources for any prompt. This preview feature is hidden behind the RAG feature flag.
- Added a generic RAG process to integrate virtually any data into your chats. Although the generic RAG process is now implemented, the retrieval part currently works only with external data sources that use the ERI interface. This means you can already integrate your company's data from the corporate network. The retrieval process for your local data is still under development and will take several more weeks to be released. In order to use data through ERI, you (or your company) have to develop an ERI server. You might use the ERI server assistant within AI Studio to do so. This preview feature is hidden behind the RAG feature flag.
- Improved the confidence card for small spaces.
- Fixed a bug in which 'APP_SETTINGS' appeared as a valid destination in the "send to" menu.