AI-Studio/app/MindWork AI Studio/Chat/ListContentBlockExtensions.cs

180 lines
8.1 KiB
C#
Raw Permalink Normal View History

2025-12-28 13:10:20 +00:00
using AIStudio.Provider;
2025-12-30 17:30:32 +00:00
using AIStudio.Provider.OpenAI;
using AIStudio.Settings;
2025-12-28 13:10:20 +00:00
namespace AIStudio.Chat;
/// <summary>
/// Extension methods that convert a list of chat content blocks into provider-ready messages.
/// </summary>
public static class ListContentBlockExtensions
{
    /// <summary>
    /// Processes a list of content blocks by transforming them into a collection of message results asynchronously.
    /// </summary>
    /// <remarks>
    /// Only blocks of type <c>ContentType.TEXT</c> whose content is a <c>ContentText</c> with non-blank text
    /// produce a message; every other block is skipped. A block becomes a multimodal message only when the
    /// selected model can process images and the block has image attachments; otherwise it becomes a plain
    /// text message.
    /// </remarks>
    /// <param name="blocks">The list of content blocks to process.</param>
    /// <param name="selectedProvider">The selected LLM provider.</param>
    /// <param name="selectedModel">The selected model.</param>
    /// <param name="roleTransformer">A function that maps a chat role to the role string used in the message.</param>
    /// <param name="textSubContentFactory">A factory function to create text sub-content.</param>
    /// <param name="imageSubContentFactory">A factory function to create image sub-content.</param>
    /// <returns>An asynchronous task that resolves to a list of transformed results, in the order of the processed blocks.</returns>
    public static async Task<IList<IMessageBase>> BuildMessagesAsync(
        this List<ContentBlock> blocks,
        LLMProviders selectedProvider,
        Model selectedModel,
        Func<ChatRole, string> roleTransformer,
        Func<string, ISubContent> textSubContentFactory,
        Func<FileAttachmentImage, Task<ISubContent>> imageSubContentFactory)
    {
        var capabilities = selectedProvider.GetModelCapabilities(selectedModel);
        var canProcessImages = capabilities.Contains(Capability.MULTIPLE_IMAGE_INPUT) ||
                               capabilities.Contains(Capability.SINGLE_IMAGE_INPUT);

        var messageTaskList = new List<Task<IMessageBase>>(blocks.Count);
        foreach (var block in blocks)
        {
            // Only non-empty text blocks are turned into messages:
            if (block.ContentType is not ContentType.TEXT ||
                block.Content is not ContentText text ||
                string.IsNullOrWhiteSpace(text.Text))
                continue;

            // The prompt may or may not contain image(s). When the provider/model cannot process
            // images, or when there are no image attachments, we treat it as a regular text message.
            // The attachment check is skipped entirely when the model cannot process images.
            var useMultimodalMessage = canProcessImages && text.FileAttachments.ContainsImages();

            messageTaskList.Add(useMultimodalMessage
                ? CreateMultimodalMessageAsync(block, text)
                : CreateTextMessageAsync(block, text));
        }

        // Await all messages. The results are returned in the same order as the tasks.
        var messages = await Task.WhenAll(messageTaskList);

        // Copy into a list, so that callers keep receiving a mutable IList instead of a fixed-size array:
        return messages.ToList();

        // Local function to create a text message asynchronously.
        // NOTE(review): Task.Run is kept from the original implementation; presumably it moves the
        // preparation work off the caller's thread -- confirm before removing it.
        Task<IMessageBase> CreateTextMessageAsync(ContentBlock block, ContentText text)
        {
            return Task.Run(async () =>
            {
                IMessageBase message = new TextMessage
                {
                    Role = roleTransformer(block.Role),
                    Content = await text.PrepareTextContentForAI(),
                };

                return message;
            });
        }

        // Local function to create a multimodal message (text plus images) asynchronously.
        Task<IMessageBase> CreateMultimodalMessageAsync(ContentBlock block, ContentText text)
        {
            return Task.Run(async () =>
            {
                var imageTasks = text.FileAttachments
                    .Where(x => x is { IsImage: true, Exists: true })
                    .Cast<FileAttachmentImage>()
                    .Select(imageSubContentFactory)
                    .ToList();

                // Await the image conversions instead of blocking a thread-pool thread with
                // Task.WaitAll. A failing image factory now surfaces its own exception rather
                // than an AggregateException wrapper.
                var images = await Task.WhenAll(imageTasks);

                IMessageBase message = new MultimodalMessage
                {
                    Role = roleTransformer(block.Role),
                    Content =
                    [
                        textSubContentFactory(await text.PrepareTextContentForAI()),
                        ..images,
                    ]
                };

                return message;
            });
        }
    }

    /// <summary>
    /// Processes a list of content blocks using direct image URL format to create message results asynchronously.
    /// </summary>
    /// <param name="blocks">The list of content blocks to process.</param>
    /// <param name="selectedProvider">The selected LLM provider.</param>
    /// <param name="selectedModel">The selected model.</param>
    /// <returns>An asynchronous task that resolves to a list of transformed message results.</returns>
    /// <remarks>
    /// Uses direct image URL format where the image data is placed directly in the image_url field:
    /// <code>
    /// { "type": "image_url", "image_url": "data:image/jpeg;base64,..." }
    /// </code>
    /// This format is used by OpenAI, Mistral, and Ollama.
    /// </remarks>
    public static async Task<IList<IMessageBase>> BuildMessagesUsingDirectImageUrlAsync(
        this List<ContentBlock> blocks,
        LLMProviders selectedProvider,
        Model selectedModel) => await blocks.BuildMessagesAsync(
            selectedProvider,
            selectedModel,
            StandardRoleTransformer,
            StandardTextSubContentFactory,
            DirectImageSubContentFactory);

    /// <summary>
    /// Processes a list of content blocks using nested image URL format to create message results asynchronously.
    /// </summary>
    /// <param name="blocks">The list of content blocks to process.</param>
    /// <param name="selectedProvider">The selected LLM provider.</param>
    /// <param name="selectedModel">The selected model.</param>
    /// <returns>An asynchronous task that resolves to a list of transformed message results.</returns>
    /// <remarks>
    /// Uses nested image URL format where the image data is wrapped in an object:
    /// <code>
    /// { "type": "image_url", "image_url": { "url": "data:image/jpeg;base64,..." } }
    /// </code>
    /// This format is used by LM Studio, VLLM, llama.cpp, and other OpenAI-compatible providers.
    /// </remarks>
    public static async Task<IList<IMessageBase>> BuildMessagesUsingNestedImageUrlAsync(
        this List<ContentBlock> blocks,
        LLMProviders selectedProvider,
        Model selectedModel) => await blocks.BuildMessagesAsync(
            selectedProvider,
            selectedModel,
            StandardRoleTransformer,
            StandardTextSubContentFactory,
            NestedImageSubContentFactory);

    /// <summary>
    /// Wraps the given text into a text sub-content.
    /// </summary>
    /// <param name="text">The text to wrap.</param>
    /// <returns>The text sub-content.</returns>
    private static ISubContent StandardTextSubContentFactory(string text) => new SubContentText
    {
        Text = text,
    };

    /// <summary>
    /// Creates an image sub-content that carries the data URL directly in its image URL field.
    /// </summary>
    /// <param name="attachment">The image attachment to convert.</param>
    /// <returns>The image sub-content; its URL is empty when the image could not be converted to base64.</returns>
    private static async Task<ISubContent> DirectImageSubContentFactory(FileAttachmentImage attachment) => new SubContentImageUrl
    {
        ImageUrl = await BuildImageDataUrlAsync(attachment),
    };

    /// <summary>
    /// Creates an image sub-content that carries the data URL inside a nested URL object.
    /// </summary>
    /// <param name="attachment">The image attachment to convert.</param>
    /// <returns>The image sub-content; its URL is empty when the image could not be converted to base64.</returns>
    private static async Task<ISubContent> NestedImageSubContentFactory(FileAttachmentImage attachment) => new SubContentImageUrlNested
    {
        ImageUrl = new SubContentImageUrlData
        {
            Url = await BuildImageDataUrlAsync(attachment),
        },
    };

    /// <summary>
    /// Builds the <c>data:</c> URL for an image attachment; shared by both image sub-content factories.
    /// </summary>
    /// <param name="attachment">The image attachment to encode.</param>
    /// <returns>The data URL, or an empty string when the base64 conversion did not succeed.</returns>
    private static async Task<string> BuildImageDataUrlAsync(FileAttachmentImage attachment) =>
        await attachment.TryAsBase64() is (true, var base64Content)
            ? $"data:{attachment.DetermineMimeType()};base64,{base64Content}"
            : string.Empty;

    /// <summary>
    /// Maps a chat role to the role string used in the messages.
    /// </summary>
    /// <param name="role">The chat role to map.</param>
    /// <returns>The role string; any role without an explicit mapping falls back to "user".</returns>
    private static string StandardRoleTransformer(ChatRole role) => role switch
    {
        ChatRole.USER => "user",
        ChatRole.AI or ChatRole.AGENT => "assistant",
        ChatRole.SYSTEM => "system",

        _ => "user",
    };
}