mirror of
https://github.com/MindWorkAI/AI-Studio.git
synced 2026-02-13 00:01:37 +00:00
Enhance BuildMessagesAsync to support multimodal content with image handling
This commit is contained in:
parent
447ec9f31a
commit
91a302be41
@ -1,5 +1,6 @@
|
||||
using AIStudio.Provider;
|
||||
using AIStudio.Provider.OpenAI;
|
||||
using AIStudio.Settings;
|
||||
|
||||
namespace AIStudio.Chat;
|
||||
|
||||
@ -12,21 +13,41 @@ public static class ListContentBlockExtensions
|
||||
/// <param name="roleTransformer">A function that transforms each content block into a message result asynchronously.</param>
|
||||
/// <param name="selectedProvider">The selected LLM provider.</param>
|
||||
/// <param name="selectedModel">The selected model.</param>
|
||||
/// <param name="textSubContentFactory">A factory function to create text sub-content.</param>
|
||||
/// <param name="imageSubContentFactory">A factory function to create image sub-content.</param>
|
||||
/// <returns>An asynchronous task that resolves to a list of transformed results.</returns>
|
||||
public static async Task<IList<IMessageBase>> BuildMessagesAsync(
|
||||
this List<ContentBlock> blocks,
|
||||
LLMProviders selectedProvider,
|
||||
Model selectedModel,
|
||||
Func<ChatRole, string> roleTransformer)
|
||||
Func<ChatRole, string> roleTransformer,
|
||||
Func<string, ISubContent> textSubContentFactory,
|
||||
Func<FileAttachmentImage, Task<ISubContent>> imageSubContentFactory)
|
||||
{
|
||||
var capabilities = selectedProvider.GetModelCapabilities(selectedModel);
|
||||
var canProcessImages = capabilities.Contains(Capability.MULTIPLE_IMAGE_INPUT) ||
|
||||
capabilities.Contains(Capability.SINGLE_IMAGE_INPUT);
|
||||
|
||||
var messageTaskList = new List<Task<IMessageBase>>(blocks.Count);
|
||||
foreach (var block in blocks)
|
||||
{
|
||||
switch (block.Content)
|
||||
{
|
||||
case ContentText text when block.ContentType is ContentType.TEXT && !string.IsNullOrWhiteSpace(text.Text):
|
||||
// The prompt may or may not contain image(s), but the provider/model cannot process images.
|
||||
// Thus, we treat it as a regular text message.
|
||||
case ContentText text when block.ContentType is ContentType.TEXT && !string.IsNullOrWhiteSpace(text.Text) && !canProcessImages:
|
||||
messageTaskList.Add(CreateTextMessageAsync(block, text));
|
||||
break;
|
||||
|
||||
// The regular case for text content without images:
|
||||
case ContentText text when block.ContentType is ContentType.TEXT && !string.IsNullOrWhiteSpace(text.Text) && !text.FileAttachments.ContainsImages():
|
||||
messageTaskList.Add(CreateTextMessageAsync(block, text));
|
||||
break;
|
||||
|
||||
// Text prompt with images as attachments, and the provider/model can process images:
|
||||
case ContentText text when block.ContentType is ContentType.TEXT && !string.IsNullOrWhiteSpace(text.Text) && text.FileAttachments.ContainsImages():
|
||||
messageTaskList.Add(CreateMultimodalMessageAsync(block, text, textSubContentFactory, imageSubContentFactory));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@ -45,8 +66,38 @@ public static class ListContentBlockExtensions
|
||||
Content = await text.PrepareTextContentForAI(),
|
||||
} as IMessageBase);
|
||||
}
|
||||
|
||||
// Local function to create a multimodal message (text plus image attachments) asynchronously.
//
// block:                       the content block whose role is mapped through roleTransformer.
// text:                        the text content; its file attachments are scanned for images.
// innerTextSubContentFactory:  builds the provider-specific text sub-content.
// innerImageSubContentFactory: builds the provider-specific image sub-content for one attachment.
//
// Returns a task yielding a MultimodalMessage whose content is the prepared text
// followed by one sub-content per image attachment.
Task<IMessageBase> CreateMultimodalMessageAsync(
    ContentBlock block,
    ContentText text,
    Func<string, ISubContent> innerTextSubContentFactory,
    Func<FileAttachmentImage, Task<ISubContent>> innerImageSubContentFactory)
{
    // The work is still scheduled through Task.Run, as before.
    return Task.Run(async () =>
    {
        // Start the conversion of every attachment that is flagged as an image and exists.
        // ToList() forces the factory calls to start now instead of lazily.
        var imagesTasks = text.FileAttachments
            .Where(x => x is { IsImage: true, Exists: true })
            .Cast<FileAttachmentImage>()
            .Select(innerImageSubContentFactory)
            .ToList();

        // Await all conversions without blocking the current thread. The previous
        // Task.WaitAll + .Result combination blocked a thread inside an async lambda.
        var images = await Task.WhenAll(imagesTasks);

        return new MultimodalMessage
        {
            Role = roleTransformer(block.Role),
            Content =
            [
                // The text part always comes first, followed by the images.
                innerTextSubContentFactory(await text.PrepareTextContentForAI()),
                ..images,
            ]
        } as IMessageBase;
    });
}
|
||||
}
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// Processes a list of content blocks using standard role transformations to create message results asynchronously.
|
||||
/// </summary>
|
||||
@ -54,11 +105,28 @@ public static class ListContentBlockExtensions
|
||||
/// <param name="selectedProvider">The selected LLM provider.</param>
|
||||
/// <param name="selectedModel">The selected model.</param>
|
||||
/// <returns>An asynchronous task that resolves to a list of transformed message results.</returns>
|
||||
public static async Task<IList<IMessageBase>> BuildMessagesUsingStandardRolesAsync(
|
||||
public static async Task<IList<IMessageBase>> BuildMessagesUsingStandardsAsync(
|
||||
this List<ContentBlock> blocks,
|
||||
LLMProviders selectedProvider,
|
||||
Model selectedModel) => await blocks.BuildMessagesAsync(selectedProvider, selectedModel, StandardRoleTransformer);
|
||||
Model selectedModel) => await blocks.BuildMessagesAsync(
|
||||
selectedProvider,
|
||||
selectedModel,
|
||||
StandardRoleTransformer,
|
||||
StandardTextSubContentFactory,
|
||||
StandardImageSubContentFactory);
|
||||
|
||||
/// <summary>
/// Creates the standard text sub-content for the given text.
/// </summary>
/// <param name="text">The text to wrap.</param>
/// <returns>A <see cref="SubContentText"/> whose Text is set to the given text.</returns>
private static ISubContent StandardTextSubContentFactory(string text)
{
    var textSubContent = new SubContentText
    {
        Text = text,
    };

    return textSubContent;
}
|
||||
|
||||
/// <summary>
/// Creates the standard image sub-content for the given image attachment.
/// </summary>
/// <param name="attachment">The image attachment to convert.</param>
/// <returns>
/// A <see cref="SubContentImageUrl"/> whose ImageUrl is a base64 data URL built from the
/// attachment's MIME type and content, or an empty string when the base64 conversion fails.
/// </returns>
private static async Task<ISubContent> StandardImageSubContentFactory(FileAttachmentImage attachment)
{
    // Fall back to an empty URL when the attachment cannot be read as base64.
    var imageUrl = string.Empty;
    if (await attachment.TryAsBase64() is (true, var base64Content))
    {
        imageUrl = $"data:{attachment.DetermineMimeType()};base64,{base64Content}";
    }

    return new SubContentImageUrl
    {
        ImageUrl = imageUrl,
    };
}
|
||||
|
||||
private static string StandardRoleTransformer(ChatRole role) => role switch
|
||||
{
|
||||
ChatRole.USER => "user",
|
||||
|
||||
@ -40,7 +40,7 @@ public sealed class ProviderAlibabaCloud() : BaseProvider(LLMProviders.ALIBABA_C
|
||||
var apiParameters = this.ParseAdditionalApiParameters();
|
||||
|
||||
// Build the list of messages:
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingStandardRolesAsync(this.Provider, chatModel);
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
|
||||
|
||||
// Prepare the AlibabaCloud HTTP chat request:
|
||||
var alibabaCloudChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest
|
||||
|
||||
@ -31,14 +31,38 @@ public sealed class ProviderAnthropic() : BaseProvider(LLMProviders.ANTHROPIC, "
|
||||
var apiParameters = this.ParseAdditionalApiParameters("system");
|
||||
|
||||
// Build the list of messages:
|
||||
var messages = await chatThread.Blocks.BuildMessagesAsync(this.Provider, chatModel, role => role switch
|
||||
{
|
||||
ChatRole.USER => "user",
|
||||
ChatRole.AI => "assistant",
|
||||
ChatRole.AGENT => "assistant",
|
||||
var messages = await chatThread.Blocks.BuildMessagesAsync(
|
||||
this.Provider, chatModel,
|
||||
|
||||
// Anthropic-specific role mapping:
|
||||
role => role switch
|
||||
{
|
||||
ChatRole.USER => "user",
|
||||
ChatRole.AI => "assistant",
|
||||
ChatRole.AGENT => "assistant",
|
||||
|
||||
_ => "user",
|
||||
});
|
||||
_ => "user",
|
||||
},
|
||||
|
||||
// Anthropic uses the standard text sub-content:
|
||||
text => new SubContentText
|
||||
{
|
||||
Text = text,
|
||||
},
|
||||
|
||||
// Anthropic-specific image sub-content:
|
||||
async attachment => new SubContentImage
|
||||
{
|
||||
Source = new SubContentBase64Image
|
||||
{
|
||||
Data = await attachment.TryAsBase64(token: token) is (true, var base64Content)
|
||||
? base64Content
|
||||
: string.Empty,
|
||||
|
||||
MediaType = attachment.DetermineMimeType(),
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
// Prepare the Anthropic HTTP chat request:
|
||||
var chatRequest = JsonSerializer.Serialize(new ChatRequest
|
||||
|
||||
@ -40,7 +40,7 @@ public sealed class ProviderDeepSeek() : BaseProvider(LLMProviders.DEEP_SEEK, "h
|
||||
var apiParameters = this.ParseAdditionalApiParameters();
|
||||
|
||||
// Build the list of messages:
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingStandardRolesAsync(this.Provider, chatModel);
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
|
||||
|
||||
// Prepare the DeepSeek HTTP chat request:
|
||||
var deepSeekChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest
|
||||
|
||||
@ -40,7 +40,7 @@ public class ProviderFireworks() : BaseProvider(LLMProviders.FIREWORKS, "https:/
|
||||
var apiParameters = this.ParseAdditionalApiParameters();
|
||||
|
||||
// Build the list of messages:
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingStandardRolesAsync(this.Provider, chatModel);
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
|
||||
|
||||
// Prepare the Fireworks HTTP chat request:
|
||||
var fireworksChatRequest = JsonSerializer.Serialize(new ChatRequest
|
||||
|
||||
@ -40,7 +40,7 @@ public sealed class ProviderGWDG() : BaseProvider(LLMProviders.GWDG, "https://ch
|
||||
var apiParameters = this.ParseAdditionalApiParameters();
|
||||
|
||||
// Build the list of messages:
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingStandardRolesAsync(this.Provider, chatModel);
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
|
||||
|
||||
// Prepare the GWDG HTTP chat request:
|
||||
var gwdgChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest
|
||||
|
||||
@ -40,7 +40,7 @@ public class ProviderGoogle() : BaseProvider(LLMProviders.GOOGLE, "https://gener
|
||||
var apiParameters = this.ParseAdditionalApiParameters();
|
||||
|
||||
// Build the list of messages:
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingStandardRolesAsync(this.Provider, chatModel);
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
|
||||
|
||||
// Prepare the Google HTTP chat request:
|
||||
var geminiChatRequest = JsonSerializer.Serialize(new ChatRequest
|
||||
|
||||
@ -40,7 +40,7 @@ public class ProviderGroq() : BaseProvider(LLMProviders.GROQ, "https://api.groq.
|
||||
var apiParameters = this.ParseAdditionalApiParameters();
|
||||
|
||||
// Build the list of messages:
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingStandardRolesAsync(this.Provider, chatModel);
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
|
||||
|
||||
// Prepare the Groq HTTP chat request:
|
||||
var groqChatRequest = JsonSerializer.Serialize(new ChatRequest
|
||||
|
||||
@ -40,7 +40,7 @@ public sealed class ProviderHelmholtz() : BaseProvider(LLMProviders.HELMHOLTZ, "
|
||||
var apiParameters = this.ParseAdditionalApiParameters();
|
||||
|
||||
// Build the list of messages:
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingStandardRolesAsync(this.Provider, chatModel);
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
|
||||
|
||||
// Prepare the Helmholtz HTTP chat request:
|
||||
var helmholtzChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest
|
||||
|
||||
@ -45,7 +45,7 @@ public sealed class ProviderHuggingFace : BaseProvider
|
||||
var apiParameters = this.ParseAdditionalApiParameters();
|
||||
|
||||
// Build the list of messages:
|
||||
var message = await chatThread.Blocks.BuildMessagesUsingStandardRolesAsync(this.Provider, chatModel);
|
||||
var message = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
|
||||
|
||||
// Prepare the HuggingFace HTTP chat request:
|
||||
var huggingfaceChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest
|
||||
|
||||
@ -38,7 +38,7 @@ public sealed class ProviderMistral() : BaseProvider(LLMProviders.MISTRAL, "http
|
||||
var apiParameters = this.ParseAdditionalApiParameters();
|
||||
|
||||
// Build the list of messages:
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingStandardRolesAsync(this.Provider, chatModel);
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
|
||||
|
||||
// Prepare the Mistral HTTP chat request:
|
||||
var mistralChatRequest = JsonSerializer.Serialize(new ChatRequest
|
||||
|
||||
@ -90,15 +90,57 @@ public sealed class ProviderOpenAI() : BaseProvider(LLMProviders.OPEN_AI, "https
|
||||
var apiParameters = this.ParseAdditionalApiParameters("input", "store", "tools");
|
||||
|
||||
// Build the list of messages:
|
||||
var messages = await chatThread.Blocks.BuildMessagesAsync(this.Provider, chatModel, role => role switch
|
||||
{
|
||||
ChatRole.USER => "user",
|
||||
ChatRole.AI => "assistant",
|
||||
ChatRole.AGENT => "assistant",
|
||||
ChatRole.SYSTEM => systemPromptRole,
|
||||
var messages = await chatThread.Blocks.BuildMessagesAsync(
|
||||
this.Provider, chatModel,
|
||||
|
||||
_ => "user",
|
||||
});
|
||||
// OpenAI-specific role mapping:
|
||||
role => role switch
|
||||
{
|
||||
ChatRole.USER => "user",
|
||||
ChatRole.AI => "assistant",
|
||||
ChatRole.AGENT => "assistant",
|
||||
ChatRole.SYSTEM => systemPromptRole,
|
||||
|
||||
_ => "user",
|
||||
},
|
||||
|
||||
// OpenAI's text sub-content depends on the model, whether we are using
|
||||
// the Responses API or the Chat Completion API:
|
||||
text => usingResponsesAPI switch
|
||||
{
|
||||
// Responses API uses INPUT_TEXT:
|
||||
true => new SubContentInputText
|
||||
{
|
||||
Text = text,
|
||||
},
|
||||
|
||||
// Chat Completion API uses TEXT:
|
||||
false => new SubContentText
|
||||
{
|
||||
Text = text,
|
||||
},
|
||||
},
|
||||
|
||||
// OpenAI's image sub-content depends on the model as well,
|
||||
// whether we are using the Responses API or the Chat Completion API:
|
||||
async attachment => usingResponsesAPI switch
|
||||
{
|
||||
// Responses API uses INPUT_IMAGE:
|
||||
true => new SubContentInputImage
|
||||
{
|
||||
ImageUrl = await attachment.TryAsBase64(token: token) is (true, var base64Content)
|
||||
? $"data:{attachment.DetermineMimeType()};base64,{base64Content}"
|
||||
: string.Empty,
|
||||
},
|
||||
|
||||
// Chat Completion API uses IMAGE_URL:
|
||||
false => new SubContentImageUrl
|
||||
{
|
||||
ImageUrl = await attachment.TryAsBase64(token: token) is (true, var base64Content)
|
||||
? $"data:{attachment.DetermineMimeType()};base64,{base64Content}"
|
||||
: string.Empty,
|
||||
}
|
||||
});
|
||||
|
||||
//
|
||||
// Create the request: either for the Responses API or the Chat Completion API
|
||||
@ -141,6 +183,8 @@ public sealed class ProviderOpenAI() : BaseProvider(LLMProviders.OPEN_AI, "https
|
||||
}, JSON_SERIALIZER_OPTIONS),
|
||||
};
|
||||
|
||||
Console.WriteLine($"==============> {openAIChatRequest}");
|
||||
|
||||
async Task<HttpRequestMessage> RequestBuilder()
|
||||
{
|
||||
// Build the HTTP post request:
|
||||
|
||||
@ -43,7 +43,7 @@ public sealed class ProviderOpenRouter() : BaseProvider(LLMProviders.OPEN_ROUTER
|
||||
var apiParameters = this.ParseAdditionalApiParameters();
|
||||
|
||||
// Build the list of messages:
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingStandardRolesAsync(this.Provider, chatModel);
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
|
||||
|
||||
// Prepare the OpenRouter HTTP chat request:
|
||||
var openRouterChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest
|
||||
|
||||
@ -49,7 +49,7 @@ public sealed class ProviderPerplexity() : BaseProvider(LLMProviders.PERPLEXITY,
|
||||
var apiParameters = this.ParseAdditionalApiParameters();
|
||||
|
||||
// Build the list of messages:
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingStandardRolesAsync(this.Provider, chatModel);
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
|
||||
|
||||
// Prepare the Perplexity HTTP chat request:
|
||||
var perplexityChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest
|
||||
|
||||
@ -36,7 +36,7 @@ public sealed class ProviderSelfHosted(Host host, string hostname) : BaseProvide
|
||||
var apiParameters = this.ParseAdditionalApiParameters();
|
||||
|
||||
// Build the list of messages:
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingStandardRolesAsync(this.Provider, chatModel);
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
|
||||
|
||||
// Prepare the OpenAI HTTP chat request:
|
||||
var providerChatRequest = JsonSerializer.Serialize(new ChatRequest
|
||||
|
||||
@ -40,7 +40,7 @@ public sealed class ProviderX() : BaseProvider(LLMProviders.X, "https://api.x.ai
|
||||
var apiParameters = this.ParseAdditionalApiParameters();
|
||||
|
||||
// Build the list of messages:
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingStandardRolesAsync(this.Provider, chatModel);
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
|
||||
|
||||
// Prepare the xAI HTTP chat request:
|
||||
var xChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest
|
||||
|
||||
Loading…
Reference in New Issue
Block a user