diff --git a/app/MindWork AI Studio/Chat/ListContentBlockExtensions.cs b/app/MindWork AI Studio/Chat/ListContentBlockExtensions.cs
index 012a660c..066d22e5 100644
--- a/app/MindWork AI Studio/Chat/ListContentBlockExtensions.cs
+++ b/app/MindWork AI Studio/Chat/ListContentBlockExtensions.cs
@@ -1,5 +1,6 @@
 using AIStudio.Provider;
 using AIStudio.Provider.OpenAI;
+using AIStudio.Settings;
 
 namespace AIStudio.Chat;
 
@@ -12,21 +13,41 @@ public static class ListContentBlockExtensions
     /// <param name="roleTransformer">A function that transforms each content block into a message result asynchronously.</param>
     /// <param name="selectedProvider">The selected LLM provider.</param>
     /// <param name="selectedModel">The selected model.</param>
+    /// <param name="textSubContentFactory">A factory function to create text sub-content.</param>
+    /// <param name="imageSubContentFactory">A factory function to create image sub-content.</param>
     /// <returns>An asynchronous task that resolves to a list of transformed results.</returns>
     public static async Task<IList<IMessageBase>> BuildMessagesAsync(
         this List<ContentBlock> blocks,
         LLMProviders selectedProvider,
         Model selectedModel,
-        Func<ChatRole, string> roleTransformer)
+        Func<ChatRole, string> roleTransformer,
+        Func<string, ISubContent> textSubContentFactory,
+        Func<FileAttachmentImage, Task<ISubContent>> imageSubContentFactory)
     {
+        var capabilities = selectedProvider.GetModelCapabilities(selectedModel);
+        var canProcessImages = capabilities.Contains(Capability.MULTIPLE_IMAGE_INPUT) ||
+                               capabilities.Contains(Capability.SINGLE_IMAGE_INPUT);
+
         var messageTaskList = new List<Task<IMessageBase>>(blocks.Count);
         foreach (var block in blocks)
         {
             switch (block.Content)
             {
-                case ContentText text when block.ContentType is ContentType.TEXT && !string.IsNullOrWhiteSpace(text.Text):
+                // The prompt may or may not contain image(s), but the provider/model cannot process images.
+                // Thus, we treat it as a regular text message.
+                case ContentText text when block.ContentType is ContentType.TEXT && !string.IsNullOrWhiteSpace(text.Text) && !canProcessImages:
                     messageTaskList.Add(CreateTextMessageAsync(block, text));
                     break;
+
+                // The regular case for text content without images:
+                case ContentText text when block.ContentType is ContentType.TEXT && !string.IsNullOrWhiteSpace(text.Text) && !text.FileAttachments.ContainsImages():
+                    messageTaskList.Add(CreateTextMessageAsync(block, text));
+                    break;
+
+                // Text prompt with images as attachments, and the provider/model can process images:
+                case ContentText text when block.ContentType is ContentType.TEXT && !string.IsNullOrWhiteSpace(text.Text) && text.FileAttachments.ContainsImages():
+                    messageTaskList.Add(CreateMultimodalMessageAsync(block, text, textSubContentFactory, imageSubContentFactory));
+                    break;
             }
         }
 
@@ -45,8 +66,38 @@ public static class ListContentBlockExtensions
                 Content = await text.PrepareTextContentForAI(),
             } as IMessageBase);
         }
+
+        // Local function to create a multimodal message asynchronously.
+        Task<IMessageBase> CreateMultimodalMessageAsync(
+            ContentBlock block,
+            ContentText text,
+            Func<string, ISubContent> innerTextSubContentFactory,
+            Func<FileAttachmentImage, Task<ISubContent>> innerImageSubContentFactory)
+        {
+            return Task.Run(async () =>
+            {
+                var imagesTasks = text.FileAttachments
+                    .Where(x => x is { IsImage: true, Exists: true })
+                    .Cast<FileAttachmentImage>()
+                    .Select(innerImageSubContentFactory)
+                    .ToList();
+
+                // Await all image conversions without blocking a thread pool thread:
+                var images = await Task.WhenAll(imagesTasks);
+
+                return new MultimodalMessage
+                {
+                    Role = roleTransformer(block.Role),
+                    Content =
+                    [
+                        innerTextSubContentFactory(await text.PrepareTextContentForAI()),
+                        ..images,
+                    ]
+                } as IMessageBase;
+            });
+        }
     }
-    
+
     /// <summary>
     /// Processes a list of content blocks using standard role transformations to create message results asynchronously.
     /// </summary>
@@ -54,11 +105,28 @@ public static class ListContentBlockExtensions
     /// <param name="selectedProvider">The selected LLM provider.</param>
     /// <param name="selectedModel">The selected model.</param>
     /// <returns>An asynchronous task that resolves to a list of transformed message results.</returns>
-    public static async Task<IList<IMessageBase>> BuildMessagesUsingStandardRolesAsync(
+    public static async Task<IList<IMessageBase>> BuildMessagesUsingStandardsAsync(
         this List<ContentBlock> blocks,
         LLMProviders selectedProvider,
-        Model selectedModel) => await blocks.BuildMessagesAsync(selectedProvider, selectedModel, StandardRoleTransformer);
+        Model selectedModel) => await blocks.BuildMessagesAsync(
+            selectedProvider,
+            selectedModel,
+            StandardRoleTransformer,
+            StandardTextSubContentFactory,
+            StandardImageSubContentFactory);
+
+    private static ISubContent StandardTextSubContentFactory(string text) => new SubContentText
+    {
+        Text = text,
+    };
+
+    private static async Task<ISubContent> StandardImageSubContentFactory(FileAttachmentImage attachment) => new SubContentImageUrl
+    {
+        ImageUrl = await attachment.TryAsBase64() is (true, var base64Content)
+            ? $"data:{attachment.DetermineMimeType()};base64,{base64Content}"
+            : string.Empty,
+    };
+
     private static string StandardRoleTransformer(ChatRole role) => role switch
     {
         ChatRole.USER => "user",
diff --git a/app/MindWork AI Studio/Provider/AlibabaCloud/ProviderAlibabaCloud.cs b/app/MindWork AI Studio/Provider/AlibabaCloud/ProviderAlibabaCloud.cs
index e7dea54e..a4fec2c1 100644
--- a/app/MindWork AI Studio/Provider/AlibabaCloud/ProviderAlibabaCloud.cs
+++ b/app/MindWork AI Studio/Provider/AlibabaCloud/ProviderAlibabaCloud.cs
@@ -40,7 +40,7 @@ public sealed class ProviderAlibabaCloud() : BaseProvider(LLMProviders.ALIBABA_C
         var apiParameters = this.ParseAdditionalApiParameters();
 
         // Build the list of messages:
-        var messages = await chatThread.Blocks.BuildMessagesUsingStandardRolesAsync(this.Provider, chatModel);
+        var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
 
         // Prepare the AlibabaCloud HTTP chat request:
         var alibabaCloudChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest
diff --git a/app/MindWork AI Studio/Provider/Anthropic/ProviderAnthropic.cs b/app/MindWork AI Studio/Provider/Anthropic/ProviderAnthropic.cs
index d1745280..7f02781b 100644
--- a/app/MindWork AI Studio/Provider/Anthropic/ProviderAnthropic.cs
+++ b/app/MindWork AI Studio/Provider/Anthropic/ProviderAnthropic.cs
@@ -31,14 +31,38 @@ public sealed class ProviderAnthropic() : BaseProvider(LLMProviders.ANTHROPIC, "
         var apiParameters = this.ParseAdditionalApiParameters("system");
 
         // Build the list of messages:
-        var messages = await chatThread.Blocks.BuildMessagesAsync(this.Provider, chatModel, role => role switch
-        {
-            ChatRole.USER => "user",
-            ChatRole.AI => "assistant",
-            ChatRole.AGENT => "assistant",
+        var messages = await chatThread.Blocks.BuildMessagesAsync(
+            this.Provider, chatModel,
+
+            // Anthropic-specific role mapping:
+            role => role switch
+            {
+                ChatRole.USER => "user",
+                ChatRole.AI => "assistant",
+                ChatRole.AGENT => "assistant",
 
-            _ => "user",
-        });
+                _ => "user",
+            },
+
+            // Anthropic uses the standard text sub-content:
+            text => new SubContentText
+            {
+                Text = text,
+            },
+
+            // Anthropic-specific image sub-content:
+            async attachment => new SubContentImage
+            {
+                Source = new SubContentBase64Image
+                {
+                    Data = await attachment.TryAsBase64(token: token) is (true, var base64Content)
+                        ? base64Content
+                        : string.Empty,
+
+                    MediaType = attachment.DetermineMimeType(),
+                }
+            }
+        );
 
         // Prepare the Anthropic HTTP chat request:
         var chatRequest = JsonSerializer.Serialize(new ChatRequest
diff --git a/app/MindWork AI Studio/Provider/DeepSeek/ProviderDeepSeek.cs b/app/MindWork AI Studio/Provider/DeepSeek/ProviderDeepSeek.cs
index cfff4db8..c7935cbb 100644
--- a/app/MindWork AI Studio/Provider/DeepSeek/ProviderDeepSeek.cs
+++ b/app/MindWork AI Studio/Provider/DeepSeek/ProviderDeepSeek.cs
@@ -40,7 +40,7 @@ public sealed class ProviderDeepSeek() : BaseProvider(LLMProviders.DEEP_SEEK, "h
         var apiParameters = this.ParseAdditionalApiParameters();
 
         // Build the list of messages:
-        var messages = await chatThread.Blocks.BuildMessagesUsingStandardRolesAsync(this.Provider, chatModel);
+        var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
 
         // Prepare the DeepSeek HTTP chat request:
         var deepSeekChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest
diff --git a/app/MindWork AI Studio/Provider/Fireworks/ProviderFireworks.cs b/app/MindWork AI Studio/Provider/Fireworks/ProviderFireworks.cs
index d265f784..c09003da 100644
--- a/app/MindWork AI Studio/Provider/Fireworks/ProviderFireworks.cs
+++ b/app/MindWork AI Studio/Provider/Fireworks/ProviderFireworks.cs
@@ -40,7 +40,7 @@ public class ProviderFireworks() : BaseProvider(LLMProviders.FIREWORKS, "https:/
         var apiParameters = this.ParseAdditionalApiParameters();
 
         // Build the list of messages:
-        var messages = await chatThread.Blocks.BuildMessagesUsingStandardRolesAsync(this.Provider, chatModel);
+        var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
 
         // Prepare the Fireworks HTTP chat request:
         var fireworksChatRequest = JsonSerializer.Serialize(new ChatRequest
diff --git a/app/MindWork AI Studio/Provider/GWDG/ProviderGWDG.cs b/app/MindWork AI Studio/Provider/GWDG/ProviderGWDG.cs
index 57b4c824..f07d2a28 100644
--- a/app/MindWork AI Studio/Provider/GWDG/ProviderGWDG.cs
+++ b/app/MindWork AI Studio/Provider/GWDG/ProviderGWDG.cs
@@ -40,7 +40,7 @@ public sealed class ProviderGWDG() : BaseProvider(LLMProviders.GWDG, "https://ch
         var apiParameters = this.ParseAdditionalApiParameters();
 
         // Build the list of messages:
-        var messages = await chatThread.Blocks.BuildMessagesUsingStandardRolesAsync(this.Provider, chatModel);
+        var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
 
         // Prepare the GWDG HTTP chat request:
         var gwdgChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest
diff --git a/app/MindWork AI Studio/Provider/Google/ProviderGoogle.cs b/app/MindWork AI Studio/Provider/Google/ProviderGoogle.cs
index a712b4e7..19f46f45 100644
--- a/app/MindWork AI Studio/Provider/Google/ProviderGoogle.cs
+++ b/app/MindWork AI Studio/Provider/Google/ProviderGoogle.cs
@@ -40,7 +40,7 @@ public class ProviderGoogle() : BaseProvider(LLMProviders.GOOGLE, "https://gener
         var apiParameters = this.ParseAdditionalApiParameters();
 
         // Build the list of messages:
-        var messages = await chatThread.Blocks.BuildMessagesUsingStandardRolesAsync(this.Provider, chatModel);
+        var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
 
         // Prepare the Google HTTP chat request:
         var geminiChatRequest = JsonSerializer.Serialize(new ChatRequest
diff --git a/app/MindWork AI Studio/Provider/Groq/ProviderGroq.cs b/app/MindWork AI Studio/Provider/Groq/ProviderGroq.cs
index aa49ad1a..350376e7 100644
--- a/app/MindWork AI Studio/Provider/Groq/ProviderGroq.cs
+++ b/app/MindWork AI Studio/Provider/Groq/ProviderGroq.cs
@@ -40,7 +40,7 @@ public class ProviderGroq() : BaseProvider(LLMProviders.GROQ, "https://api.groq.
         var apiParameters = this.ParseAdditionalApiParameters();
 
         // Build the list of messages:
-        var messages = await chatThread.Blocks.BuildMessagesUsingStandardRolesAsync(this.Provider, chatModel);
+        var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
 
         // Prepare the OpenAI HTTP chat request:
         var groqChatRequest = JsonSerializer.Serialize(new ChatRequest
diff --git a/app/MindWork AI Studio/Provider/Helmholtz/ProviderHelmholtz.cs b/app/MindWork AI Studio/Provider/Helmholtz/ProviderHelmholtz.cs
index 63997453..f26ff712 100644
--- a/app/MindWork AI Studio/Provider/Helmholtz/ProviderHelmholtz.cs
+++ b/app/MindWork AI Studio/Provider/Helmholtz/ProviderHelmholtz.cs
@@ -40,7 +40,7 @@ public sealed class ProviderHelmholtz() : BaseProvider(LLMProviders.HELMHOLTZ, "
         var apiParameters = this.ParseAdditionalApiParameters();
 
         // Build the list of messages:
-        var messages = await chatThread.Blocks.BuildMessagesUsingStandardRolesAsync(this.Provider, chatModel);
+        var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
 
         // Prepare the Helmholtz HTTP chat request:
         var helmholtzChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest
diff --git a/app/MindWork AI Studio/Provider/HuggingFace/ProviderHuggingFace.cs b/app/MindWork AI Studio/Provider/HuggingFace/ProviderHuggingFace.cs
index 8add4b2b..9e91c127 100644
--- a/app/MindWork AI Studio/Provider/HuggingFace/ProviderHuggingFace.cs
+++ b/app/MindWork AI Studio/Provider/HuggingFace/ProviderHuggingFace.cs
@@ -45,7 +45,7 @@ public sealed class ProviderHuggingFace : BaseProvider
         var apiParameters = this.ParseAdditionalApiParameters();
 
         // Build the list of messages:
-        var message = await chatThread.Blocks.BuildMessagesUsingStandardRolesAsync(this.Provider, chatModel);
+        var message = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
 
         // Prepare the HuggingFace HTTP chat request:
         var huggingfaceChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest
diff --git a/app/MindWork AI Studio/Provider/Mistral/ProviderMistral.cs b/app/MindWork AI Studio/Provider/Mistral/ProviderMistral.cs
index efc968c2..787c7819 100644
--- a/app/MindWork AI Studio/Provider/Mistral/ProviderMistral.cs
+++ b/app/MindWork AI Studio/Provider/Mistral/ProviderMistral.cs
@@ -38,7 +38,7 @@ public sealed class ProviderMistral() : BaseProvider(LLMProviders.MISTRAL, "http
         var apiParameters = this.ParseAdditionalApiParameters();
 
         // Build the list of messages:
-        var messages = await chatThread.Blocks.BuildMessagesUsingStandardRolesAsync(this.Provider, chatModel);
+        var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
 
         // Prepare the Mistral HTTP chat request:
         var mistralChatRequest = JsonSerializer.Serialize(new ChatRequest
diff --git a/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs b/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs
index 80050103..d8a49567 100644
--- a/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs
+++ b/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs
@@ -90,15 +90,57 @@ public sealed class ProviderOpenAI() : BaseProvider(LLMProviders.OPEN_AI, "https
         var apiParameters = this.ParseAdditionalApiParameters("input", "store", "tools");
 
         // Build the list of messages:
-        var messages = await chatThread.Blocks.BuildMessagesAsync(this.Provider, chatModel, role => role switch
-        {
-            ChatRole.USER => "user",
-            ChatRole.AI => "assistant",
-            ChatRole.AGENT => "assistant",
-            ChatRole.SYSTEM => systemPromptRole,
-
-            _ => "user",
-        });
+        var messages = await chatThread.Blocks.BuildMessagesAsync(
+            this.Provider, chatModel,
+
+            // OpenAI-specific role mapping:
+            role => role switch
+            {
+                ChatRole.USER => "user",
+                ChatRole.AI => "assistant",
+                ChatRole.AGENT => "assistant",
+                ChatRole.SYSTEM => systemPromptRole,
+
+                _ => "user",
+            },
+
+            // OpenAI's text sub-content depends on whether the model uses
+            // the Responses API or the Chat Completion API:
+            text => usingResponsesAPI switch
+            {
+                // Responses API uses INPUT_TEXT:
+                true => new SubContentInputText
+                {
+                    Text = text,
+                },
+
+                // Chat Completion API uses TEXT:
+                false => new SubContentText
+                {
+                    Text = text,
+                },
+            },
+
+            // OpenAI's image sub-content likewise depends on whether the model uses
+            // the Responses API or the Chat Completion API:
+            async attachment => usingResponsesAPI switch
+            {
+                // Responses API uses INPUT_IMAGE:
+                true => new SubContentInputImage
+                {
+                    ImageUrl = await attachment.TryAsBase64(token: token) is (true, var base64Content)
+                        ? $"data:{attachment.DetermineMimeType()};base64,{base64Content}"
+                        : string.Empty,
+                },
+
+                // Chat Completion API uses IMAGE_URL:
+                false => new SubContentImageUrl
+                {
+                    ImageUrl = await attachment.TryAsBase64(token: token) is (true, var base64Content)
+                        ? $"data:{attachment.DetermineMimeType()};base64,{base64Content}"
+                        : string.Empty,
+                }
+            });
 
         //
         // Create the request: either for the Responses API or the Chat Completion API
diff --git a/app/MindWork AI Studio/Provider/OpenRouter/ProviderOpenRouter.cs b/app/MindWork AI Studio/Provider/OpenRouter/ProviderOpenRouter.cs
index 9d024c5d..82b67fa4 100644
--- a/app/MindWork AI Studio/Provider/OpenRouter/ProviderOpenRouter.cs
+++ b/app/MindWork AI Studio/Provider/OpenRouter/ProviderOpenRouter.cs
@@ -43,7 +43,7 @@ public sealed class ProviderOpenRouter() : BaseProvider(LLMProviders.OPEN_ROUTER
         var apiParameters = this.ParseAdditionalApiParameters();
 
         // Build the list of messages:
-        var messages = await chatThread.Blocks.BuildMessagesUsingStandardRolesAsync(this.Provider, chatModel);
+        var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
 
         // Prepare the OpenRouter HTTP chat request:
         var openRouterChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest
diff --git a/app/MindWork AI Studio/Provider/Perplexity/ProviderPerplexity.cs b/app/MindWork AI Studio/Provider/Perplexity/ProviderPerplexity.cs
index cfd1df16..f45d0295 100644
--- a/app/MindWork AI Studio/Provider/Perplexity/ProviderPerplexity.cs
+++ b/app/MindWork AI Studio/Provider/Perplexity/ProviderPerplexity.cs
@@ -49,7 +49,7 @@ public sealed class ProviderPerplexity() : BaseProvider(LLMProviders.PERPLEXITY,
         var apiParameters = this.ParseAdditionalApiParameters();
 
         // Build the list of messages:
-        var messages = await chatThread.Blocks.BuildMessagesUsingStandardRolesAsync(this.Provider, chatModel);
+        var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
 
         // Prepare the Perplexity HTTP chat request:
         var perplexityChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest
diff --git a/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs b/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs
index 59d5640e..2c715fbc 100644
--- a/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs
+++ b/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs
@@ -36,7 +36,7 @@ public sealed class ProviderSelfHosted(Host host, string hostname) : BaseProvide
         var apiParameters = this.ParseAdditionalApiParameters();
 
         // Build the list of messages:
-        var messages = await chatThread.Blocks.BuildMessagesUsingStandardRolesAsync(this.Provider, chatModel);
+        var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
 
         // Prepare the OpenAI HTTP chat request:
         var providerChatRequest = JsonSerializer.Serialize(new ChatRequest
diff --git a/app/MindWork AI Studio/Provider/X/ProviderX.cs b/app/MindWork AI Studio/Provider/X/ProviderX.cs
index 56189fa6..a638e342 100644
--- a/app/MindWork AI Studio/Provider/X/ProviderX.cs
+++ b/app/MindWork AI Studio/Provider/X/ProviderX.cs
@@ -40,7 +40,7 @@ public sealed class ProviderX() : BaseProvider(LLMProviders.X, "https://api.x.ai
         var apiParameters = this.ParseAdditionalApiParameters();
 
         // Build the list of messages:
-        var messages = await chatThread.Blocks.BuildMessagesUsingStandardRolesAsync(this.Provider, chatModel);
+        var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
 
         // Prepare the xAI HTTP chat request:
         var xChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest