diff --git a/app/MindWork AI Studio/Chat/ListContentBlockExtensions.cs b/app/MindWork AI Studio/Chat/ListContentBlockExtensions.cs index 066d22e5..5da41e80 100644 --- a/app/MindWork AI Studio/Chat/ListContentBlockExtensions.cs +++ b/app/MindWork AI Studio/Chat/ListContentBlockExtensions.cs @@ -99,13 +99,20 @@ public static class ListContentBlockExtensions } /// - /// Processes a list of content blocks using standard role transformations to create message results asynchronously. + /// Processes a list of content blocks using direct image URL format to create message results asynchronously. /// /// The list of content blocks to process. /// The selected LLM provider. /// The selected model. - /// >An asynchronous task that resolves to a list of transformed message results. - public static async Task> BuildMessagesUsingStandardsAsync( + /// An asynchronous task that resolves to a list of transformed message results. + /// + /// Uses direct image URL format where the image data is placed directly in the image_url field: + /// + /// { "type": "image_url", "image_url": "data:image/jpeg;base64,..." } + /// + /// This format is used by OpenAI, Mistral, and Ollama. + /// + public static async Task> BuildMessagesUsingDirectImageUrlAsync( this List blocks, LLMProviders selectedProvider, Model selectedModel) => await blocks.BuildMessagesAsync( @@ -113,20 +120,54 @@ public static class ListContentBlockExtensions selectedModel, StandardRoleTransformer, StandardTextSubContentFactory, - StandardImageSubContentFactory); + DirectImageSubContentFactory); + + /// + /// Processes a list of content blocks using nested image URL format to create message results asynchronously. + /// + /// The list of content blocks to process. + /// The selected LLM provider. + /// The selected model. + /// An asynchronous task that resolves to a list of transformed message results. + /// + /// Uses nested image URL format where the image data is wrapped in an object: + /// + /// { "type": "image_url", "image_url": { "url": "data:image/jpeg;base64,..." } } + /// + /// This format is used by LM Studio, VLLM, llama.cpp, and other OpenAI-compatible providers. + /// + public static async Task> BuildMessagesUsingNestedImageUrlAsync( + this List blocks, + LLMProviders selectedProvider, + Model selectedModel) => await blocks.BuildMessagesAsync( + selectedProvider, + selectedModel, + StandardRoleTransformer, + StandardTextSubContentFactory, + NestedImageSubContentFactory); private static ISubContent StandardTextSubContentFactory(string text) => new SubContentText { Text = text, }; - - private static async Task StandardImageSubContentFactory(FileAttachmentImage attachment) => new SubContentImageUrl + + private static async Task DirectImageSubContentFactory(FileAttachmentImage attachment) => new SubContentImageUrl { ImageUrl = await attachment.TryAsBase64() is (true, var base64Content) ? $"data:{attachment.DetermineMimeType()};base64,{base64Content}" : string.Empty, }; - + + private static async Task NestedImageSubContentFactory(FileAttachmentImage attachment) => new SubContentImageUrlNested + { + ImageUrl = new SubContentImageUrlData + { + Url = await attachment.TryAsBase64() is (true, var base64Content) + ? $"data:{attachment.DetermineMimeType()};base64,{base64Content}" + : string.Empty, + }, + }; + private static string StandardRoleTransformer(ChatRole role) => role switch { ChatRole.USER => "user", diff --git a/app/MindWork AI Studio/Provider/AlibabaCloud/ProviderAlibabaCloud.cs b/app/MindWork AI Studio/Provider/AlibabaCloud/ProviderAlibabaCloud.cs index a4fec2c1..dacfeea5 100644 --- a/app/MindWork AI Studio/Provider/AlibabaCloud/ProviderAlibabaCloud.cs +++ b/app/MindWork AI Studio/Provider/AlibabaCloud/ProviderAlibabaCloud.cs @@ -40,7 +40,7 @@ public sealed class ProviderAlibabaCloud() : BaseProvider(LLMProviders.ALIBABA_C var apiParameters = this.ParseAdditionalApiParameters(); // Build the list of messages: - var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel); + var messages = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel); // Prepare the AlibabaCloud HTTP chat request: var alibabaCloudChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest diff --git a/app/MindWork AI Studio/Provider/BaseProvider.cs b/app/MindWork AI Studio/Provider/BaseProvider.cs index 48e7716e..62464910 100644 --- a/app/MindWork AI Studio/Provider/BaseProvider.cs +++ b/app/MindWork AI Studio/Provider/BaseProvider.cs @@ -49,6 +49,7 @@ public abstract class BaseProvider : IProvider, ISecretId new MessageBaseConverter(), new SubContentConverter(), new SubContentImageSourceConverter(), + new SubContentImageUrlConverter(), }, AllowTrailingCommas = false }; diff --git a/app/MindWork AI Studio/Provider/DeepSeek/ProviderDeepSeek.cs b/app/MindWork AI Studio/Provider/DeepSeek/ProviderDeepSeek.cs index c7935cbb..4b597601 100644 --- a/app/MindWork AI Studio/Provider/DeepSeek/ProviderDeepSeek.cs +++ b/app/MindWork AI Studio/Provider/DeepSeek/ProviderDeepSeek.cs @@ -40,7 +40,7 @@ public sealed class ProviderDeepSeek() : BaseProvider(LLMProviders.DEEP_SEEK, "h var apiParameters = this.ParseAdditionalApiParameters(); // Build the list of messages: - var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel); + var messages = await chatThread.Blocks.BuildMessagesUsingDirectImageUrlAsync(this.Provider, chatModel); // Prepare the DeepSeek HTTP chat request: var deepSeekChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest diff --git a/app/MindWork AI Studio/Provider/Fireworks/ProviderFireworks.cs b/app/MindWork AI Studio/Provider/Fireworks/ProviderFireworks.cs index c09003da..5e056674 100644 --- a/app/MindWork AI Studio/Provider/Fireworks/ProviderFireworks.cs +++ b/app/MindWork AI Studio/Provider/Fireworks/ProviderFireworks.cs @@ -40,7 +40,7 @@ public class ProviderFireworks() : BaseProvider(LLMProviders.FIREWORKS, "https:/ var apiParameters = this.ParseAdditionalApiParameters(); // Build the list of messages: - var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel); + var messages = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel); // Prepare the Fireworks HTTP chat request: var fireworksChatRequest = JsonSerializer.Serialize(new ChatRequest diff --git a/app/MindWork AI Studio/Provider/GWDG/ProviderGWDG.cs b/app/MindWork AI Studio/Provider/GWDG/ProviderGWDG.cs index f07d2a28..deecd6d5 100644 --- a/app/MindWork AI Studio/Provider/GWDG/ProviderGWDG.cs +++ b/app/MindWork AI Studio/Provider/GWDG/ProviderGWDG.cs @@ -40,7 +40,7 @@ public sealed class ProviderGWDG() : BaseProvider(LLMProviders.GWDG, "https://ch var apiParameters = this.ParseAdditionalApiParameters(); // Build the list of messages: - var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel); + var messages = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel); // Prepare the GWDG HTTP chat request: var gwdgChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest diff --git a/app/MindWork AI Studio/Provider/Google/ProviderGoogle.cs b/app/MindWork AI Studio/Provider/Google/ProviderGoogle.cs index 19f46f45..a7cb6daa 100644 --- a/app/MindWork AI Studio/Provider/Google/ProviderGoogle.cs +++ b/app/MindWork AI Studio/Provider/Google/ProviderGoogle.cs @@ -40,7 +40,7 @@ public class ProviderGoogle() : BaseProvider(LLMProviders.GOOGLE, "https://gener var apiParameters = this.ParseAdditionalApiParameters(); // Build the list of messages: - var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel); + var messages = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel); // Prepare the Google HTTP chat request: var geminiChatRequest = JsonSerializer.Serialize(new ChatRequest diff --git a/app/MindWork AI Studio/Provider/Groq/ProviderGroq.cs b/app/MindWork AI Studio/Provider/Groq/ProviderGroq.cs index 350376e7..60d449b0 100644 --- a/app/MindWork AI Studio/Provider/Groq/ProviderGroq.cs +++ b/app/MindWork AI Studio/Provider/Groq/ProviderGroq.cs @@ -40,7 +40,7 @@ public class ProviderGroq() : BaseProvider(LLMProviders.GROQ, "https://api.groq. var apiParameters = this.ParseAdditionalApiParameters(); // Build the list of messages: - var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel); + var messages = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel); // Prepare the OpenAI HTTP chat request: var groqChatRequest = JsonSerializer.Serialize(new ChatRequest diff --git a/app/MindWork AI Studio/Provider/Helmholtz/ProviderHelmholtz.cs b/app/MindWork AI Studio/Provider/Helmholtz/ProviderHelmholtz.cs index f26ff712..07263d39 100644 --- a/app/MindWork AI Studio/Provider/Helmholtz/ProviderHelmholtz.cs +++ b/app/MindWork AI Studio/Provider/Helmholtz/ProviderHelmholtz.cs @@ -40,7 +40,7 @@ public sealed class ProviderHelmholtz() : BaseProvider(LLMProviders.HELMHOLTZ, " var apiParameters = this.ParseAdditionalApiParameters(); // Build the list of messages: - var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel); + var messages = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel); // Prepare the Helmholtz HTTP chat request: var helmholtzChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest diff --git a/app/MindWork AI Studio/Provider/HuggingFace/ProviderHuggingFace.cs b/app/MindWork AI Studio/Provider/HuggingFace/ProviderHuggingFace.cs index b051daa3..cfd2346c 100644 --- a/app/MindWork AI Studio/Provider/HuggingFace/ProviderHuggingFace.cs +++ b/app/MindWork AI Studio/Provider/HuggingFace/ProviderHuggingFace.cs @@ -45,7 +45,7 @@ public sealed class ProviderHuggingFace : BaseProvider var apiParameters = this.ParseAdditionalApiParameters(); // Build the list of messages: - var message = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel); + var message = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel); // Prepare the HuggingFace HTTP chat request: var huggingfaceChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest diff --git a/app/MindWork AI Studio/Provider/Mistral/ProviderMistral.cs b/app/MindWork AI Studio/Provider/Mistral/ProviderMistral.cs index 787c7819..522757ea 100644 --- a/app/MindWork AI Studio/Provider/Mistral/ProviderMistral.cs +++ b/app/MindWork AI Studio/Provider/Mistral/ProviderMistral.cs @@ -38,7 +38,7 @@ public sealed class ProviderMistral() : BaseProvider(LLMProviders.MISTRAL, "http var apiParameters = this.ParseAdditionalApiParameters(); // Build the list of messages: - var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel); + var messages = await chatThread.Blocks.BuildMessagesUsingDirectImageUrlAsync(this.Provider, chatModel); // Prepare the Mistral HTTP chat request: var mistralChatRequest = JsonSerializer.Serialize(new ChatRequest diff --git a/app/MindWork AI Studio/Provider/OpenAI/ISubContentImageUrl.cs b/app/MindWork AI Studio/Provider/OpenAI/ISubContentImageUrl.cs new file mode 100644 index 00000000..b2f9b51d --- /dev/null +++ b/app/MindWork AI Studio/Provider/OpenAI/ISubContentImageUrl.cs @@ -0,0 +1,19 @@ +namespace AIStudio.Provider.OpenAI; + +/// +/// Contract for nested image URL sub-content. +/// +/// +/// Some providers use a nested object format for image URLs: +/// +/// { "type": "image_url", "image_url": { "url": "data:image/jpeg;base64,..." } } +/// +/// This interface represents the inner object with the "url" property. +/// +public interface ISubContentImageUrl +{ + /// + /// The URL or base64-encoded data URI of the image. + /// + public string Url { get; init; } +} diff --git a/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs b/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs index 7fae4afc..b5a11e60 100644 --- a/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs +++ b/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs @@ -134,11 +134,14 @@ public sealed class ProviderOpenAI() : BaseProvider(LLMProviders.OPEN_AI, "https }, // Chat Completion API uses IMAGE_URL: - false => new SubContentImageUrl + false => new SubContentImageUrlNested { - ImageUrl = await attachment.TryAsBase64(token: token) is (true, var base64Content) - ? $"data:{attachment.DetermineMimeType()};base64,{base64Content}" - : string.Empty, + ImageUrl = new SubContentImageUrlData + { + Url = await attachment.TryAsBase64(token: token) is (true, var base64Content) + ? $"data:{attachment.DetermineMimeType()};base64,{base64Content}" + : string.Empty, + }, } }); diff --git a/app/MindWork AI Studio/Provider/OpenAI/SubContentImageUrlData.cs b/app/MindWork AI Studio/Provider/OpenAI/SubContentImageUrlData.cs new file mode 100644 index 00000000..af0ffc9e --- /dev/null +++ b/app/MindWork AI Studio/Provider/OpenAI/SubContentImageUrlData.cs @@ -0,0 +1,17 @@ +namespace AIStudio.Provider.OpenAI; + +/// +/// Represents the inner object of a nested image URL sub-content. +/// +/// +/// This record is used when the provider expects the format: +/// +/// { "type": "image_url", "image_url": { "url": "data:image/jpeg;base64,..." } } +/// +/// This class represents the inner { "url": "..." } part. +/// +public record SubContentImageUrlData : ISubContentImageUrl +{ + /// + public string Url { get; init; } = string.Empty; +} diff --git a/app/MindWork AI Studio/Provider/OpenAI/SubContentImageUrlNested.cs b/app/MindWork AI Studio/Provider/OpenAI/SubContentImageUrlNested.cs new file mode 100644 index 00000000..297a73fe --- /dev/null +++ b/app/MindWork AI Studio/Provider/OpenAI/SubContentImageUrlNested.cs @@ -0,0 +1,18 @@ +namespace AIStudio.Provider.OpenAI; + +/// +/// Image sub-content for multimodal messages using nested URL format. +/// +/// +/// This record is used when the provider expects the format: +/// +/// { "type": "image_url", "image_url": { "url": "data:image/jpeg;base64,..." } } +/// +/// Used by LM Studio, VLLM, and other OpenAI-compatible providers. +/// +public record SubContentImageUrlNested(SubContentType Type, ISubContentImageUrl ImageUrl) : ISubContent +{ + public SubContentImageUrlNested() : this(SubContentType.IMAGE_URL, new SubContentImageUrlData()) + { + } +} diff --git a/app/MindWork AI Studio/Provider/OpenRouter/ProviderOpenRouter.cs b/app/MindWork AI Studio/Provider/OpenRouter/ProviderOpenRouter.cs index 82b67fa4..9f2d3648 100644 --- a/app/MindWork AI Studio/Provider/OpenRouter/ProviderOpenRouter.cs +++ b/app/MindWork AI Studio/Provider/OpenRouter/ProviderOpenRouter.cs @@ -43,7 +43,7 @@ public sealed class ProviderOpenRouter() : BaseProvider(LLMProviders.OPEN_ROUTER var apiParameters = this.ParseAdditionalApiParameters(); // Build the list of messages: - var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel); + var messages = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel); // Prepare the OpenRouter HTTP chat request: var openRouterChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest diff --git a/app/MindWork AI Studio/Provider/Perplexity/ProviderPerplexity.cs b/app/MindWork AI Studio/Provider/Perplexity/ProviderPerplexity.cs index f45d0295..27101716 100644 --- a/app/MindWork AI Studio/Provider/Perplexity/ProviderPerplexity.cs +++ b/app/MindWork AI Studio/Provider/Perplexity/ProviderPerplexity.cs @@ -49,7 +49,7 @@ public sealed class ProviderPerplexity() : BaseProvider(LLMProviders.PERPLEXITY, var apiParameters = this.ParseAdditionalApiParameters(); // Build the list of messages: - var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel); + var messages = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel); // Prepare the Perplexity HTTP chat request: var perplexityChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest diff --git a/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs b/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs index 2c715fbc..70228589 100644 --- a/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs +++ b/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs @@ -34,9 +34,15 @@ public sealed class ProviderSelfHosted(Host host, string hostname) : BaseProvide // Parse the API parameters: var apiParameters = this.ParseAdditionalApiParameters(); - - // Build the list of messages: - var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel); + + // Build the list of messages. The image format depends on the host: + // - Ollama uses the direct image URL format: { "type": "image_url", "image_url": "data:..." } + // - LM Studio, vLLM, and llama.cpp use the nested image URL format: { "type": "image_url", "image_url": { "url": "data:..." } } + var messages = host switch + { + Host.OLLAMA => await chatThread.Blocks.BuildMessagesUsingDirectImageUrlAsync(this.Provider, chatModel), + _ => await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel), + }; // Prepare the OpenAI HTTP chat request: var providerChatRequest = JsonSerializer.Serialize(new ChatRequest diff --git a/app/MindWork AI Studio/Provider/SubContentImageUrlConverter.cs b/app/MindWork AI Studio/Provider/SubContentImageUrlConverter.cs new file mode 100644 index 00000000..d6df6878 --- /dev/null +++ b/app/MindWork AI Studio/Provider/SubContentImageUrlConverter.cs @@ -0,0 +1,34 @@ +using System.Text.Json; +using System.Text.Json.Serialization; + +using AIStudio.Provider.OpenAI; + +namespace AIStudio.Provider; + +/// +/// Custom JSON converter for the ISubContentImageUrl interface to handle polymorphic serialization. +/// +/// +/// This converter ensures that when serializing ISubContentImageUrl objects, all properties +/// of the concrete implementation (e.g., SubContentImageUrlData) are serialized, +/// not just the properties defined in the ISubContentImageUrl interface. +/// +public sealed class SubContentImageUrlConverter : JsonConverter +{ + private static readonly ILogger LOGGER = Program.LOGGER_FACTORY.CreateLogger(); + + public override ISubContentImageUrl? Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) + { + // Deserialization is not needed for request objects, as sub-content image URLs are only serialized + // when sending requests to LLM providers. + LOGGER.LogError("Deserializing ISubContentImageUrl is not supported. This converter is only used for serializing request messages."); + return null; + } + + public override void Write(Utf8JsonWriter writer, ISubContentImageUrl value, JsonSerializerOptions options) + { + // Serialize the actual concrete type (e.g., SubContentImageUrlData) instead of just the ISubContentImageUrl interface. + // This ensures all properties of the concrete type are included in the JSON output. + JsonSerializer.Serialize(writer, value, value.GetType(), options); + } +} diff --git a/app/MindWork AI Studio/Provider/X/ProviderX.cs b/app/MindWork AI Studio/Provider/X/ProviderX.cs index a638e342..db3f3a29 100644 --- a/app/MindWork AI Studio/Provider/X/ProviderX.cs +++ b/app/MindWork AI Studio/Provider/X/ProviderX.cs @@ -40,7 +40,7 @@ public sealed class ProviderX() : BaseProvider(LLMProviders.X, "https://api.x.ai var apiParameters = this.ParseAdditionalApiParameters(); // Build the list of messages: - var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel); + var messages = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel); // Prepare the xAI HTTP chat request: var xChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest