Add support for nested and direct image URL formats in message processing

This commit is contained in:
Thorsten Sommer 2025-12-29 20:50:12 +01:00
parent f96103d055
commit c836de5a2c
Signed by: tsommer
GPG Key ID: 371BBA77A02C0108
20 changed files with 165 additions and 26 deletions

View File

@ -99,13 +99,20 @@ public static class ListContentBlockExtensions
}
/// <summary>
/// Processes a list of content blocks using standard role transformations to create message results asynchronously.
/// Processes a list of content blocks using direct image URL format to create message results asynchronously.
/// </summary>
/// <param name="blocks">The list of content blocks to process.</param>
/// <param name="selectedProvider">The selected LLM provider.</param>
/// <param name="selectedModel">The selected model.</param>
/// <returns>>An asynchronous task that resolves to a list of transformed message results.</returns>
public static async Task<IList<IMessageBase>> BuildMessagesUsingStandardsAsync(
/// <returns>An asynchronous task that resolves to a list of transformed message results.</returns>
/// <remarks>
/// Uses direct image URL format where the image data is placed directly in the image_url field:
/// <code>
/// { "type": "image_url", "image_url": "data:image/jpeg;base64,..." }
/// </code>
/// This format is used by Mistral, DeepSeek, and Ollama.
/// </remarks>
public static async Task<IList<IMessageBase>> BuildMessagesUsingDirectImageUrlAsync(
this List<ContentBlock> blocks,
LLMProviders selectedProvider,
Model selectedModel) => await blocks.BuildMessagesAsync(
@ -113,20 +120,54 @@ public static class ListContentBlockExtensions
selectedModel,
StandardRoleTransformer,
StandardTextSubContentFactory,
StandardImageSubContentFactory);
DirectImageSubContentFactory);
/// <summary>
/// Asynchronously converts a list of content blocks into message results, emitting images in the nested image URL format.
/// </summary>
/// <param name="blocks">The content blocks that should be converted.</param>
/// <param name="selectedProvider">The LLM provider the messages are built for.</param>
/// <param name="selectedModel">The model the messages are built for.</param>
/// <returns>A task that resolves to the list of message results.</returns>
/// <remarks>
/// In the nested format, the image data is wrapped in an object with a "url" property:
/// <code>
/// { "type": "image_url", "image_url": { "url": "data:image/jpeg;base64,..." } }
/// </code>
/// This format is used by LM Studio, vLLM, llama.cpp, and other OpenAI-compatible providers.
/// </remarks>
public static async Task<IList<IMessageBase>> BuildMessagesUsingNestedImageUrlAsync(
    this List<ContentBlock> blocks,
    LLMProviders selectedProvider,
    Model selectedModel)
{
    // Delegate to the shared builder; only the image factory differs from the direct-format variant.
    return await blocks.BuildMessagesAsync(
        selectedProvider,
        selectedModel,
        StandardRoleTransformer,
        StandardTextSubContentFactory,
        NestedImageSubContentFactory);
}
/// <summary>
/// Wraps the given text in a text sub-content.
/// </summary>
/// <param name="text">The text to wrap.</param>
/// <returns>A <c>SubContentText</c> whose <c>Text</c> is set to <paramref name="text"/>.</returns>
private static ISubContent StandardTextSubContentFactory(string text)
{
    return new SubContentText { Text = text };
}
private static async Task<ISubContent> StandardImageSubContentFactory(FileAttachmentImage attachment) => new SubContentImageUrl
/// <summary>
/// Creates an image sub-content in the direct format, where the data URI is assigned to <c>ImageUrl</c> itself.
/// </summary>
/// <param name="attachment">The image attachment to convert.</param>
/// <returns>
/// A <c>SubContentImageUrl</c> carrying a base64 data URI, or an empty string
/// when the result of <c>TryAsBase64</c> does not match <c>(true, content)</c>.
/// </returns>
private static async Task<ISubContent> DirectImageSubContentFactory(FileAttachmentImage attachment)
{
    var conversion = await attachment.TryAsBase64();

    // The MIME type is only determined when the base64 conversion matched.
    var dataUri = conversion is (true, var encodedImage)
        ? $"data:{attachment.DetermineMimeType()};base64,{encodedImage}"
        : string.Empty;

    return new SubContentImageUrl { ImageUrl = dataUri };
}
/// <summary>
/// Creates an image sub-content in the nested format, where the data URI is placed in the <c>Url</c> of an inner object.
/// </summary>
/// <param name="attachment">The image attachment to convert.</param>
/// <returns>
/// A <c>SubContentImageUrlNested</c> whose inner <c>Url</c> is a base64 data URI, or an empty string
/// when the result of <c>TryAsBase64</c> does not match <c>(true, content)</c>.
/// </returns>
private static async Task<ISubContent> NestedImageSubContentFactory(FileAttachmentImage attachment)
{
    var conversion = await attachment.TryAsBase64();

    // The MIME type is only determined when the base64 conversion matched.
    var dataUri = conversion is (true, var encodedImage)
        ? $"data:{attachment.DetermineMimeType()};base64,{encodedImage}"
        : string.Empty;

    var inner = new SubContentImageUrlData { Url = dataUri };
    return new SubContentImageUrlNested { ImageUrl = inner };
}
private static string StandardRoleTransformer(ChatRole role) => role switch
{
ChatRole.USER => "user",

View File

@ -40,7 +40,7 @@ public sealed class ProviderAlibabaCloud() : BaseProvider(LLMProviders.ALIBABA_C
var apiParameters = this.ParseAdditionalApiParameters();
// Build the list of messages:
var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
var messages = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel);
// Prepare the AlibabaCloud HTTP chat request:
var alibabaCloudChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest

View File

@ -49,6 +49,7 @@ public abstract class BaseProvider : IProvider, ISecretId
new MessageBaseConverter(),
new SubContentConverter(),
new SubContentImageSourceConverter(),
new SubContentImageUrlConverter(),
},
AllowTrailingCommas = false
};

View File

@ -40,7 +40,7 @@ public sealed class ProviderDeepSeek() : BaseProvider(LLMProviders.DEEP_SEEK, "h
var apiParameters = this.ParseAdditionalApiParameters();
// Build the list of messages:
var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
var messages = await chatThread.Blocks.BuildMessagesUsingDirectImageUrlAsync(this.Provider, chatModel);
// Prepare the DeepSeek HTTP chat request:
var deepSeekChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest

View File

@ -40,7 +40,7 @@ public class ProviderFireworks() : BaseProvider(LLMProviders.FIREWORKS, "https:/
var apiParameters = this.ParseAdditionalApiParameters();
// Build the list of messages:
var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
var messages = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel);
// Prepare the Fireworks HTTP chat request:
var fireworksChatRequest = JsonSerializer.Serialize(new ChatRequest

View File

@ -40,7 +40,7 @@ public sealed class ProviderGWDG() : BaseProvider(LLMProviders.GWDG, "https://ch
var apiParameters = this.ParseAdditionalApiParameters();
// Build the list of messages:
var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
var messages = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel);
// Prepare the GWDG HTTP chat request:
var gwdgChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest

View File

@ -40,7 +40,7 @@ public class ProviderGoogle() : BaseProvider(LLMProviders.GOOGLE, "https://gener
var apiParameters = this.ParseAdditionalApiParameters();
// Build the list of messages:
var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
var messages = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel);
// Prepare the Google HTTP chat request:
var geminiChatRequest = JsonSerializer.Serialize(new ChatRequest

View File

@ -40,7 +40,7 @@ public class ProviderGroq() : BaseProvider(LLMProviders.GROQ, "https://api.groq.
var apiParameters = this.ParseAdditionalApiParameters();
// Build the list of messages:
var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
var messages = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel);
// Prepare the Groq HTTP chat request:
var groqChatRequest = JsonSerializer.Serialize(new ChatRequest

View File

@ -40,7 +40,7 @@ public sealed class ProviderHelmholtz() : BaseProvider(LLMProviders.HELMHOLTZ, "
var apiParameters = this.ParseAdditionalApiParameters();
// Build the list of messages:
var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
var messages = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel);
// Prepare the Helmholtz HTTP chat request:
var helmholtzChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest

View File

@ -45,7 +45,7 @@ public sealed class ProviderHuggingFace : BaseProvider
var apiParameters = this.ParseAdditionalApiParameters();
// Build the list of messages:
var message = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
var message = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel);
// Prepare the HuggingFace HTTP chat request:
var huggingfaceChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest

View File

@ -38,7 +38,7 @@ public sealed class ProviderMistral() : BaseProvider(LLMProviders.MISTRAL, "http
var apiParameters = this.ParseAdditionalApiParameters();
// Build the list of messages:
var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
var messages = await chatThread.Blocks.BuildMessagesUsingDirectImageUrlAsync(this.Provider, chatModel);
// Prepare the Mistral HTTP chat request:
var mistralChatRequest = JsonSerializer.Serialize(new ChatRequest

View File

@ -0,0 +1,19 @@
namespace AIStudio.Provider.OpenAI;
/// <summary>
/// Describes the inner object of an image sub-content that uses the nested URL format.
/// </summary>
/// <remarks>
/// Some providers expect image URLs to be wrapped in an object:
/// <code>
/// { "type": "image_url", "image_url": { "url": "data:image/jpeg;base64,..." } }
/// </code>
/// Implementations of this interface model that inner object, i.e., the part carrying the "url" property.
/// </remarks>
public interface ISubContentImageUrl
{
    /// <summary>
    /// Gets the image location: either a URL or a base64-encoded data URI.
    /// </summary>
    string Url { get; init; }
}

View File

@ -134,11 +134,14 @@ public sealed class ProviderOpenAI() : BaseProvider(LLMProviders.OPEN_AI, "https
},
// Chat Completion API uses IMAGE_URL:
false => new SubContentImageUrl
false => new SubContentImageUrlNested
{
ImageUrl = await attachment.TryAsBase64(token: token) is (true, var base64Content)
? $"data:{attachment.DetermineMimeType()};base64,{base64Content}"
: string.Empty,
ImageUrl = new SubContentImageUrlData
{
Url = await attachment.TryAsBase64(token: token) is (true, var base64Content)
? $"data:{attachment.DetermineMimeType()};base64,{base64Content}"
: string.Empty,
},
}
});

View File

@ -0,0 +1,17 @@
namespace AIStudio.Provider.OpenAI;
/// <summary>
/// Represents the inner object of a nested image URL sub-content.
/// </summary>
/// <remarks>
/// This record is used when the provider expects the format:
/// <code>
/// { "type": "image_url", "image_url": { "url": "data:image/jpeg;base64,..." } }
/// </code>
/// This record represents the inner <c>{ "url": "..." }</c> part.
/// <see cref="Url"/> defaults to an empty string when it is not set explicitly.
/// </remarks>
public record SubContentImageUrlData : ISubContentImageUrl
{
/// <inheritdoc />
public string Url { get; init; } = string.Empty;
}

View File

@ -0,0 +1,18 @@
namespace AIStudio.Provider.OpenAI;
/// <summary>
/// Image sub-content for multimodal messages that carries its URL in a nested object.
/// </summary>
/// <remarks>
/// Use this record when the provider expects the format:
/// <code>
/// { "type": "image_url", "image_url": { "url": "data:image/jpeg;base64,..." } }
/// </code>
/// Used by LM Studio, vLLM, and other OpenAI-compatible providers.
/// </remarks>
/// <param name="Type">The sub-content type.</param>
/// <param name="ImageUrl">The inner object that holds the image URL.</param>
public record SubContentImageUrlNested(SubContentType Type, ISubContentImageUrl ImageUrl) : ISubContent
{
    /// <summary>
    /// Creates an instance of type <c>IMAGE_URL</c> with a default <c>SubContentImageUrlData</c> as inner object.
    /// </summary>
    public SubContentImageUrlNested()
        : this(Type: SubContentType.IMAGE_URL, ImageUrl: new SubContentImageUrlData())
    {
    }
}

View File

@ -43,7 +43,7 @@ public sealed class ProviderOpenRouter() : BaseProvider(LLMProviders.OPEN_ROUTER
var apiParameters = this.ParseAdditionalApiParameters();
// Build the list of messages:
var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
var messages = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel);
// Prepare the OpenRouter HTTP chat request:
var openRouterChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest

View File

@ -49,7 +49,7 @@ public sealed class ProviderPerplexity() : BaseProvider(LLMProviders.PERPLEXITY,
var apiParameters = this.ParseAdditionalApiParameters();
// Build the list of messages:
var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
var messages = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel);
// Prepare the Perplexity HTTP chat request:
var perplexityChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest

View File

@ -34,9 +34,15 @@ public sealed class ProviderSelfHosted(Host host, string hostname) : BaseProvide
// Parse the API parameters:
var apiParameters = this.ParseAdditionalApiParameters();
// Build the list of messages:
var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
// Build the list of messages. The image format depends on the host:
// - Ollama uses the direct image URL format: { "type": "image_url", "image_url": "data:..." }
// - LM Studio, vLLM, and llama.cpp use the nested image URL format: { "type": "image_url", "image_url": { "url": "data:..." } }
var messages = host switch
{
Host.OLLAMA => await chatThread.Blocks.BuildMessagesUsingDirectImageUrlAsync(this.Provider, chatModel),
_ => await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel),
};
// Prepare the OpenAI HTTP chat request:
var providerChatRequest = JsonSerializer.Serialize(new ChatRequest

View File

@ -0,0 +1,34 @@
using System.Text.Json;
using System.Text.Json.Serialization;
using AIStudio.Provider.OpenAI;
namespace AIStudio.Provider;
/// <summary>
/// JSON converter for <see cref="ISubContentImageUrl"/> that serializes values by their runtime type.
/// </summary>
/// <remarks>
/// When an <see cref="ISubContentImageUrl"/> value is written, the concrete implementation
/// (e.g., SubContentImageUrlData) is serialized, so that all of its properties end up in the
/// JSON output and not only those declared on the interface. Reading is not supported.
/// </remarks>
public sealed class SubContentImageUrlConverter : JsonConverter<ISubContentImageUrl>
{
    private static readonly ILogger<SubContentImageUrlConverter> LOGGER = Program.LOGGER_FACTORY.CreateLogger<SubContentImageUrlConverter>();

    /// <summary>
    /// Not supported: logs an error and returns <c>null</c>.
    /// </summary>
    /// <remarks>
    /// Image URL sub-contents are only serialized when sending requests to LLM providers.
    /// NOTE(review): the reader is not advanced here; confirm this path is never reached during real deserialization.
    /// </remarks>
    public override ISubContentImageUrl? Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        LOGGER.LogError("Deserializing ISubContentImageUrl is not supported. This converter is only used for serializing request messages.");
        return null;
    }

    /// <summary>
    /// Writes <paramref name="value"/> using its runtime type instead of the interface type.
    /// </summary>
    public override void Write(Utf8JsonWriter writer, ISubContentImageUrl value, JsonSerializerOptions options)
        => JsonSerializer.Serialize(writer, value, value.GetType(), options);
}

View File

@ -40,7 +40,7 @@ public sealed class ProviderX() : BaseProvider(LLMProviders.X, "https://api.x.ai
var apiParameters = this.ParseAdditionalApiParameters();
// Build the list of messages:
var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
var messages = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel);
// Prepare the xAI HTTP chat request:
var xChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest