mirror of
https://github.com/MindWorkAI/AI-Studio.git
synced 2026-02-13 01:41:36 +00:00
Add support for nested and direct image URL formats in message processing
This commit is contained in:
parent
f96103d055
commit
c836de5a2c
@ -99,13 +99,20 @@ public static class ListContentBlockExtensions
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Processes a list of content blocks using standard role transformations to create message results asynchronously.
|
||||
/// Processes a list of content blocks using direct image URL format to create message results asynchronously.
|
||||
/// </summary>
|
||||
/// <param name="blocks">The list of content blocks to process.</param>
|
||||
/// <param name="selectedProvider">The selected LLM provider.</param>
|
||||
/// <param name="selectedModel">The selected model.</param>
|
||||
/// <returns>>An asynchronous task that resolves to a list of transformed message results.</returns>
|
||||
public static async Task<IList<IMessageBase>> BuildMessagesUsingStandardsAsync(
|
||||
/// <returns>An asynchronous task that resolves to a list of transformed message results.</returns>
|
||||
/// <remarks>
|
||||
/// Uses direct image URL format where the image data is placed directly in the image_url field:
|
||||
/// <code>
|
||||
/// { "type": "image_url", "image_url": "data:image/jpeg;base64,..." }
|
||||
/// </code>
|
||||
/// This format is used for DeepSeek, Mistral, and Ollama.
|
||||
/// </remarks>
|
||||
public static async Task<IList<IMessageBase>> BuildMessagesUsingDirectImageUrlAsync(
|
||||
this List<ContentBlock> blocks,
|
||||
LLMProviders selectedProvider,
|
||||
Model selectedModel) => await blocks.BuildMessagesAsync(
|
||||
@ -113,20 +120,54 @@ public static class ListContentBlockExtensions
|
||||
selectedModel,
|
||||
StandardRoleTransformer,
|
||||
StandardTextSubContentFactory,
|
||||
StandardImageSubContentFactory);
|
||||
DirectImageSubContentFactory);
|
||||
|
||||
/// <summary>
/// Asynchronously converts the content blocks into provider messages, encoding images in the nested image URL format.
/// </summary>
/// <param name="blocks">The content blocks to convert.</param>
/// <param name="selectedProvider">The LLM provider the messages are built for.</param>
/// <param name="selectedModel">The model the messages are built for.</param>
/// <returns>A task that resolves to the list of transformed messages.</returns>
/// <remarks>
/// In the nested format, the image data is wrapped in an inner object:
/// <code>
/// { "type": "image_url", "image_url": { "url": "data:image/jpeg;base64,..." } }
/// </code>
/// LM Studio, vLLM, llama.cpp, and other OpenAI-compatible providers expect this format.
/// </remarks>
public static async Task<IList<IMessageBase>> BuildMessagesUsingNestedImageUrlAsync(
    this List<ContentBlock> blocks,
    LLMProviders selectedProvider,
    Model selectedModel)
{
    // Same role and text handling as the other builders; only the image factory differs.
    var messages = await blocks.BuildMessagesAsync(
        selectedProvider,
        selectedModel,
        StandardRoleTransformer,
        StandardTextSubContentFactory,
        NestedImageSubContentFactory);

    return messages;
}
|
||||
|
||||
/// <summary>
/// Wraps plain text into a text sub-content element.
/// </summary>
/// <param name="text">The text to wrap.</param>
/// <returns>A <see cref="SubContentText"/> whose <c>Text</c> is the given text.</returns>
private static ISubContent StandardTextSubContentFactory(string text)
{
    var textContent = new SubContentText
    {
        Text = text,
    };

    return textContent;
}
|
||||
|
||||
private static async Task<ISubContent> StandardImageSubContentFactory(FileAttachmentImage attachment) => new SubContentImageUrl
|
||||
|
||||
/// <summary>
/// Creates an image sub-content in the direct format, where the data URI is the value of the image URL itself.
/// </summary>
/// <param name="attachment">The image attachment to encode.</param>
/// <returns>
/// A <see cref="SubContentImageUrl"/> carrying a base64 data URI, or an empty string
/// when the attachment could not be converted to base64.
/// </returns>
private static async Task<ISubContent> DirectImageSubContentFactory(FileAttachmentImage attachment)
{
    // Fall back to an empty URL when the base64 conversion does not succeed.
    var dataUrl = string.Empty;
    if (await attachment.TryAsBase64() is (true, var base64Content))
    {
        dataUrl = $"data:{attachment.DetermineMimeType()};base64,{base64Content}";
    }

    return new SubContentImageUrl
    {
        ImageUrl = dataUrl,
    };
}
|
||||
|
||||
|
||||
/// <summary>
/// Creates an image sub-content in the nested format, where the data URI sits in the "url" property of an inner object.
/// </summary>
/// <param name="attachment">The image attachment to encode.</param>
/// <returns>
/// A <see cref="SubContentImageUrlNested"/> whose inner URL is a base64 data URI, or an empty string
/// when the attachment could not be converted to base64.
/// </returns>
private static async Task<ISubContent> NestedImageSubContentFactory(FileAttachmentImage attachment)
{
    // Fall back to an empty URL when the base64 conversion does not succeed.
    var dataUrl = string.Empty;
    if (await attachment.TryAsBase64() is (true, var base64Content))
    {
        dataUrl = $"data:{attachment.DetermineMimeType()};base64,{base64Content}";
    }

    var urlData = new SubContentImageUrlData
    {
        Url = dataUrl,
    };

    return new SubContentImageUrlNested
    {
        ImageUrl = urlData,
    };
}
|
||||
|
||||
private static string StandardRoleTransformer(ChatRole role) => role switch
|
||||
{
|
||||
ChatRole.USER => "user",
|
||||
|
||||
@ -40,7 +40,7 @@ public sealed class ProviderAlibabaCloud() : BaseProvider(LLMProviders.ALIBABA_C
|
||||
var apiParameters = this.ParseAdditionalApiParameters();
|
||||
|
||||
// Build the list of messages:
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel);
|
||||
|
||||
// Prepare the AlibabaCloud HTTP chat request:
|
||||
var alibabaCloudChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest
|
||||
|
||||
@ -49,6 +49,7 @@ public abstract class BaseProvider : IProvider, ISecretId
|
||||
new MessageBaseConverter(),
|
||||
new SubContentConverter(),
|
||||
new SubContentImageSourceConverter(),
|
||||
new SubContentImageUrlConverter(),
|
||||
},
|
||||
AllowTrailingCommas = false
|
||||
};
|
||||
|
||||
@ -40,7 +40,7 @@ public sealed class ProviderDeepSeek() : BaseProvider(LLMProviders.DEEP_SEEK, "h
|
||||
var apiParameters = this.ParseAdditionalApiParameters();
|
||||
|
||||
// Build the list of messages:
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingDirectImageUrlAsync(this.Provider, chatModel);
|
||||
|
||||
// Prepare the DeepSeek HTTP chat request:
|
||||
var deepSeekChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest
|
||||
|
||||
@ -40,7 +40,7 @@ public class ProviderFireworks() : BaseProvider(LLMProviders.FIREWORKS, "https:/
|
||||
var apiParameters = this.ParseAdditionalApiParameters();
|
||||
|
||||
// Build the list of messages:
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel);
|
||||
|
||||
// Prepare the Fireworks HTTP chat request:
|
||||
var fireworksChatRequest = JsonSerializer.Serialize(new ChatRequest
|
||||
|
||||
@ -40,7 +40,7 @@ public sealed class ProviderGWDG() : BaseProvider(LLMProviders.GWDG, "https://ch
|
||||
var apiParameters = this.ParseAdditionalApiParameters();
|
||||
|
||||
// Build the list of messages:
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel);
|
||||
|
||||
// Prepare the GWDG HTTP chat request:
|
||||
var gwdgChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest
|
||||
|
||||
@ -40,7 +40,7 @@ public class ProviderGoogle() : BaseProvider(LLMProviders.GOOGLE, "https://gener
|
||||
var apiParameters = this.ParseAdditionalApiParameters();
|
||||
|
||||
// Build the list of messages:
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel);
|
||||
|
||||
// Prepare the Google HTTP chat request:
|
||||
var geminiChatRequest = JsonSerializer.Serialize(new ChatRequest
|
||||
|
||||
@ -40,7 +40,7 @@ public class ProviderGroq() : BaseProvider(LLMProviders.GROQ, "https://api.groq.
|
||||
var apiParameters = this.ParseAdditionalApiParameters();
|
||||
|
||||
// Build the list of messages:
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel);
|
||||
|
||||
// Prepare the Groq HTTP chat request:
|
||||
var groqChatRequest = JsonSerializer.Serialize(new ChatRequest
|
||||
|
||||
@ -40,7 +40,7 @@ public sealed class ProviderHelmholtz() : BaseProvider(LLMProviders.HELMHOLTZ, "
|
||||
var apiParameters = this.ParseAdditionalApiParameters();
|
||||
|
||||
// Build the list of messages:
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel);
|
||||
|
||||
// Prepare the Helmholtz HTTP chat request:
|
||||
var helmholtzChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest
|
||||
|
||||
@ -45,7 +45,7 @@ public sealed class ProviderHuggingFace : BaseProvider
|
||||
var apiParameters = this.ParseAdditionalApiParameters();
|
||||
|
||||
// Build the list of messages:
|
||||
var message = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
|
||||
var message = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel);
|
||||
|
||||
// Prepare the HuggingFace HTTP chat request:
|
||||
var huggingfaceChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest
|
||||
|
||||
@ -38,7 +38,7 @@ public sealed class ProviderMistral() : BaseProvider(LLMProviders.MISTRAL, "http
|
||||
var apiParameters = this.ParseAdditionalApiParameters();
|
||||
|
||||
// Build the list of messages:
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingDirectImageUrlAsync(this.Provider, chatModel);
|
||||
|
||||
// Prepare the Mistral HTTP chat request:
|
||||
var mistralChatRequest = JsonSerializer.Serialize(new ChatRequest
|
||||
|
||||
@ -0,0 +1,19 @@
|
||||
namespace AIStudio.Provider.OpenAI;
|
||||
|
||||
/// <summary>
/// Describes the inner object of an image sub-content that uses the nested URL format.
/// </summary>
/// <remarks>
/// Providers that use the nested format expect image parts shaped like this:
/// <code>
/// { "type": "image_url", "image_url": { "url": "data:image/jpeg;base64,..." } }
/// </code>
/// Implementations model the inner object that carries the "url" property.
/// </remarks>
public interface ISubContentImageUrl
{
    /// <summary>
    /// Gets the image location: either a URL or a base64-encoded data URI.
    /// </summary>
    string Url { get; init; }
}
|
||||
@ -134,11 +134,14 @@ public sealed class ProviderOpenAI() : BaseProvider(LLMProviders.OPEN_AI, "https
|
||||
},
|
||||
|
||||
// Chat Completion API uses IMAGE_URL:
|
||||
false => new SubContentImageUrl
|
||||
false => new SubContentImageUrlNested
|
||||
{
|
||||
ImageUrl = await attachment.TryAsBase64(token: token) is (true, var base64Content)
|
||||
? $"data:{attachment.DetermineMimeType()};base64,{base64Content}"
|
||||
: string.Empty,
|
||||
ImageUrl = new SubContentImageUrlData
|
||||
{
|
||||
Url = await attachment.TryAsBase64(token: token) is (true, var base64Content)
|
||||
? $"data:{attachment.DetermineMimeType()};base64,{base64Content}"
|
||||
: string.Empty,
|
||||
},
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
@ -0,0 +1,17 @@
|
||||
namespace AIStudio.Provider.OpenAI;
|
||||
|
||||
/// <summary>
/// The inner URL object of an image sub-content in the nested URL format.
/// </summary>
/// <remarks>
/// Some providers expect image parts in this shape:
/// <code>
/// { "type": "image_url", "image_url": { "url": "data:image/jpeg;base64,..." } }
/// </code>
/// This record models only the inner <c>{ "url": "..." }</c> part.
/// </remarks>
public record SubContentImageUrlData : ISubContentImageUrl
{
    /// <summary>
    /// The URL or base64-encoded data URI of the image. Defaults to an empty string.
    /// </summary>
    public string Url { get; init; } = "";
}
|
||||
@ -0,0 +1,18 @@
|
||||
namespace AIStudio.Provider.OpenAI;
|
||||
|
||||
/// <summary>
/// An image part of a multimodal message, expressed in the nested URL format.
/// </summary>
/// <remarks>
/// Providers that use the nested format expect image parts shaped like this:
/// <code>
/// { "type": "image_url", "image_url": { "url": "data:image/jpeg;base64,..." } }
/// </code>
/// LM Studio, vLLM, and other OpenAI-compatible providers use this format.
/// </remarks>
/// <param name="Type">The kind of sub-content.</param>
/// <param name="ImageUrl">The inner object that carries the image URL.</param>
public record SubContentImageUrlNested(SubContentType Type, ISubContentImageUrl ImageUrl) : ISubContent
{
    /// <summary>
    /// Creates an image sub-content of type <see cref="SubContentType.IMAGE_URL"/>
    /// with a default <see cref="SubContentImageUrlData"/> as its URL object.
    /// </summary>
    public SubContentImageUrlNested()
        : this(Type: SubContentType.IMAGE_URL, ImageUrl: new SubContentImageUrlData())
    {
    }
}
|
||||
@ -43,7 +43,7 @@ public sealed class ProviderOpenRouter() : BaseProvider(LLMProviders.OPEN_ROUTER
|
||||
var apiParameters = this.ParseAdditionalApiParameters();
|
||||
|
||||
// Build the list of messages:
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel);
|
||||
|
||||
// Prepare the OpenRouter HTTP chat request:
|
||||
var openRouterChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest
|
||||
|
||||
@ -49,7 +49,7 @@ public sealed class ProviderPerplexity() : BaseProvider(LLMProviders.PERPLEXITY,
|
||||
var apiParameters = this.ParseAdditionalApiParameters();
|
||||
|
||||
// Build the list of messages:
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel);
|
||||
|
||||
// Prepare the Perplexity HTTP chat request:
|
||||
var perplexityChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest
|
||||
|
||||
@ -34,9 +34,15 @@ public sealed class ProviderSelfHosted(Host host, string hostname) : BaseProvide
|
||||
|
||||
// Parse the API parameters:
|
||||
var apiParameters = this.ParseAdditionalApiParameters();
|
||||
|
||||
// Build the list of messages:
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
|
||||
|
||||
// Build the list of messages. The image format depends on the host:
|
||||
// - Ollama uses the direct image URL format: { "type": "image_url", "image_url": "data:..." }
|
||||
// - LM Studio, vLLM, and llama.cpp use the nested image URL format: { "type": "image_url", "image_url": { "url": "data:..." } }
|
||||
var messages = host switch
|
||||
{
|
||||
Host.OLLAMA => await chatThread.Blocks.BuildMessagesUsingDirectImageUrlAsync(this.Provider, chatModel),
|
||||
_ => await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel),
|
||||
};
|
||||
|
||||
// Prepare the OpenAI-compatible HTTP chat request for the self-hosted provider:
|
||||
var providerChatRequest = JsonSerializer.Serialize(new ChatRequest
|
||||
|
||||
@ -0,0 +1,34 @@
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
using AIStudio.Provider.OpenAI;
|
||||
|
||||
namespace AIStudio.Provider;
|
||||
|
||||
/// <summary>
/// JSON converter that serializes <see cref="ISubContentImageUrl"/> values using their concrete runtime type.
/// </summary>
/// <remarks>
/// Writing through this converter serializes every property of the concrete implementation
/// (e.g., SubContentImageUrlData) rather than only the members declared on the
/// ISubContentImageUrl interface. Reading is not supported.
/// </remarks>
public sealed class SubContentImageUrlConverter : JsonConverter<ISubContentImageUrl>
{
    private static readonly ILogger<SubContentImageUrlConverter> LOGGER = Program.LOGGER_FACTORY.CreateLogger<SubContentImageUrlConverter>();

    /// <summary>
    /// Not supported: logs an error and returns <c>null</c> without consuming the reader.
    /// </summary>
    /// <param name="reader">The JSON reader (not used).</param>
    /// <param name="typeToConvert">The requested target type (not used).</param>
    /// <param name="options">The serializer options (not used).</param>
    /// <returns>Always <c>null</c>.</returns>
    public override ISubContentImageUrl? Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        // Sub-content image URLs only appear in outgoing request messages,
        // so there is nothing to deserialize here.
        LOGGER.LogError("Deserializing ISubContentImageUrl is not supported. This converter is only used for serializing request messages.");
        return null;
    }

    /// <summary>
    /// Writes the value as JSON based on its runtime type.
    /// </summary>
    /// <param name="writer">The JSON writer to write to.</param>
    /// <param name="value">The image URL object to serialize.</param>
    /// <param name="options">The serializer options to apply.</param>
    public override void Write(Utf8JsonWriter writer, ISubContentImageUrl value, JsonSerializerOptions options)
    {
        // Pass the runtime type explicitly so that the concrete type's
        // properties are emitted, not just the interface's.
        var concreteType = value.GetType();
        JsonSerializer.Serialize(writer, value, concreteType, options);
    }
}
|
||||
@ -40,7 +40,7 @@ public sealed class ProviderX() : BaseProvider(LLMProviders.X, "https://api.x.ai
|
||||
var apiParameters = this.ParseAdditionalApiParameters();
|
||||
|
||||
// Build the list of messages:
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingStandardsAsync(this.Provider, chatModel);
|
||||
var messages = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel);
|
||||
|
||||
// Prepare the xAI HTTP chat request:
|
||||
var xChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest
|
||||
|
||||
Loading…
Reference in New Issue
Block a user