using AIStudio.Provider;
using AIStudio.Provider.OpenAI;
using AIStudio.Settings;

namespace AIStudio.Chat;

/// <summary>
/// Extension methods that convert a list of chat content blocks into provider messages.
/// </summary>
public static class ListContentBlockExtensions
{
    /// <summary>
    /// Processes a list of content blocks by transforming them into a collection of message results asynchronously.
    /// </summary>
    /// <remarks>
    /// Only blocks of type <see cref="ContentType.TEXT"/> whose content is a <see cref="ContentText"/>
    /// with non-blank text produce a message; all other blocks are skipped. A multimodal message is
    /// created only when the model reports an image-input capability and the text has image attachments.
    /// </remarks>
    /// <param name="blocks">The list of content blocks to process.</param>
    /// <param name="selectedProvider">The selected LLM provider.</param>
    /// <param name="selectedModel">The selected model.</param>
    /// <param name="roleTransformer">A function that maps a block's chat role to the role value stored on the message.</param>
    /// <param name="textSubContentFactory">A factory function to create text sub-content.</param>
    /// <param name="imageSubContentFactory">A factory function to create image sub-content.</param>
    /// <returns>An asynchronous task that resolves to a list of transformed results.</returns>
    public static async Task<IList<IMessageBase>> BuildMessagesAsync(
        this List<ContentBlock> blocks,
        LLMProviders selectedProvider,
        Model selectedModel,
        Func<ChatRole, string> roleTransformer,
        Func<string, ISubContent> textSubContentFactory,
        Func<FileAttachmentImage, Task<ISubContent>> imageSubContentFactory)
    {
        var capabilities = selectedProvider.GetModelCapabilities(selectedModel);
        var canProcessImages = capabilities.Contains(Capability.MULTIPLE_IMAGE_INPUT)
                               || capabilities.Contains(Capability.SINGLE_IMAGE_INPUT);

        var messageTaskList = new List<Task<IMessageBase>>(blocks.Count);
        foreach (var block in blocks)
        {
            // Only non-blank text blocks are turned into messages:
            if (block.ContentType is not ContentType.TEXT
                || block.Content is not ContentText text
                || string.IsNullOrWhiteSpace(text.Text))
            {
                continue;
            }

            // The prompt may or may not contain image(s). When the provider/model cannot
            // process images, or there are no image attachments, we treat it as a regular
            // text message. Otherwise, the images are sent along with the text.
            var useMultimodal = canProcessImages && text.FileAttachments.ContainsImages();
            messageTaskList.Add(useMultimodal
                ? CreateMultimodalMessageAsync(block, text)
                : CreateTextMessageAsync(block, text));
        }

        // Await all messages; the results keep the order of the task list:
        var messages = await Task.WhenAll(messageTaskList);

        // Return a mutable list, as before:
        return messages.ToList();

        // Local function to create a text message asynchronously.
        Task<IMessageBase> CreateTextMessageAsync(ContentBlock block, ContentText text)
        {
            return Task.Run<IMessageBase>(async () => new TextMessage
            {
                Role = roleTransformer(block.Role),
                Content = await text.PrepareTextContentForAI(),
            });
        }

        // Local function to create a multimodal message asynchronously.
        Task<IMessageBase> CreateMultimodalMessageAsync(ContentBlock block, ContentText text)
        {
            return Task.Run<IMessageBase>(async () =>
            {
                // Start loading all existing image attachments:
                var imagesTasks = text.FileAttachments
                    .Where(x => x is { IsImage: true, Exists: true })
                    .Cast<FileAttachmentImage>()
                    .Select(imageSubContentFactory)
                    .ToList();

                // Await instead of blocking, so that no thread pool thread
                // is held while the images are being loaded:
                var images = await Task.WhenAll(imagesTasks);

                return new MultimodalMessage
                {
                    Role = roleTransformer(block.Role),
                    Content =
                    [
                        textSubContentFactory(await text.PrepareTextContentForAI()),
                        ..images,
                    ]
                };
            });
        }
    }

    /// <summary>
    /// Processes a list of content blocks using direct image URL format to create message results asynchronously.
    /// </summary>
    /// <param name="blocks">The list of content blocks to process.</param>
    /// <param name="selectedProvider">The selected LLM provider.</param>
    /// <param name="selectedModel">The selected model.</param>
    /// <returns>An asynchronous task that resolves to a list of transformed message results.</returns>
    /// <remarks>
    /// Uses direct image URL format where the image data is placed directly in the image_url field:
    /// <code>
    /// { "type": "image_url", "image_url": "data:image/jpeg;base64,..." }
    /// </code>
    /// This format is used by OpenAI, Mistral, and Ollama.
    /// </remarks>
    public static async Task<IList<IMessageBase>> BuildMessagesUsingDirectImageUrlAsync(
        this List<ContentBlock> blocks,
        LLMProviders selectedProvider,
        Model selectedModel) => await blocks.BuildMessagesAsync(
            selectedProvider,
            selectedModel,
            StandardRoleTransformer,
            StandardTextSubContentFactory,
            DirectImageSubContentFactory);

    /// <summary>
    /// Processes a list of content blocks using nested image URL format to create message results asynchronously.
    /// </summary>
    /// <param name="blocks">The list of content blocks to process.</param>
    /// <param name="selectedProvider">The selected LLM provider.</param>
    /// <param name="selectedModel">The selected model.</param>
    /// <returns>An asynchronous task that resolves to a list of transformed message results.</returns>
    /// <remarks>
    /// Uses nested image URL format where the image data is wrapped in an object:
    /// <code>
    /// { "type": "image_url", "image_url": { "url": "data:image/jpeg;base64,..." } }
    /// </code>
    /// This format is used by LM Studio, VLLM, llama.cpp, and other OpenAI-compatible providers.
    /// </remarks>
    public static async Task<IList<IMessageBase>> BuildMessagesUsingNestedImageUrlAsync(
        this List<ContentBlock> blocks,
        LLMProviders selectedProvider,
        Model selectedModel) => await blocks.BuildMessagesAsync(
            selectedProvider,
            selectedModel,
            StandardRoleTransformer,
            StandardTextSubContentFactory,
            NestedImageSubContentFactory);

    /// <summary>
    /// Wraps plain text into a text sub-content element.
    /// </summary>
    private static ISubContent StandardTextSubContentFactory(string text) => new SubContentText
    {
        Text = text,
    };

    /// <summary>
    /// Creates an image sub-content element that carries the data URL directly in the image URL field.
    /// </summary>
    private static async Task<ISubContent> DirectImageSubContentFactory(FileAttachmentImage attachment) => new SubContentImageUrl
    {
        ImageUrl = await BuildImageDataUrlAsync(attachment),
    };

    /// <summary>
    /// Creates an image sub-content element that wraps the data URL in a nested URL object.
    /// </summary>
    private static async Task<ISubContent> NestedImageSubContentFactory(FileAttachmentImage attachment) => new SubContentImageUrlNested
    {
        ImageUrl = new SubContentImageUrlData
        {
            Url = await BuildImageDataUrlAsync(attachment),
        },
    };

    /// <summary>
    /// Builds the base64 data URL for an image attachment.
    /// </summary>
    /// <returns>
    /// The data URL, or an empty string when the attachment could not be converted to base64.
    /// </returns>
    private static async Task<string> BuildImageDataUrlAsync(FileAttachmentImage attachment) =>
        await attachment.TryAsBase64() is (true, var base64Content)
            ? $"data:{attachment.DetermineMimeType()};base64,{base64Content}"
            : string.Empty;

    /// <summary>
    /// Maps a chat role to the role name used in the messages. Unknown roles are mapped to "user".
    /// </summary>
    private static string StandardRoleTransformer(ChatRole role) => role switch
    {
        ChatRole.USER => "user",
        ChatRole.AI => "assistant",
        ChatRole.AGENT => "assistant",
        ChatRole.SYSTEM => "system",

        _ => "user",
    };
}