diff --git a/app/MindWork AI Studio/Agents/AgentDataSourceSelection.cs b/app/MindWork AI Studio/Agents/AgentDataSourceSelection.cs index 778fafb9..f7947462 100644 --- a/app/MindWork AI Studio/Agents/AgentDataSourceSelection.cs +++ b/app/MindWork AI Studio/Agents/AgentDataSourceSelection.cs @@ -159,7 +159,9 @@ public sealed class AgentDataSourceSelection (ILogger ContentText text => text.Text, // Image prompts may be empty, e.g., when the image is too large: - ContentImage image => await image.AsBase64(token), + ContentImage image => await image.TryAsBase64(token) is (success: true, { } base64Image) + ? base64Image + : string.Empty, // Other content types are not supported yet: _ => string.Empty, diff --git a/app/MindWork AI Studio/Agents/AgentRetrievalContextValidation.cs b/app/MindWork AI Studio/Agents/AgentRetrievalContextValidation.cs index 7496301f..ee2437d9 100644 --- a/app/MindWork AI Studio/Agents/AgentRetrievalContextValidation.cs +++ b/app/MindWork AI Studio/Agents/AgentRetrievalContextValidation.cs @@ -219,7 +219,9 @@ public sealed class AgentRetrievalContextValidation (ILogger text.Text, // Image prompts may be empty, e.g., when the image is too large: - ContentImage image => await image.AsBase64(token), + ContentImage image => await image.TryAsBase64(token) is (success: true, { } base64Image) + ? base64Image + : string.Empty, // Other content types are not supported yet: _ => string.Empty, diff --git a/app/MindWork AI Studio/Assistants/AssistantBase.razor.cs b/app/MindWork AI Studio/Assistants/AssistantBase.razor.cs index de346761..87d451fb 100644 --- a/app/MindWork AI Studio/Assistants/AssistantBase.razor.cs +++ b/app/MindWork AI Studio/Assistants/AssistantBase.razor.cs @@ -217,12 +217,13 @@ public abstract partial class AssistantBase : AssistantLowerBase wher return chatId; } - protected DateTimeOffset AddUserRequest(string request, bool hideContentFromUser = false) + protected DateTimeOffset AddUserRequest(string request, bool hideContentFromUser = false, params List attachments) { var time = DateTimeOffset.Now; this.lastUserPrompt = new ContentText { Text = request, + FileAttachments = attachments, }; this.chatThread!.Blocks.Add(new ContentBlock diff --git a/app/MindWork AI Studio/Assistants/DocumentAnalysis/DocumentAnalysisAssistant.razor b/app/MindWork AI Studio/Assistants/DocumentAnalysis/DocumentAnalysisAssistant.razor index 4b68618a..ec801e84 100644 --- a/app/MindWork AI Studio/Assistants/DocumentAnalysis/DocumentAnalysisAssistant.razor +++ b/app/MindWork AI Studio/Assistants/DocumentAnalysis/DocumentAnalysisAssistant.razor @@ -103,7 +103,7 @@ else @T("Documents for the analysis") - + diff --git a/app/MindWork AI Studio/Assistants/DocumentAnalysis/DocumentAnalysisAssistant.razor.cs b/app/MindWork AI Studio/Assistants/DocumentAnalysis/DocumentAnalysisAssistant.razor.cs index d7ee8987..d0265178 100644 --- a/app/MindWork AI Studio/Assistants/DocumentAnalysis/DocumentAnalysisAssistant.razor.cs +++ b/app/MindWork AI Studio/Assistants/DocumentAnalysis/DocumentAnalysisAssistant.razor.cs @@ -1,3 +1,5 @@ +using System.Text; + using AIStudio.Chat; using AIStudio.Dialogs; using AIStudio.Dialogs.Settings; @@ -34,11 +36,13 @@ public partial class DocumentAnalysisAssistant : AssistantBaseCore - this.loadedDocumentPaths.Count > 1 - ? $"Your task is to analyze {this.loadedDocumentPaths.Count} DOCUMENTS. Different DOCUMENTS are divided by a horizontal rule in markdown formatting followed by the name of the document." 
- : "Your task is to analyze a single document."; + private string GetDocumentTaskDescription() + { + var numDocuments = this.loadedDocumentPaths.Count(x => x is { Exists: true, IsImage: false }); + var numImages = this.loadedDocumentPaths.Count(x => x is { Exists: true, IsImage: true }); + + return (numDocuments, numImages) switch + { + (0, 1) => "Your task is to analyze a single image file attached as a document.", + (0, > 1) => $"Your task is to analyze {numImages} image file(s) attached as documents.", + + (1, 0) => "Your task is to analyze a single DOCUMENT.", + (1, 1) => "Your task is to analyze a single DOCUMENT and 1 image file attached as a document.", + (1, > 1) => $"Your task is to analyze a single DOCUMENT and {numImages} image file(s) attached as documents.", + + (> 0, 0) => $"Your task is to analyze {numDocuments} DOCUMENTS. Different DOCUMENTS are divided by a horizontal rule in markdown formatting followed by the name of the document.", + (> 0, 1) => $"Your task is to analyze {numDocuments} DOCUMENTS and 1 image file attached as a document. Different DOCUMENTS are divided by a horizontal rule in Markdown formatting followed by the name of the document.", + (> 0, > 0) => $"Your task is to analyze {numDocuments} DOCUMENTS and {numImages} image file(s) attached as documents. Different DOCUMENTS are divided by a horizontal rule in Markdown formatting followed by the name of the document.", + + _ => "Your task is to analyze a single DOCUMENT." + }; + } protected override IReadOnlyList FooterButtons => []; @@ -327,37 +348,68 @@ public partial class DocumentAnalysisAssistant : AssistantBaseCore(); - var count = 1; + var documents = this.loadedDocumentPaths.Where(n => n is { Exists: true, IsImage: false }).ToList(); + var sb = new StringBuilder(); - foreach (var fileAttachment in this.loadedDocumentPaths) + if (documents.Count > 0) { - if (fileAttachment.IsForbidden) - { - this.Logger.LogWarning($"Skipping forbidden file: '{fileAttachment.FilePath}'."); - continue; - } - - var fileContent = await this.RustService.ReadArbitraryFileData(fileAttachment.FilePath, int.MaxValue); + sb.AppendLine(""" + # DOCUMENTS: - documentSections.Add($""" - ## DOCUMENT {count}: - File path: {fileAttachment.FilePath} - Content: - ``` - {fileContent} - ``` - - --- - """); - count++; + """); } - return $""" - # DOCUMENTS: + var numDocuments = 1; + foreach (var document in documents) + { + if (document.IsForbidden) + { + this.Logger.LogWarning($"Skipping forbidden file: '{document.FilePath}'."); + continue; + } - {string.Join("\n", documentSections)} - """; + var fileContent = await this.RustService.ReadArbitraryFileData(document.FilePath, int.MaxValue); + sb.AppendLine($""" + + ## DOCUMENT {numDocuments}: + File path: {document.FilePath} + Content: + ``` + {fileContent} + ``` + + --- + + """); + numDocuments++; + } + + var numImages = this.loadedDocumentPaths.Count(x => x is { IsImage: true, Exists: true }); + if (numImages > 0) + { + if (documents.Count == 0) + { + sb.AppendLine($""" + + There are {numImages} image file(s) attached as documents. + Please consider them as documents as well and use them to + answer accordingly. + + """); + } + else + { + sb.AppendLine($""" + + Additionally, there are {numImages} image file(s) attached. + Please consider them as documents as well and use them to + answer accordingly. 
+ + """); + } + } + + return sb.ToString(); } private async Task Analyze() @@ -370,7 +422,9 @@ public partial class DocumentAnalysisAssistant : AssistantBaseCore n is { Exists: true, IsImage: true }).ToList()); await this.AddAIResponseAsync(userRequest); } diff --git a/app/MindWork AI Studio/Assistants/I18N/allTexts.lua b/app/MindWork AI Studio/Assistants/I18N/allTexts.lua index ca8d522f..497e4bd4 100644 --- a/app/MindWork AI Studio/Assistants/I18N/allTexts.lua +++ b/app/MindWork AI Studio/Assistants/I18N/allTexts.lua @@ -1513,6 +1513,12 @@ UI_TEXT_CONTENT["AISTUDIO::CHAT::CONTENTBLOCKCOMPONENT::T4188329028"] = "No, kee -- Export Chat to Microsoft Word UI_TEXT_CONTENT["AISTUDIO::CHAT::CONTENTBLOCKCOMPONENT::T861873672"] = "Export Chat to Microsoft Word" +-- The local image file does not exist. Skipping the image. +UI_TEXT_CONTENT["AISTUDIO::CHAT::IIMAGESOURCEEXTENSIONS::T255679918"] = "The local image file does not exist. Skipping the image." + +-- Failed to download the image from the URL. Skipping the image. +UI_TEXT_CONTENT["AISTUDIO::CHAT::IIMAGESOURCEEXTENSIONS::T2996654916"] = "Failed to download the image from the URL. Skipping the image." + -- The local image file is too large (>10 MB). Skipping the image. UI_TEXT_CONTENT["AISTUDIO::CHAT::IIMAGESOURCEEXTENSIONS::T3219823625"] = "The local image file is too large (>10 MB). Skipping the image." @@ -2968,6 +2974,9 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::DOCUMENTCHECKDIALOG::T1373123357"] = "Markdo -- Load file UI_TEXT_CONTENT["AISTUDIO::DIALOGS::DOCUMENTCHECKDIALOG::T2129302565"] = "Load file" +-- Image View +UI_TEXT_CONTENT["AISTUDIO::DIALOGS::DOCUMENTCHECKDIALOG::T2199753423"] = "Image View" + -- See how we load your file. Review the content before we process it further. UI_TEXT_CONTENT["AISTUDIO::DIALOGS::DOCUMENTCHECKDIALOG::T3271853346"] = "See how we load your file. Review the content before we process it further." @@ -2986,6 +2995,9 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::DOCUMENTCHECKDIALOG::T652739927"] = "This is -- File Path UI_TEXT_CONTENT["AISTUDIO::DIALOGS::DOCUMENTCHECKDIALOG::T729508546"] = "File Path" +-- The specified file could not be found. The file have been moved, deleted, renamed, or is otherwise inaccessible. +UI_TEXT_CONTENT["AISTUDIO::DIALOGS::DOCUMENTCHECKDIALOG::T973777830"] = "The specified file could not be found. The file have been moved, deleted, renamed, or is otherwise inaccessible." + -- Embedding Name UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGMETHODDIALOG::T1427271797"] = "Embedding Name" @@ -3436,12 +3448,21 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::REVIEWATTACHMENTSDIALOG::T1746160064"] = "He -- There aren't any file attachments available right now. UI_TEXT_CONTENT["AISTUDIO::DIALOGS::REVIEWATTACHMENTSDIALOG::T2111340711"] = "There aren't any file attachments available right now." +-- Document Preview +UI_TEXT_CONTENT["AISTUDIO::DIALOGS::REVIEWATTACHMENTSDIALOG::T285154968"] = "Document Preview" + -- The file was deleted, renamed, or moved. UI_TEXT_CONTENT["AISTUDIO::DIALOGS::REVIEWATTACHMENTSDIALOG::T3083729256"] = "The file was deleted, renamed, or moved." -- Your attached file. UI_TEXT_CONTENT["AISTUDIO::DIALOGS::REVIEWATTACHMENTSDIALOG::T3154198222"] = "Your attached file." +-- Preview what we send to the AI. +UI_TEXT_CONTENT["AISTUDIO::DIALOGS::REVIEWATTACHMENTSDIALOG::T3160778981"] = "Preview what we send to the AI." 
+ +-- Close +UI_TEXT_CONTENT["AISTUDIO::DIALOGS::REVIEWATTACHMENTSDIALOG::T3448155331"] = "Close" + -- Your attached files UI_TEXT_CONTENT["AISTUDIO::DIALOGS::REVIEWATTACHMENTSDIALOG::T3909191077"] = "Your attached files" @@ -6058,9 +6079,15 @@ UI_TEXT_CONTENT["AISTUDIO::TOOLS::VALIDATION::FILEEXTENSIONVALIDATION::T29289275 -- Images are not supported yet UI_TEXT_CONTENT["AISTUDIO::TOOLS::VALIDATION::FILEEXTENSIONVALIDATION::T298062956"] = "Images are not supported yet" +-- Images are not supported at this place +UI_TEXT_CONTENT["AISTUDIO::TOOLS::VALIDATION::FILEEXTENSIONVALIDATION::T305247150"] = "Images are not supported at this place" + -- Executables are not allowed UI_TEXT_CONTENT["AISTUDIO::TOOLS::VALIDATION::FILEEXTENSIONVALIDATION::T4167762413"] = "Executables are not allowed" +-- Images are not supported by the selected provider and model +UI_TEXT_CONTENT["AISTUDIO::TOOLS::VALIDATION::FILEEXTENSIONVALIDATION::T999194030"] = "Images are not supported by the selected provider and model" + -- The hostname is not a valid HTTP(S) URL. UI_TEXT_CONTENT["AISTUDIO::TOOLS::VALIDATION::PROVIDERVALIDATION::T1013354736"] = "The hostname is not a valid HTTP(S) URL." diff --git a/app/MindWork AI Studio/Chat/ChatThread.cs b/app/MindWork AI Studio/Chat/ChatThread.cs index da01b153..134c555f 100644 --- a/app/MindWork AI Studio/Chat/ChatThread.cs +++ b/app/MindWork AI Studio/Chat/ChatThread.cs @@ -238,7 +238,7 @@ public sealed record ChatThread { var (contentData, contentType) = block.Content switch { - ContentImage image => (await image.AsBase64(token), Tools.ERIClient.DataModel.ContentType.IMAGE), + ContentImage image => (await image.TryAsBase64(token) is (success: true, { } base64Image) ? base64Image : string.Empty, Tools.ERIClient.DataModel.ContentType.IMAGE), ContentText text => (text.Text, Tools.ERIClient.DataModel.ContentType.TEXT), _ => (string.Empty, Tools.ERIClient.DataModel.ContentType.UNKNOWN), diff --git a/app/MindWork AI Studio/Chat/ContentImage.cs b/app/MindWork AI Studio/Chat/ContentImage.cs index 4c162cac..0eb36442 100644 --- a/app/MindWork AI Studio/Chat/ContentImage.cs +++ b/app/MindWork AI Studio/Chat/ContentImage.cs @@ -47,6 +47,8 @@ public sealed class ContentImage : IContent, IImageSource InitialRemoteWait = this.InitialRemoteWait, IsStreaming = this.IsStreaming, SourceType = this.SourceType, + Sources = [..this.Sources], + FileAttachments = [..this.FileAttachments], }; #endregion diff --git a/app/MindWork AI Studio/Chat/ContentText.cs b/app/MindWork AI Studio/Chat/ContentText.cs index b0a67f79..3a9b8f9d 100644 --- a/app/MindWork AI Studio/Chat/ContentText.cs +++ b/app/MindWork AI Studio/Chat/ContentText.cs @@ -195,6 +195,14 @@ public sealed class ContentText : IContent sb.AppendLine(await Program.RUST_SERVICE.ReadArbitraryFileData(document.FilePath, int.MaxValue)); sb.AppendLine("````"); } + + var numImages = this.FileAttachments.Count(x => x is { IsImage: true, Exists: true }); + if (numImages > 0) + { + sb.AppendLine(); + sb.AppendLine($"Additionally, there are {numImages} image file(s) attached to this message. 
"); + sb.AppendLine("Please consider them as part of the message content and use them to answer accordingly."); + } } } } diff --git a/app/MindWork AI Studio/Chat/FileAttachment.cs b/app/MindWork AI Studio/Chat/FileAttachment.cs index bacae923..9d25526c 100644 --- a/app/MindWork AI Studio/Chat/FileAttachment.cs +++ b/app/MindWork AI Studio/Chat/FileAttachment.cs @@ -1,3 +1,5 @@ +using System.Text.Json.Serialization; + using AIStudio.Tools.Rust; namespace AIStudio.Chat; @@ -9,22 +11,47 @@ namespace AIStudio.Chat; /// The name of the file, including extension. /// The full path to the file, including the filename and extension. /// The size of the file in bytes. -public readonly record struct FileAttachment(FileAttachmentType Type, string FileName, string FilePath, long FileSizeBytes) +[JsonPolymorphic(TypeDiscriminatorPropertyName = "$type")] +[JsonDerivedType(typeof(FileAttachment), typeDiscriminator: "file")] +[JsonDerivedType(typeof(FileAttachmentImage), typeDiscriminator: "image")] +public record FileAttachment(FileAttachmentType Type, string FileName, string FilePath, long FileSizeBytes) { - /// - /// Gets a value indicating whether the file still exists on the file system. - /// - public bool Exists => File.Exists(this.FilePath); - /// /// Gets a value indicating whether the file type is forbidden and should not be attached. /// - public bool IsForbidden => this.Type == FileAttachmentType.FORBIDDEN; + /// + /// The state is determined once during construction and does not change. + /// + public bool IsForbidden { get; } = Type == FileAttachmentType.FORBIDDEN; /// /// Gets a value indicating whether the file type is valid and allowed to be attached. /// - public bool IsValid => this.Type != FileAttachmentType.FORBIDDEN; + /// + /// The state is determined once during construction and does not change. + /// + public bool IsValid { get; } = Type != FileAttachmentType.FORBIDDEN; + + /// + /// Gets a value indicating whether the file type is an image. + /// + /// + /// The state is determined once during construction and does not change. + /// + public bool IsImage { get; } = Type == FileAttachmentType.IMAGE; + + /// + /// Gets the file path for loading the file from the web browser-side (Blazor). + /// + public string FilePathAsUrl { get; } = FileHandler.CreateFileUrl(FilePath); + + /// + /// Gets a value indicating whether the file still exists on the file system. + /// + /// + /// This property checks the file system each time it is accessed. + /// + public bool Exists => File.Exists(this.FilePath); /// /// Creates a FileAttachment from a file path by automatically determining the type, @@ -38,7 +65,13 @@ public readonly record struct FileAttachment(FileAttachmentType Type, string Fil var fileSize = File.Exists(filePath) ? 
new FileInfo(filePath).Length : 0; var type = DetermineFileType(filePath); - return new FileAttachment(type, fileName, filePath, fileSize); + return type switch + { + FileAttachmentType.DOCUMENT => new FileAttachment(type, fileName, filePath, fileSize), + FileAttachmentType.IMAGE => new FileAttachmentImage(fileName, filePath, fileSize), + + _ => new FileAttachment(type, fileName, filePath, fileSize), + }; } /// diff --git a/app/MindWork AI Studio/Chat/FileAttachmentImage.cs b/app/MindWork AI Studio/Chat/FileAttachmentImage.cs new file mode 100644 index 00000000..81b46f20 --- /dev/null +++ b/app/MindWork AI Studio/Chat/FileAttachmentImage.cs @@ -0,0 +1,17 @@ +namespace AIStudio.Chat; + +public record FileAttachmentImage(string FileName, string FilePath, long FileSizeBytes) : FileAttachment(FileAttachmentType.IMAGE, FileName, FilePath, FileSizeBytes), IImageSource +{ + /// + /// The type of the image source. + /// + /// + /// Is the image source a URL, a local file path, a base64 string, etc.? + /// + public ContentImageSource SourceType { get; init; } = ContentImageSource.LOCAL_PATH; + + /// + /// The image source. + /// + public string Source { get; set; } = FilePath; +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Chat/IImageSourceExtensions.cs b/app/MindWork AI Studio/Chat/IImageSourceExtensions.cs index 7ad62a8e..41706047 100644 --- a/app/MindWork AI Studio/Chat/IImageSourceExtensions.cs +++ b/app/MindWork AI Studio/Chat/IImageSourceExtensions.cs @@ -6,27 +6,89 @@ public static class IImageSourceExtensions { private static string TB(string fallbackEN) => I18N.I.T(fallbackEN, typeof(IImageSourceExtensions).Namespace, nameof(IImageSourceExtensions)); + public static string DetermineMimeType(this IImageSource image) + { + switch (image.SourceType) + { + case ContentImageSource.BASE64: + { + // Try to detect the mime type from the base64 string: + var base64Data = image.Source; + if (base64Data.StartsWith("data:", StringComparison.OrdinalIgnoreCase)) + { + var mimeEnd = base64Data.IndexOf(';'); + if (mimeEnd > 5) + { + return base64Data[5..mimeEnd]; + } + } + + // Fallback: + return "application/octet-stream"; + } + + case ContentImageSource.URL: + { + // Try to detect the mime type from the URL extension: + var uri = new Uri(image.Source); + var extension = Path.GetExtension(uri.AbsolutePath).ToLowerInvariant(); + return extension switch + { + ".png" => "image/png", + ".jpg" or ".jpeg" => "image/jpeg", + ".gif" => "image/gif", + ".bmp" => "image/bmp", + ".webp" => "image/webp", + + _ => "application/octet-stream" + }; + } + + case ContentImageSource.LOCAL_PATH: + { + var extension = Path.GetExtension(image.Source).ToLowerInvariant(); + return extension switch + { + ".png" => "image/png", + ".jpg" or ".jpeg" => "image/jpeg", + ".gif" => "image/gif", + ".bmp" => "image/bmp", + ".webp" => "image/webp", + + _ => "application/octet-stream" + }; + } + + default: + return "application/octet-stream"; + } + } + /// /// Read the image content as a base64 string. /// /// /// The images are directly converted to base64 strings. The maximum /// size of the image is around 10 MB. If the image is larger, the method - /// returns an empty string. - /// + /// returns an empty string.
+ ///
/// As of now, this method does no sort of image processing. LLMs usually /// do not work with arbitrary image sizes. In the future, we might have - /// to resize the images before sending them to the model. + /// to resize the images before sending them to the model.
+ ///
+ /// Note as well that this method returns just the base64 string without + /// any data URI prefix (like "data:image/png;base64,"). The caller has + /// to take care of that if needed. ///
/// The image source. /// The cancellation token. /// The image content as a base64 string; might be empty. - public static async Task AsBase64(this IImageSource image, CancellationToken token = default) + public static async Task<(bool success, string base64Content)> TryAsBase64(this IImageSource image, CancellationToken token = default) { switch (image.SourceType) { case ContentImageSource.BASE64: - return image.Source; + return (success: true, image.Source); case ContentImageSource.URL: { @@ -39,14 +101,15 @@ public static class IImageSourceExtensions if(lengthBytes > 10_000_000) { await MessageBus.INSTANCE.SendError(new(Icons.Material.Filled.ImageNotSupported, TB("The image at the URL is too large (>10 MB). Skipping the image."))); - return string.Empty; + return (success: false, string.Empty); } var bytes = await response.Content.ReadAsByteArrayAsync(token); - return Convert.ToBase64String(bytes); + return (success: true, Convert.ToBase64String(bytes)); } - return string.Empty; + await MessageBus.INSTANCE.SendError(new(Icons.Material.Filled.ImageNotSupported, TB("Failed to download the image from the URL. Skipping the image."))); + return (success: false, string.Empty); } case ContentImageSource.LOCAL_PATH: @@ -57,17 +120,18 @@ public static class IImageSourceExtensions if(length > 10_000_000) { await MessageBus.INSTANCE.SendError(new(Icons.Material.Filled.ImageNotSupported, TB("The local image file is too large (>10 MB). Skipping the image."))); - return string.Empty; + return (success: false, string.Empty); } var bytes = await File.ReadAllBytesAsync(image.Source, token); - return Convert.ToBase64String(bytes); + return (success: true, Convert.ToBase64String(bytes)); } - return string.Empty; + await MessageBus.INSTANCE.SendError(new(Icons.Material.Filled.ImageNotSupported, TB("The local image file does not exist. Skipping the image."))); + return (success: false, string.Empty); default: - return string.Empty; + return (success: false, string.Empty); } } } \ No newline at end of file diff --git a/app/MindWork AI Studio/Chat/ListContentBlockExtensions.cs b/app/MindWork AI Studio/Chat/ListContentBlockExtensions.cs index 5c9883e7..5da41e80 100644 --- a/app/MindWork AI Studio/Chat/ListContentBlockExtensions.cs +++ b/app/MindWork AI Studio/Chat/ListContentBlockExtensions.cs @@ -1,4 +1,6 @@ using AIStudio.Provider; +using AIStudio.Provider.OpenAI; +using AIStudio.Settings; namespace AIStudio.Chat; @@ -8,19 +10,171 @@ public static class ListContentBlockExtensions /// Processes a list of content blocks by transforming them into a collection of message results asynchronously. ///
/// The list of content blocks to process. - /// A function that transforms each content block into a message result asynchronously. + /// A function that transforms each content block into a message result asynchronously. + /// The selected LLM provider. + /// The selected model. + /// A factory function to create text sub-content. + /// A factory function to create image sub-content. /// An asynchronous task that resolves to a list of transformed results. - public static async Task> BuildMessages(this List blocks, Func> transformer) + public static async Task> BuildMessagesAsync( + this List blocks, + LLMProviders selectedProvider, + Model selectedModel, + Func roleTransformer, + Func textSubContentFactory, + Func> imageSubContentFactory) { - var messages = blocks - .Where(n => n.ContentType is ContentType.TEXT && !string.IsNullOrWhiteSpace((n.Content as ContentText)?.Text)) - .Select(transformer) - .ToList(); + var capabilities = selectedProvider.GetModelCapabilities(selectedModel); + var canProcessImages = capabilities.Contains(Capability.MULTIPLE_IMAGE_INPUT) || + capabilities.Contains(Capability.SINGLE_IMAGE_INPUT); - // Await all messages: - await Task.WhenAll(messages); + var messageTaskList = new List>(blocks.Count); + foreach (var block in blocks) + { + switch (block.Content) + { + // The prompt may or may not contain image(s), but the provider/model cannot process images. + // Thus, we treat it as a regular text message. + case ContentText text when block.ContentType is ContentType.TEXT && !string.IsNullOrWhiteSpace(text.Text) && !canProcessImages: + messageTaskList.Add(CreateTextMessageAsync(block, text)); + break; + + // The regular case for text content without images: + case ContentText text when block.ContentType is ContentType.TEXT && !string.IsNullOrWhiteSpace(text.Text) && !text.FileAttachments.ContainsImages(): + messageTaskList.Add(CreateTextMessageAsync(block, text)); + break; + + // Text prompt with images as attachments, and the provider/model can process images: + case ContentText text when block.ContentType is ContentType.TEXT && !string.IsNullOrWhiteSpace(text.Text) && text.FileAttachments.ContainsImages(): + messageTaskList.Add(CreateMultimodalMessageAsync(block, text, textSubContentFactory, imageSubContentFactory)); + break; + } + } + // Await all messages: + await Task.WhenAll(messageTaskList); + // Select all results: - return messages.Select(n => n.Result).ToList(); + return messageTaskList.Select(n => n.Result).ToList(); + + // Local function to create a text message asynchronously. + Task CreateTextMessageAsync(ContentBlock block, ContentText text) + { + return Task.Run(async () => new TextMessage + { + Role = roleTransformer(block.Role), + Content = await text.PrepareTextContentForAI(), + } as IMessageBase); + } + + // Local function to create a multimodal message asynchronously. 
+ Task CreateMultimodalMessageAsync( + ContentBlock block, + ContentText text, + Func innerTextSubContentFactory, + Func> innerImageSubContentFactory) + { + return Task.Run(async () => + { + var imagesTasks = text.FileAttachments + .Where(x => x is { IsImage: true, Exists: true }) + .Cast() + .Select(innerImageSubContentFactory) + .ToList(); + + Task.WaitAll(imagesTasks); + var images = imagesTasks.Select(t => t.Result).ToList(); + + return new MultimodalMessage + { + Role = roleTransformer(block.Role), + Content = + [ + innerTextSubContentFactory(await text.PrepareTextContentForAI()), + ..images, + ] + } as IMessageBase; + }); + } } + + /// + /// Processes a list of content blocks using direct image URL format to create message results asynchronously. + /// + /// The list of content blocks to process. + /// The selected LLM provider. + /// The selected model. + /// An asynchronous task that resolves to a list of transformed message results. + /// + /// Uses direct image URL format where the image data is placed directly in the image_url field: + /// + /// { "type": "image_url", "image_url": "data:image/jpeg;base64,..." } + /// + /// This format is used by OpenAI, Mistral, and Ollama. + /// + public static async Task> BuildMessagesUsingDirectImageUrlAsync( + this List blocks, + LLMProviders selectedProvider, + Model selectedModel) => await blocks.BuildMessagesAsync( + selectedProvider, + selectedModel, + StandardRoleTransformer, + StandardTextSubContentFactory, + DirectImageSubContentFactory); + + /// + /// Processes a list of content blocks using nested image URL format to create message results asynchronously. + /// + /// The list of content blocks to process. + /// The selected LLM provider. + /// The selected model. + /// An asynchronous task that resolves to a list of transformed message results. + /// + /// Uses nested image URL format where the image data is wrapped in an object: + /// + /// { "type": "image_url", "image_url": { "url": "data:image/jpeg;base64,..." } } + /// + /// This format is used by LM Studio, VLLM, llama.cpp, and other OpenAI-compatible providers. + /// + public static async Task> BuildMessagesUsingNestedImageUrlAsync( + this List blocks, + LLMProviders selectedProvider, + Model selectedModel) => await blocks.BuildMessagesAsync( + selectedProvider, + selectedModel, + StandardRoleTransformer, + StandardTextSubContentFactory, + NestedImageSubContentFactory); + + private static ISubContent StandardTextSubContentFactory(string text) => new SubContentText + { + Text = text, + }; + + private static async Task DirectImageSubContentFactory(FileAttachmentImage attachment) => new SubContentImageUrl + { + ImageUrl = await attachment.TryAsBase64() is (true, var base64Content) + ? $"data:{attachment.DetermineMimeType()};base64,{base64Content}" + : string.Empty, + }; + + private static async Task NestedImageSubContentFactory(FileAttachmentImage attachment) => new SubContentImageUrlNested + { + ImageUrl = new SubContentImageUrlData + { + Url = await attachment.TryAsBase64() is (true, var base64Content) + ? 
$"data:{attachment.DetermineMimeType()};base64,{base64Content}" + : string.Empty, + }, + }; + + private static string StandardRoleTransformer(ChatRole role) => role switch + { + ChatRole.USER => "user", + ChatRole.AI => "assistant", + ChatRole.AGENT => "assistant", + ChatRole.SYSTEM => "system", + + _ => "user", + }; } \ No newline at end of file diff --git a/app/MindWork AI Studio/Chat/ListFileAttachmentExtensions.cs b/app/MindWork AI Studio/Chat/ListFileAttachmentExtensions.cs new file mode 100644 index 00000000..497c965d --- /dev/null +++ b/app/MindWork AI Studio/Chat/ListFileAttachmentExtensions.cs @@ -0,0 +1,6 @@ +namespace AIStudio.Chat; + +public static class ListFileAttachmentExtensions +{ + public static bool ContainsImages(this List attachments) => attachments.Any(attachment => attachment.IsImage); +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Components/AttachDocuments.razor.cs b/app/MindWork AI Studio/Components/AttachDocuments.razor.cs index a647876e..9bc48747 100644 --- a/app/MindWork AI Studio/Components/AttachDocuments.razor.cs +++ b/app/MindWork AI Studio/Components/AttachDocuments.razor.cs @@ -36,6 +36,9 @@ public partial class AttachDocuments : MSGComponentBase [Parameter] public bool UseSmallForm { get; set; } + [Parameter] + public AIStudio.Settings.Provider? Provider { get; set; } + [Inject] private ILogger Logger { get; set; } = null!; @@ -114,7 +117,7 @@ public partial class AttachDocuments : MSGComponentBase foreach (var path in paths) { - if(!await FileExtensionValidation.IsExtensionValidWithNotifyAsync(path)) + if(!await FileExtensionValidation.IsExtensionValidWithNotifyAsync(FileExtensionValidation.UseCase.ATTACHING_CONTENT, path, this.Provider)) continue; this.DocumentPaths.Add(FileAttachment.FromPath(path)); @@ -158,7 +161,7 @@ public partial class AttachDocuments : MSGComponentBase if (!File.Exists(selectedFilePath)) continue; - if (!await FileExtensionValidation.IsExtensionValidWithNotifyAsync(selectedFilePath)) + if (!await FileExtensionValidation.IsExtensionValidWithNotifyAsync(FileExtensionValidation.UseCase.ATTACHING_CONTENT, selectedFilePath, this.Provider)) continue; this.DocumentPaths.Add(FileAttachment.FromPath(selectedFilePath)); @@ -216,7 +219,7 @@ public partial class AttachDocuments : MSGComponentBase { var dialogParameters = new DialogParameters { - { x => x.FilePath, fileAttachment.FilePath }, + { x => x.Document, fileAttachment }, }; await this.DialogService.ShowAsync(T("Document Preview"), dialogParameters, DialogOptions.FULLSCREEN); diff --git a/app/MindWork AI Studio/Components/ChatComponent.razor b/app/MindWork AI Studio/Components/ChatComponent.razor index 8f4eca23..1eb28244 100644 --- a/app/MindWork AI Studio/Components/ChatComponent.razor +++ b/app/MindWork AI Studio/Components/ChatComponent.razor @@ -83,7 +83,7 @@ - + @if (this.SettingsManager.ConfigurationData.Workspace.StorageBehavior is WorkspaceStorageBehavior.STORE_CHATS_AUTOMATICALLY) { diff --git a/app/MindWork AI Studio/Components/ChatComponent.razor.cs b/app/MindWork AI Studio/Components/ChatComponent.razor.cs index 347c096b..3236d31e 100644 --- a/app/MindWork AI Studio/Components/ChatComponent.razor.cs +++ b/app/MindWork AI Studio/Components/ChatComponent.razor.cs @@ -933,10 +933,17 @@ public partial class ChatComponent : MSGComponentBase, IAsyncDisposable if (this.cancellationTokenSource is not null) { - if(!this.cancellationTokenSource.IsCancellationRequested) - await this.cancellationTokenSource.CancelAsync(); + try + { + 
if(!this.cancellationTokenSource.IsCancellationRequested) + await this.cancellationTokenSource.CancelAsync(); - this.cancellationTokenSource.Dispose(); + this.cancellationTokenSource.Dispose(); + } + catch + { + // ignored + } } } diff --git a/app/MindWork AI Studio/Components/ReadFileContent.razor.cs b/app/MindWork AI Studio/Components/ReadFileContent.razor.cs index cd970cd4..5db85e21 100644 --- a/app/MindWork AI Studio/Components/ReadFileContent.razor.cs +++ b/app/MindWork AI Studio/Components/ReadFileContent.razor.cs @@ -55,7 +55,7 @@ public partial class ReadFileContent : MSGComponentBase return; } - if (!await FileExtensionValidation.IsExtensionValidWithNotifyAsync(selectedFile.SelectedFilePath)) + if (!await FileExtensionValidation.IsExtensionValidWithNotifyAsync(FileExtensionValidation.UseCase.DIRECTLY_LOADING_CONTENT, selectedFile.SelectedFilePath)) { this.Logger.LogWarning("User attempted to load unsupported file: {FilePath}", selectedFile.SelectedFilePath); return; diff --git a/app/MindWork AI Studio/Dialogs/DocumentCheckDialog.razor b/app/MindWork AI Studio/Dialogs/DocumentCheckDialog.razor index 88e5353f..bd9f47d5 100644 --- a/app/MindWork AI Studio/Dialogs/DocumentCheckDialog.razor +++ b/app/MindWork AI Studio/Dialogs/DocumentCheckDialog.razor @@ -6,7 +6,7 @@ @T("See how we load your file. Review the content before we process it further.") - @if (string.IsNullOrWhiteSpace(this.FilePath)) + @if (this.Document is null) { } @@ -14,7 +14,7 @@ { } - - - -
- -
-
-
- - - -
- + @if (!this.Document?.Exists ?? false) + { + + @T("The specified file could not be found. The file have been moved, deleted, renamed, or is otherwise inaccessible.") + + } + else + { + + @if (this.Document?.IsImage ?? false) + { + + + + } + else + { + + +
+ +
+
+
+ + + + } +
+ } + - + @T("Close") diff --git a/app/MindWork AI Studio/Dialogs/DocumentCheckDialog.razor.cs b/app/MindWork AI Studio/Dialogs/DocumentCheckDialog.razor.cs index 39abd602..4bf306f1 100644 --- a/app/MindWork AI Studio/Dialogs/DocumentCheckDialog.razor.cs +++ b/app/MindWork AI Studio/Dialogs/DocumentCheckDialog.razor.cs @@ -1,4 +1,5 @@ -using AIStudio.Components; +using AIStudio.Chat; +using AIStudio.Components; using AIStudio.Tools.Services; using Microsoft.AspNetCore.Components; @@ -13,7 +14,7 @@ public partial class DocumentCheckDialog : MSGComponentBase private IMudDialogInstance MudDialog { get; set; } = null!; [Parameter] - public string FilePath { get; set; } = string.Empty; + public FileAttachment? Document { get; set; } private void Close() => this.MudDialog.Cancel(); @@ -27,27 +28,30 @@ public partial class DocumentCheckDialog : MSGComponentBase private IDialogService DialogService { get; init; } = null!; [Inject] - private ILogger Logger { get; init; } = null!; + private ILogger Logger { get; init; } = null!; protected override async Task OnAfterRenderAsync(bool firstRender) { - if (firstRender && !string.IsNullOrWhiteSpace(this.FilePath)) + if (firstRender && this.Document is not null) { try { - var fileContent = await UserFile.LoadFileData(this.FilePath, this.RustService, this.DialogService); - this.FileContent = fileContent; - this.StateHasChanged(); + if (!this.Document.IsImage) + { + var fileContent = await UserFile.LoadFileData(this.Document.FilePath, this.RustService, this.DialogService); + this.FileContent = fileContent; + } } catch (Exception ex) { - this.Logger.LogError(ex, "Failed to load file content from '{FilePath}'", this.FilePath); + this.Logger.LogError(ex, "Failed to load file content from '{FilePath}'", this.Document); this.FileContent = string.Empty; - this.StateHasChanged(); } + + this.StateHasChanged(); } else if (firstRender) - this.Logger.LogWarning("Document check dialog opened without a valid file path"); + this.Logger.LogWarning("Document check dialog opened without a valid file path."); } private CodeBlockTheme CodeColorPalette => this.SettingsManager.IsDarkMode ? CodeBlockTheme.Dark : CodeBlockTheme.Default; diff --git a/app/MindWork AI Studio/Dialogs/ReviewAttachmentsDialog.razor b/app/MindWork AI Studio/Dialogs/ReviewAttachmentsDialog.razor index 3602adf0..bb9d8b9f 100644 --- a/app/MindWork AI Studio/Dialogs/ReviewAttachmentsDialog.razor +++ b/app/MindWork AI Studio/Dialogs/ReviewAttachmentsDialog.razor @@ -46,19 +46,24 @@ - - - + + + + + + + + + - } else { - +
@@ -70,13 +75,17 @@
- - - + + + + + + +
} } @@ -85,7 +94,7 @@ - Close + @T("Close") \ No newline at end of file diff --git a/app/MindWork AI Studio/Dialogs/ReviewAttachmentsDialog.razor.cs b/app/MindWork AI Studio/Dialogs/ReviewAttachmentsDialog.razor.cs index 21af5418..aa12f128 100644 --- a/app/MindWork AI Studio/Dialogs/ReviewAttachmentsDialog.razor.cs +++ b/app/MindWork AI Studio/Dialogs/ReviewAttachmentsDialog.razor.cs @@ -46,4 +46,18 @@ public partial class ReviewAttachmentsDialog : MSGComponentBase this.StateHasChanged(); } } + + /// + /// The user might want to check what we actually extract from his file and therefore give the LLM as an input. + /// + /// The file to check. + private async Task InvestigateFile(FileAttachment fileAttachment) + { + var dialogParameters = new DialogParameters + { + { x => x.Document, fileAttachment }, + }; + + await this.DialogService.ShowAsync(T("Document Preview"), dialogParameters, DialogOptions.FULLSCREEN); + } } \ No newline at end of file diff --git a/app/MindWork AI Studio/FileHandler.cs b/app/MindWork AI Studio/FileHandler.cs new file mode 100644 index 00000000..5fd18896 --- /dev/null +++ b/app/MindWork AI Studio/FileHandler.cs @@ -0,0 +1,80 @@ +using Microsoft.AspNetCore.StaticFiles; + +namespace AIStudio; + +internal static class FileHandler +{ + private const string ENDPOINT = "/local/file"; + + private static readonly ILogger LOGGER = Program.LOGGER_FACTORY.CreateLogger(nameof(FileHandler)); + + internal static string CreateFileUrl(string filePath) + { + var encodedPath = Uri.EscapeDataString(filePath); + return $"{ENDPOINT}?path={encodedPath}"; + } + + internal static async Task HandlerAsync(HttpContext context, Func nextHandler) + { + var requestPath = context.Request.Path.Value; + if (string.IsNullOrWhiteSpace(requestPath) || !requestPath.Equals(ENDPOINT, StringComparison.Ordinal)) + { + await nextHandler(); + return; + } + + // Extract the file path from the query parameter: + // Format: /local/file?path={url-encoded-path} + if (!context.Request.Query.TryGetValue("path", out var pathValues) || pathValues.Count == 0) + { + context.Response.StatusCode = StatusCodes.Status400BadRequest; + LOGGER.LogWarning("No file path provided in the request. 
Using ?path={{url-encoded-path}} format."); + return; + } + + // The query parameter is automatically URL-decoded by ASP.NET Core: + var filePath = pathValues[0]; + if (string.IsNullOrWhiteSpace(filePath)) + { + context.Response.StatusCode = StatusCodes.Status400BadRequest; + LOGGER.LogWarning("Empty file path provided in the request."); + return; + } + + // Security check: Prevent path traversal attacks: + var fullPath = Path.GetFullPath(filePath); + if (fullPath != filePath && !filePath.StartsWith('/')) + { + // On Windows, absolute paths may differ, so we do an additional check + // to ensure no path traversal sequences are present: + if (filePath.Contains("..")) + { + context.Response.StatusCode = StatusCodes.Status403Forbidden; + LOGGER.LogWarning("Path traversal attempt detected: {FilePath}", filePath); + return; + } + } + + // Check if the file exists: + if (!File.Exists(filePath)) + { + context.Response.StatusCode = StatusCodes.Status404NotFound; + LOGGER.LogWarning("Requested file not found: '{FilePath}'", filePath); + return; + } + + // Determine the content type: + var contentTypeProvider = new FileExtensionContentTypeProvider(); + if (!contentTypeProvider.TryGetContentType(filePath, out var contentType)) + contentType = "application/octet-stream"; + + // Set response headers: + context.Response.ContentType = contentType; + context.Response.Headers.ContentDisposition = $"inline; filename=\"{Path.GetFileName(filePath)}\""; + + // Stream the file to the response: + await using var fileStream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read, bufferSize: 64 * 1024, useAsync: true); + context.Response.ContentLength = fileStream.Length; + await fileStream.CopyToAsync(context.Response.Body); + } +} diff --git a/app/MindWork AI Studio/Plugins/languages/de-de-43065dbc-78d0-45b7-92be-f14c2926e2dc/plugin.lua b/app/MindWork AI Studio/Plugins/languages/de-de-43065dbc-78d0-45b7-92be-f14c2926e2dc/plugin.lua index 4b2e2168..95471d53 100644 --- a/app/MindWork AI Studio/Plugins/languages/de-de-43065dbc-78d0-45b7-92be-f14c2926e2dc/plugin.lua +++ b/app/MindWork AI Studio/Plugins/languages/de-de-43065dbc-78d0-45b7-92be-f14c2926e2dc/plugin.lua @@ -49,7 +49,7 @@ LANG_NAME = "Deutsch (Deutschland)" UI_TEXT_CONTENT = {} -- Objective -UI_TEXT_CONTENT["AISTUDIO::ASSISTANTS::AGENDA::ASSISTANTAGENDA::T1121586136"] = "Ziel" +UI_TEXT_CONTENT["AISTUDIO::ASSISTANTS::AGENDA::ASSISTANTAGENDA::T1121586136"] = "Zielsetzung" -- Describe the topic of the meeting, seminar, etc. Is it about quantum computing, software engineering, or is it a general business meeting? UI_TEXT_CONTENT["AISTUDIO::ASSISTANTS::AGENDA::ASSISTANTAGENDA::T12079368"] = "Beschreiben Sie das Thema des Treffens, Seminars usw. Geht es um Quantencomputing, Softwareentwicklung oder handelt es sich um ein allgemeines Geschäftstreffen?" @@ -1515,6 +1515,12 @@ UI_TEXT_CONTENT["AISTUDIO::CHAT::CONTENTBLOCKCOMPONENT::T4188329028"] = "Nein, b -- Export Chat to Microsoft Word UI_TEXT_CONTENT["AISTUDIO::CHAT::CONTENTBLOCKCOMPONENT::T861873672"] = "Chat in Microsoft Word exportieren" +-- The local image file does not exist. Skipping the image. +UI_TEXT_CONTENT["AISTUDIO::CHAT::IIMAGESOURCEEXTENSIONS::T255679918"] = "Die lokale Bilddatei existiert nicht. Das Bild wird übersprungen." + +-- Failed to download the image from the URL. Skipping the image. +UI_TEXT_CONTENT["AISTUDIO::CHAT::IIMAGESOURCEEXTENSIONS::T2996654916"] = "Das Bild konnte nicht von der URL heruntergeladen werden. Das Bild wird übersprungen." 
+ -- The local image file is too large (>10 MB). Skipping the image. UI_TEXT_CONTENT["AISTUDIO::CHAT::IIMAGESOURCEEXTENSIONS::T3219823625"] = "Die lokale Bilddatei ist zu groß (>10 MB). Das Bild wird übersprungen." @@ -2970,6 +2976,9 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::DOCUMENTCHECKDIALOG::T1373123357"] = "Markdo -- Load file UI_TEXT_CONTENT["AISTUDIO::DIALOGS::DOCUMENTCHECKDIALOG::T2129302565"] = "Datei laden" +-- Image View +UI_TEXT_CONTENT["AISTUDIO::DIALOGS::DOCUMENTCHECKDIALOG::T2199753423"] = "Bildansicht" + -- See how we load your file. Review the content before we process it further. UI_TEXT_CONTENT["AISTUDIO::DIALOGS::DOCUMENTCHECKDIALOG::T3271853346"] = "So wird Ihre Datei geladen. Überprüfen Sie den Inhalt, bevor wir ihn weiterverarbeiten." @@ -2988,6 +2997,9 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::DOCUMENTCHECKDIALOG::T652739927"] = "Dies is -- File Path UI_TEXT_CONTENT["AISTUDIO::DIALOGS::DOCUMENTCHECKDIALOG::T729508546"] = "Dateipfad" +-- The specified file could not be found. The file have been moved, deleted, renamed, or is otherwise inaccessible. +UI_TEXT_CONTENT["AISTUDIO::DIALOGS::DOCUMENTCHECKDIALOG::T973777830"] = "Die angegebene Datei konnte nicht gefunden werden. Die Datei wurde möglicherweise verschoben, gelöscht, umbenannt oder ist anderweitig nicht zugänglich." + -- Embedding Name UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGMETHODDIALOG::T1427271797"] = "Name der Einbettung" @@ -3438,12 +3450,21 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::REVIEWATTACHMENTSDIALOG::T1746160064"] = "Hi -- There aren't any file attachments right now. UI_TEXT_CONTENT["AISTUDIO::DIALOGS::REVIEWATTACHMENTSDIALOG::T2111340711"] = "Derzeit sind keine Dateianhänge vorhanden." +-- Document Preview +UI_TEXT_CONTENT["AISTUDIO::DIALOGS::REVIEWATTACHMENTSDIALOG::T285154968"] = "Dokumentvorschau" + -- The file was deleted, renamed, or moved. UI_TEXT_CONTENT["AISTUDIO::DIALOGS::REVIEWATTACHMENTSDIALOG::T3083729256"] = "Die Datei wurde gelöscht, umbenannt oder verschoben." -- Your attached file. UI_TEXT_CONTENT["AISTUDIO::DIALOGS::REVIEWATTACHMENTSDIALOG::T3154198222"] = "Ihre angehängte Datei." +-- Preview what we send to the AI. +UI_TEXT_CONTENT["AISTUDIO::DIALOGS::REVIEWATTACHMENTSDIALOG::T3160778981"] = "Vorschau dessen, was wir an die KI senden." + +-- Close +UI_TEXT_CONTENT["AISTUDIO::DIALOGS::REVIEWATTACHMENTSDIALOG::T3448155331"] = "Schließen" + -- Your attached files UI_TEXT_CONTENT["AISTUDIO::DIALOGS::REVIEWATTACHMENTSDIALOG::T3909191077"] = "Ihre angehängten Dateien" @@ -6060,9 +6081,15 @@ UI_TEXT_CONTENT["AISTUDIO::TOOLS::VALIDATION::FILEEXTENSIONVALIDATION::T29289275 -- Images are not supported yet UI_TEXT_CONTENT["AISTUDIO::TOOLS::VALIDATION::FILEEXTENSIONVALIDATION::T298062956"] = "Bilder werden derzeit nicht unterstützt." +-- Images are not supported at this place +UI_TEXT_CONTENT["AISTUDIO::TOOLS::VALIDATION::FILEEXTENSIONVALIDATION::T305247150"] = "Bilder werden an dieser Stelle nicht unterstützt." + -- Executables are not allowed UI_TEXT_CONTENT["AISTUDIO::TOOLS::VALIDATION::FILEEXTENSIONVALIDATION::T4167762413"] = "Ausführbare Dateien sind nicht erlaubt" +-- Images are not supported by the selected provider and model +UI_TEXT_CONTENT["AISTUDIO::TOOLS::VALIDATION::FILEEXTENSIONVALIDATION::T999194030"] = "Bilder werden vom ausgewählten Anbieter und Modell nicht unterstützt." + -- The hostname is not a valid HTTP(S) URL. UI_TEXT_CONTENT["AISTUDIO::TOOLS::VALIDATION::PROVIDERVALIDATION::T1013354736"] = "Der Hostname ist keine gültige HTTP(S)-URL." 
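// A minimal, self-contained sketch (not part of the patch) of the URL shape that
// FileHandler.CreateFileUrl produces and FileHandler.HandlerAsync serves via the
// /local/file endpoint introduced earlier in this diff. The sample path is hypothetical;
// only the endpoint constant and the use of Uri.EscapeDataString come from the patch.
using System;

internal static class FileUrlSketch
{
    private const string ENDPOINT = "/local/file";

    // Mirrors FileHandler.CreateFileUrl: URL-encode the path and pass it as the ?path query parameter.
    private static string CreateFileUrl(string filePath) => $"{ENDPOINT}?path={Uri.EscapeDataString(filePath)}";

    private static void Main()
    {
        // Hypothetical attachment path:
        var url = CreateFileUrl(@"C:\Users\jane\Pictures\diagram.png");

        // Prints: /local/file?path=C%3A%5CUsers%5Cjane%5CPictures%5Cdiagram.png
        Console.WriteLine(url);
    }
}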
diff --git a/app/MindWork AI Studio/Plugins/languages/en-us-97dfb1ba-50c4-4440-8dfa-6575daf543c8/plugin.lua b/app/MindWork AI Studio/Plugins/languages/en-us-97dfb1ba-50c4-4440-8dfa-6575daf543c8/plugin.lua index 42df6dd0..85edaf99 100644 --- a/app/MindWork AI Studio/Plugins/languages/en-us-97dfb1ba-50c4-4440-8dfa-6575daf543c8/plugin.lua +++ b/app/MindWork AI Studio/Plugins/languages/en-us-97dfb1ba-50c4-4440-8dfa-6575daf543c8/plugin.lua @@ -1515,6 +1515,12 @@ UI_TEXT_CONTENT["AISTUDIO::CHAT::CONTENTBLOCKCOMPONENT::T4188329028"] = "No, kee -- Export Chat to Microsoft Word UI_TEXT_CONTENT["AISTUDIO::CHAT::CONTENTBLOCKCOMPONENT::T861873672"] = "Export Chat to Microsoft Word" +-- The local image file does not exist. Skipping the image. +UI_TEXT_CONTENT["AISTUDIO::CHAT::IIMAGESOURCEEXTENSIONS::T255679918"] = "The local image file does not exist. Skipping the image." + +-- Failed to download the image from the URL. Skipping the image. +UI_TEXT_CONTENT["AISTUDIO::CHAT::IIMAGESOURCEEXTENSIONS::T2996654916"] = "Failed to download the image from the URL. Skipping the image." + -- The local image file is too large (>10 MB). Skipping the image. UI_TEXT_CONTENT["AISTUDIO::CHAT::IIMAGESOURCEEXTENSIONS::T3219823625"] = "The local image file is too large (>10 MB). Skipping the image." @@ -2970,6 +2976,9 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::DOCUMENTCHECKDIALOG::T1373123357"] = "Markdo -- Load file UI_TEXT_CONTENT["AISTUDIO::DIALOGS::DOCUMENTCHECKDIALOG::T2129302565"] = "Load file" +-- Image View +UI_TEXT_CONTENT["AISTUDIO::DIALOGS::DOCUMENTCHECKDIALOG::T2199753423"] = "Image View" + -- See how we load your file. Review the content before we process it further. UI_TEXT_CONTENT["AISTUDIO::DIALOGS::DOCUMENTCHECKDIALOG::T3271853346"] = "See how we load your file. Review the content before we process it further." @@ -2988,6 +2997,9 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::DOCUMENTCHECKDIALOG::T652739927"] = "This is -- File Path UI_TEXT_CONTENT["AISTUDIO::DIALOGS::DOCUMENTCHECKDIALOG::T729508546"] = "File Path" +-- The specified file could not be found. The file have been moved, deleted, renamed, or is otherwise inaccessible. +UI_TEXT_CONTENT["AISTUDIO::DIALOGS::DOCUMENTCHECKDIALOG::T973777830"] = "The specified file could not be found. The file have been moved, deleted, renamed, or is otherwise inaccessible." + -- Embedding Name UI_TEXT_CONTENT["AISTUDIO::DIALOGS::EMBEDDINGMETHODDIALOG::T1427271797"] = "Embedding Name" @@ -3438,12 +3450,21 @@ UI_TEXT_CONTENT["AISTUDIO::DIALOGS::REVIEWATTACHMENTSDIALOG::T1746160064"] = "He -- There aren't any file attachments right now. UI_TEXT_CONTENT["AISTUDIO::DIALOGS::REVIEWATTACHMENTSDIALOG::T2111340711"] = "There aren't any file attachments right now." +-- Document Preview +UI_TEXT_CONTENT["AISTUDIO::DIALOGS::REVIEWATTACHMENTSDIALOG::T285154968"] = "Document Preview" + -- The file was deleted, renamed, or moved. UI_TEXT_CONTENT["AISTUDIO::DIALOGS::REVIEWATTACHMENTSDIALOG::T3083729256"] = "The file was deleted, renamed, or moved." -- Your attached file. UI_TEXT_CONTENT["AISTUDIO::DIALOGS::REVIEWATTACHMENTSDIALOG::T3154198222"] = "Your attached file." +-- Preview what we send to the AI. +UI_TEXT_CONTENT["AISTUDIO::DIALOGS::REVIEWATTACHMENTSDIALOG::T3160778981"] = "Preview what we send to the AI." 
+ +-- Close +UI_TEXT_CONTENT["AISTUDIO::DIALOGS::REVIEWATTACHMENTSDIALOG::T3448155331"] = "Close" + -- Your attached files UI_TEXT_CONTENT["AISTUDIO::DIALOGS::REVIEWATTACHMENTSDIALOG::T3909191077"] = "Your attached files" @@ -6060,9 +6081,15 @@ UI_TEXT_CONTENT["AISTUDIO::TOOLS::VALIDATION::FILEEXTENSIONVALIDATION::T29289275 -- Images are not supported yet UI_TEXT_CONTENT["AISTUDIO::TOOLS::VALIDATION::FILEEXTENSIONVALIDATION::T298062956"] = "Images are not supported yet" +-- Images are not supported at this place +UI_TEXT_CONTENT["AISTUDIO::TOOLS::VALIDATION::FILEEXTENSIONVALIDATION::T305247150"] = "Images are not supported at this place" + -- Executables are not allowed UI_TEXT_CONTENT["AISTUDIO::TOOLS::VALIDATION::FILEEXTENSIONVALIDATION::T4167762413"] = "Executables are not allowed" +-- Images are not supported by the selected provider and model +UI_TEXT_CONTENT["AISTUDIO::TOOLS::VALIDATION::FILEEXTENSIONVALIDATION::T999194030"] = "Images are not supported by the selected provider and model" + -- The hostname is not a valid HTTP(S) URL. UI_TEXT_CONTENT["AISTUDIO::TOOLS::VALIDATION::PROVIDERVALIDATION::T1013354736"] = "The hostname is not a valid HTTP(S) URL." diff --git a/app/MindWork AI Studio/Program.cs b/app/MindWork AI Studio/Program.cs index b5954efc..cc185180 100644 --- a/app/MindWork AI Studio/Program.cs +++ b/app/MindWork AI Studio/Program.cs @@ -192,6 +192,7 @@ internal sealed class Program programLogger.LogInformation("Initialize internal file system."); app.Use(Redirect.HandlerContentAsync); + app.Use(FileHandler.HandlerAsync); #if DEBUG app.UseStaticFiles(); diff --git a/app/MindWork AI Studio/Provider/AlibabaCloud/ProviderAlibabaCloud.cs b/app/MindWork AI Studio/Provider/AlibabaCloud/ProviderAlibabaCloud.cs index 6b6224aa..dacfeea5 100644 --- a/app/MindWork AI Studio/Provider/AlibabaCloud/ProviderAlibabaCloud.cs +++ b/app/MindWork AI Studio/Provider/AlibabaCloud/ProviderAlibabaCloud.cs @@ -9,7 +9,7 @@ using AIStudio.Settings; namespace AIStudio.Provider.AlibabaCloud; -public sealed class ProviderAlibabaCloud() : BaseProvider("https://dashscope-intl.aliyuncs.com/compatible-mode/v1/", LOGGER) +public sealed class ProviderAlibabaCloud() : BaseProvider(LLMProviders.ALIBABA_CLOUD, "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/", LOGGER) { private static readonly ILogger LOGGER = Program.LOGGER_FACTORY.CreateLogger(); @@ -40,24 +40,7 @@ public sealed class ProviderAlibabaCloud() : BaseProvider("https://dashscope-int var apiParameters = this.ParseAdditionalApiParameters(); // Build the list of messages: - var messages = await chatThread.Blocks.BuildMessages(async n => new TextMessage - { - Role = n.Role switch - { - ChatRole.USER => "user", - ChatRole.AI => "assistant", - ChatRole.AGENT => "assistant", - ChatRole.SYSTEM => "system", - - _ => "user", - }, - - Content = n.Content switch - { - ContentText text => await text.PrepareTextContentForAI(), - _ => string.Empty, - } - }); + var messages = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel); // Prepare the AlibabaCloud HTTP chat request: var alibabaCloudChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest diff --git a/app/MindWork AI Studio/Provider/Anthropic/ISubContentImageSource.cs b/app/MindWork AI Studio/Provider/Anthropic/ISubContentImageSource.cs new file mode 100644 index 00000000..84015bdd --- /dev/null +++ b/app/MindWork AI Studio/Provider/Anthropic/ISubContentImageSource.cs @@ -0,0 +1,9 @@ +namespace AIStudio.Provider.Anthropic; + +public interface 
ISubContentImageSource +{ + /// + /// The type of the sub-content image. + /// + public SubContentImageType Type { get; } +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/Anthropic/ProviderAnthropic.cs b/app/MindWork AI Studio/Provider/Anthropic/ProviderAnthropic.cs index 2e82c198..7f02781b 100644 --- a/app/MindWork AI Studio/Provider/Anthropic/ProviderAnthropic.cs +++ b/app/MindWork AI Studio/Provider/Anthropic/ProviderAnthropic.cs @@ -9,7 +9,7 @@ using AIStudio.Settings; namespace AIStudio.Provider.Anthropic; -public sealed class ProviderAnthropic() : BaseProvider("https://api.anthropic.com/v1/", LOGGER) +public sealed class ProviderAnthropic() : BaseProvider(LLMProviders.ANTHROPIC, "https://api.anthropic.com/v1/", LOGGER) { private static readonly ILogger LOGGER = Program.LOGGER_FACTORY.CreateLogger(); @@ -31,9 +31,11 @@ public sealed class ProviderAnthropic() : BaseProvider("https://api.anthropic.co var apiParameters = this.ParseAdditionalApiParameters("system"); // Build the list of messages: - var messages = await chatThread.Blocks.BuildMessages(async n => new TextMessage - { - Role = n.Role switch + var messages = await chatThread.Blocks.BuildMessagesAsync( + this.Provider, chatModel, + + // Anthropic-specific role mapping: + role => role switch { ChatRole.USER => "user", ChatRole.AI => "assistant", @@ -41,13 +43,26 @@ public sealed class ProviderAnthropic() : BaseProvider("https://api.anthropic.co _ => "user", }, - - Content = n.Content switch + + // Anthropic uses the standard text sub-content: + text => new SubContentText { - ContentText text => await text.PrepareTextContentForAI(), - _ => string.Empty, + Text = text, + }, + + // Anthropic-specific image sub-content: + async attachment => new SubContentImage + { + Source = new SubContentBase64Image + { + Data = await attachment.TryAsBase64(token: token) is (true, var base64Content) + ? 
base64Content + : string.Empty, + + MediaType = attachment.DetermineMimeType(), + } } - }); + ); // Prepare the Anthropic HTTP chat request: var chatRequest = JsonSerializer.Serialize(new ChatRequest diff --git a/app/MindWork AI Studio/Provider/Anthropic/SubContentBase64Image.cs b/app/MindWork AI Studio/Provider/Anthropic/SubContentBase64Image.cs new file mode 100644 index 00000000..8123c814 --- /dev/null +++ b/app/MindWork AI Studio/Provider/Anthropic/SubContentBase64Image.cs @@ -0,0 +1,10 @@ +namespace AIStudio.Provider.Anthropic; + +public record SubContentBase64Image : ISubContentImageSource +{ + public SubContentImageType Type => SubContentImageType.BASE64; + + public string MediaType { get; init; } = string.Empty; + + public string Data { get; init; } = string.Empty; +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/Anthropic/SubContentImage.cs b/app/MindWork AI Studio/Provider/Anthropic/SubContentImage.cs new file mode 100644 index 00000000..074f5db0 --- /dev/null +++ b/app/MindWork AI Studio/Provider/Anthropic/SubContentImage.cs @@ -0,0 +1,10 @@ +using AIStudio.Provider.OpenAI; + +namespace AIStudio.Provider.Anthropic; + +public record SubContentImage(SubContentType Type, ISubContentImageSource Source) : ISubContent +{ + public SubContentImage() : this(SubContentType.IMAGE, new SubContentImageUrl()) + { + } +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/Anthropic/SubContentImageSourceConverter.cs b/app/MindWork AI Studio/Provider/Anthropic/SubContentImageSourceConverter.cs new file mode 100644 index 00000000..11c61ad2 --- /dev/null +++ b/app/MindWork AI Studio/Provider/Anthropic/SubContentImageSourceConverter.cs @@ -0,0 +1,32 @@ +using System.Text.Json; +using System.Text.Json.Serialization; + +namespace AIStudio.Provider.Anthropic; + +/// +/// Custom JSON converter for the ISubContentImageSource interface to handle polymorphic serialization. +/// +/// +/// This converter ensures that when serializing ISubContentImageSource objects, all properties +/// of the concrete implementation (e.g., SubContentBase64Image, SubContentImageUrl) are serialized, +/// not just the properties defined in the ISubContentImageSource interface. +/// +public sealed class SubContentImageSourceConverter : JsonConverter +{ + private static readonly ILogger LOGGER = Program.LOGGER_FACTORY.CreateLogger(); + + public override ISubContentImageSource? Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) + { + // Deserialization is not needed for request objects, as sub-content image sources are only serialized + // when sending requests to LLM providers. + LOGGER.LogError("Deserializing ISubContentImageSource is not supported. This converter is only used for serializing request messages."); + return null; + } + + public override void Write(Utf8JsonWriter writer, ISubContentImageSource value, JsonSerializerOptions options) + { + // Serialize the actual concrete type (e.g., SubContentBase64Image, SubContentImageUrl) instead of just the ISubContentImageSource interface. + // This ensures all properties of the concrete type are included in the JSON output. 
+ JsonSerializer.Serialize(writer, value, value.GetType(), options); + } +} diff --git a/app/MindWork AI Studio/Provider/Anthropic/SubContentImageType.cs b/app/MindWork AI Studio/Provider/Anthropic/SubContentImageType.cs new file mode 100644 index 00000000..e94c2224 --- /dev/null +++ b/app/MindWork AI Studio/Provider/Anthropic/SubContentImageType.cs @@ -0,0 +1,7 @@ +namespace AIStudio.Provider.Anthropic; + +public enum SubContentImageType +{ + URL, + BASE64 +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/Anthropic/SubContentImageUrl.cs b/app/MindWork AI Studio/Provider/Anthropic/SubContentImageUrl.cs new file mode 100644 index 00000000..0247a40c --- /dev/null +++ b/app/MindWork AI Studio/Provider/Anthropic/SubContentImageUrl.cs @@ -0,0 +1,8 @@ +namespace AIStudio.Provider.Anthropic; + +public record SubContentImageUrl : ISubContentImageSource +{ + public SubContentImageType Type => SubContentImageType.URL; + + public string Url { get; init; } = string.Empty; +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/BaseProvider.cs b/app/MindWork AI Studio/Provider/BaseProvider.cs index 9b261646..62464910 100644 --- a/app/MindWork AI Studio/Provider/BaseProvider.cs +++ b/app/MindWork AI Studio/Provider/BaseProvider.cs @@ -1,8 +1,10 @@ using System.Net; using System.Runtime.CompilerServices; using System.Text.Json; +using System.Text.Json.Serialization; using AIStudio.Chat; +using AIStudio.Provider.Anthropic; using AIStudio.Provider.OpenAI; using AIStudio.Settings; using AIStudio.Tools.PluginSystem; @@ -40,18 +42,28 @@ public abstract class BaseProvider : IProvider, ISecretId protected static readonly JsonSerializerOptions JSON_SERIALIZER_OPTIONS = new() { PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower, - Converters = { new AnnotationConverter(), new MessageBaseConverter() }, + Converters = + { + new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower), + new AnnotationConverter(), + new MessageBaseConverter(), + new SubContentConverter(), + new SubContentImageSourceConverter(), + new SubContentImageUrlConverter(), + }, AllowTrailingCommas = false }; /// /// Constructor for the base provider. /// + /// The provider enum value. /// The base URL for the provider. /// The logger to use. 
- protected BaseProvider(string url, ILogger logger) + protected BaseProvider(LLMProviders provider, string url, ILogger logger) { this.logger = logger; + this.Provider = provider; // Set the base URL: this.httpClient.BaseAddress = new(url); @@ -59,6 +71,9 @@ public abstract class BaseProvider : IProvider, ISecretId #region Handling of IProvider, which all providers must implement + /// + public LLMProviders Provider { get; } + /// public abstract string Id { get; } diff --git a/app/MindWork AI Studio/Provider/DeepSeek/ProviderDeepSeek.cs b/app/MindWork AI Studio/Provider/DeepSeek/ProviderDeepSeek.cs index 0a9e4181..4b597601 100644 --- a/app/MindWork AI Studio/Provider/DeepSeek/ProviderDeepSeek.cs +++ b/app/MindWork AI Studio/Provider/DeepSeek/ProviderDeepSeek.cs @@ -9,7 +9,7 @@ using AIStudio.Settings; namespace AIStudio.Provider.DeepSeek; -public sealed class ProviderDeepSeek() : BaseProvider("https://api.deepseek.com/", LOGGER) +public sealed class ProviderDeepSeek() : BaseProvider(LLMProviders.DEEP_SEEK, "https://api.deepseek.com/", LOGGER) { private static readonly ILogger LOGGER = Program.LOGGER_FACTORY.CreateLogger(); @@ -40,24 +40,7 @@ public sealed class ProviderDeepSeek() : BaseProvider("https://api.deepseek.com/ var apiParameters = this.ParseAdditionalApiParameters(); // Build the list of messages: - var messages = await chatThread.Blocks.BuildMessages(async n => new TextMessage - { - Role = n.Role switch - { - ChatRole.USER => "user", - ChatRole.AI => "assistant", - ChatRole.AGENT => "assistant", - ChatRole.SYSTEM => "system", - - _ => "user", - }, - - Content = n.Content switch - { - ContentText text => await text.PrepareTextContentForAI(), - _ => string.Empty, - } - }); + var messages = await chatThread.Blocks.BuildMessagesUsingDirectImageUrlAsync(this.Provider, chatModel); // Prepare the DeepSeek HTTP chat request: var deepSeekChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest diff --git a/app/MindWork AI Studio/Provider/Fireworks/ProviderFireworks.cs b/app/MindWork AI Studio/Provider/Fireworks/ProviderFireworks.cs index fa6b229f..5e056674 100644 --- a/app/MindWork AI Studio/Provider/Fireworks/ProviderFireworks.cs +++ b/app/MindWork AI Studio/Provider/Fireworks/ProviderFireworks.cs @@ -9,7 +9,7 @@ using AIStudio.Settings; namespace AIStudio.Provider.Fireworks; -public class ProviderFireworks() : BaseProvider("https://api.fireworks.ai/inference/v1/", LOGGER) +public class ProviderFireworks() : BaseProvider(LLMProviders.FIREWORKS, "https://api.fireworks.ai/inference/v1/", LOGGER) { private static readonly ILogger LOGGER = Program.LOGGER_FACTORY.CreateLogger(); @@ -40,24 +40,7 @@ public class ProviderFireworks() : BaseProvider("https://api.fireworks.ai/infere var apiParameters = this.ParseAdditionalApiParameters(); // Build the list of messages: - var messages = await chatThread.Blocks.BuildMessages(async n => new TextMessage - { - Role = n.Role switch - { - ChatRole.USER => "user", - ChatRole.AI => "assistant", - ChatRole.AGENT => "assistant", - ChatRole.SYSTEM => "system", - - _ => "user", - }, - - Content = n.Content switch - { - ContentText text => await text.PrepareTextContentForAI(), - _ => string.Empty, - } - }); + var messages = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel); // Prepare the Fireworks HTTP chat request: var fireworksChatRequest = JsonSerializer.Serialize(new ChatRequest diff --git a/app/MindWork AI Studio/Provider/Fireworks/TextMessage.cs b/app/MindWork AI Studio/Provider/Fireworks/TextMessage.cs 
deleted file mode 100644 index 36340b0f..00000000 --- a/app/MindWork AI Studio/Provider/Fireworks/TextMessage.cs +++ /dev/null @@ -1,13 +0,0 @@ -namespace AIStudio.Provider.Fireworks; - -/// -/// Chat message model. -/// -/// The text content of the message. -/// The role of the message. -public record TextMessage(string Content, string Role) : IMessage -{ - public TextMessage() : this(string.Empty, string.Empty) - { - } -} \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/GWDG/ProviderGWDG.cs b/app/MindWork AI Studio/Provider/GWDG/ProviderGWDG.cs index ae719b49..deecd6d5 100644 --- a/app/MindWork AI Studio/Provider/GWDG/ProviderGWDG.cs +++ b/app/MindWork AI Studio/Provider/GWDG/ProviderGWDG.cs @@ -9,7 +9,7 @@ using AIStudio.Settings; namespace AIStudio.Provider.GWDG; -public sealed class ProviderGWDG() : BaseProvider("https://chat-ai.academiccloud.de/v1/", LOGGER) +public sealed class ProviderGWDG() : BaseProvider(LLMProviders.GWDG, "https://chat-ai.academiccloud.de/v1/", LOGGER) { private static readonly ILogger LOGGER = Program.LOGGER_FACTORY.CreateLogger(); @@ -40,24 +40,7 @@ public sealed class ProviderGWDG() : BaseProvider("https://chat-ai.academiccloud var apiParameters = this.ParseAdditionalApiParameters(); // Build the list of messages: - var messages = await chatThread.Blocks.BuildMessages(async n => new TextMessage - { - Role = n.Role switch - { - ChatRole.USER => "user", - ChatRole.AI => "assistant", - ChatRole.AGENT => "assistant", - ChatRole.SYSTEM => "system", - - _ => "user", - }, - - Content = n.Content switch - { - ContentText text => await text.PrepareTextContentForAI(), - _ => string.Empty, - } - }); + var messages = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel); // Prepare the GWDG HTTP chat request: var gwdgChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest diff --git a/app/MindWork AI Studio/Provider/Google/ProviderGoogle.cs b/app/MindWork AI Studio/Provider/Google/ProviderGoogle.cs index 93a9140f..a7cb6daa 100644 --- a/app/MindWork AI Studio/Provider/Google/ProviderGoogle.cs +++ b/app/MindWork AI Studio/Provider/Google/ProviderGoogle.cs @@ -9,7 +9,7 @@ using AIStudio.Settings; namespace AIStudio.Provider.Google; -public class ProviderGoogle() : BaseProvider("https://generativelanguage.googleapis.com/v1beta/", LOGGER) +public class ProviderGoogle() : BaseProvider(LLMProviders.GOOGLE, "https://generativelanguage.googleapis.com/v1beta/", LOGGER) { private static readonly ILogger LOGGER = Program.LOGGER_FACTORY.CreateLogger(); @@ -40,24 +40,7 @@ public class ProviderGoogle() : BaseProvider("https://generativelanguage.googlea var apiParameters = this.ParseAdditionalApiParameters(); // Build the list of messages: - var messages = await chatThread.Blocks.BuildMessages(async n => new TextMessage - { - Role = n.Role switch - { - ChatRole.USER => "user", - ChatRole.AI => "assistant", - ChatRole.AGENT => "assistant", - ChatRole.SYSTEM => "system", - - _ => "user", - }, - - Content = n.Content switch - { - ContentText text => await text.PrepareTextContentForAI(), - _ => string.Empty, - } - }); + var messages = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel); // Prepare the Google HTTP chat request: var geminiChatRequest = JsonSerializer.Serialize(new ChatRequest diff --git a/app/MindWork AI Studio/Provider/Groq/ProviderGroq.cs b/app/MindWork AI Studio/Provider/Groq/ProviderGroq.cs index 16453fa9..60d449b0 100644 --- a/app/MindWork AI 
Studio/Provider/Groq/ProviderGroq.cs +++ b/app/MindWork AI Studio/Provider/Groq/ProviderGroq.cs @@ -9,7 +9,7 @@ using AIStudio.Settings; namespace AIStudio.Provider.Groq; -public class ProviderGroq() : BaseProvider("https://api.groq.com/openai/v1/", LOGGER) +public class ProviderGroq() : BaseProvider(LLMProviders.GROQ, "https://api.groq.com/openai/v1/", LOGGER) { private static readonly ILogger LOGGER = Program.LOGGER_FACTORY.CreateLogger(); @@ -40,24 +40,7 @@ public class ProviderGroq() : BaseProvider("https://api.groq.com/openai/v1/", LO var apiParameters = this.ParseAdditionalApiParameters(); // Build the list of messages: - var messages = await chatThread.Blocks.BuildMessages(async n => new TextMessage - { - Role = n.Role switch - { - ChatRole.USER => "user", - ChatRole.AI => "assistant", - ChatRole.AGENT => "assistant", - ChatRole.SYSTEM => "system", - - _ => "user", - }, - - Content = n.Content switch - { - ContentText text => await text.PrepareTextContentForAI(), - _ => string.Empty, - } - }); + var messages = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel); // Prepare the OpenAI HTTP chat request: var groqChatRequest = JsonSerializer.Serialize(new ChatRequest diff --git a/app/MindWork AI Studio/Provider/Helmholtz/ProviderHelmholtz.cs b/app/MindWork AI Studio/Provider/Helmholtz/ProviderHelmholtz.cs index 775d0447..07263d39 100644 --- a/app/MindWork AI Studio/Provider/Helmholtz/ProviderHelmholtz.cs +++ b/app/MindWork AI Studio/Provider/Helmholtz/ProviderHelmholtz.cs @@ -9,7 +9,7 @@ using AIStudio.Settings; namespace AIStudio.Provider.Helmholtz; -public sealed class ProviderHelmholtz() : BaseProvider("https://api.helmholtz-blablador.fz-juelich.de/v1/", LOGGER) +public sealed class ProviderHelmholtz() : BaseProvider(LLMProviders.HELMHOLTZ, "https://api.helmholtz-blablador.fz-juelich.de/v1/", LOGGER) { private static readonly ILogger LOGGER = Program.LOGGER_FACTORY.CreateLogger(); @@ -40,24 +40,7 @@ public sealed class ProviderHelmholtz() : BaseProvider("https://api.helmholtz-bl var apiParameters = this.ParseAdditionalApiParameters(); // Build the list of messages: - var messages = await chatThread.Blocks.BuildMessages(async n => new TextMessage - { - Role = n.Role switch - { - ChatRole.USER => "user", - ChatRole.AI => "assistant", - ChatRole.AGENT => "assistant", - ChatRole.SYSTEM => "system", - - _ => "user", - }, - - Content = n.Content switch - { - ContentText text => await text.PrepareTextContentForAI(), - _ => string.Empty, - } - }); + var messages = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel); // Prepare the Helmholtz HTTP chat request: var helmholtzChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest diff --git a/app/MindWork AI Studio/Provider/HuggingFace/ProviderHuggingFace.cs b/app/MindWork AI Studio/Provider/HuggingFace/ProviderHuggingFace.cs index 67773fcf..cfd2346c 100644 --- a/app/MindWork AI Studio/Provider/HuggingFace/ProviderHuggingFace.cs +++ b/app/MindWork AI Studio/Provider/HuggingFace/ProviderHuggingFace.cs @@ -13,9 +13,9 @@ public sealed class ProviderHuggingFace : BaseProvider { private static readonly ILogger LOGGER = Program.LOGGER_FACTORY.CreateLogger(); - public ProviderHuggingFace(HFInferenceProvider hfProvider, Model model) : base($"https://router.huggingface.co/{hfProvider.Endpoints(model)}", LOGGER) + public ProviderHuggingFace(HFInferenceProvider hfProvider, Model model) : base(LLMProviders.HUGGINGFACE, 
$"https://router.huggingface.co/{hfProvider.Endpoints(model)}", LOGGER) { - LOGGER.LogInformation($"We use the inferende provider '{hfProvider}'. Thus we use the base URL 'https://router.huggingface.co/{hfProvider.Endpoints(model)}'."); + LOGGER.LogInformation($"We use the inference provider '{hfProvider}'. Thus we use the base URL 'https://router.huggingface.co/{hfProvider.Endpoints(model)}'."); } #region Implementation of IProvider @@ -45,24 +45,7 @@ public sealed class ProviderHuggingFace : BaseProvider var apiParameters = this.ParseAdditionalApiParameters(); // Build the list of messages: - var message = await chatThread.Blocks.BuildMessages(async n => new TextMessage - { - Role = n.Role switch - { - ChatRole.USER => "user", - ChatRole.AI => "assistant", - ChatRole.AGENT => "assistant", - ChatRole.SYSTEM => "system", - - _ => "user", - }, - - Content = n.Content switch - { - ContentText text => await text.PrepareTextContentForAI(), - _ => string.Empty, - } - }); + var message = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel); // Prepare the HuggingFace HTTP chat request: var huggingfaceChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest diff --git a/app/MindWork AI Studio/Provider/IProvider.cs b/app/MindWork AI Studio/Provider/IProvider.cs index e883fec8..41d9b37d 100644 --- a/app/MindWork AI Studio/Provider/IProvider.cs +++ b/app/MindWork AI Studio/Provider/IProvider.cs @@ -8,6 +8,11 @@ namespace AIStudio.Provider; ///
public interface IProvider { + /// + /// The provider type. + /// + public LLMProviders Provider { get; } + /// /// The provider's ID. /// diff --git a/app/MindWork AI Studio/Provider/Mistral/ProviderMistral.cs b/app/MindWork AI Studio/Provider/Mistral/ProviderMistral.cs index 43c18c89..522757ea 100644 --- a/app/MindWork AI Studio/Provider/Mistral/ProviderMistral.cs +++ b/app/MindWork AI Studio/Provider/Mistral/ProviderMistral.cs @@ -9,7 +9,7 @@ using AIStudio.Settings; namespace AIStudio.Provider.Mistral; -public sealed class ProviderMistral() : BaseProvider("https://api.mistral.ai/v1/", LOGGER) +public sealed class ProviderMistral() : BaseProvider(LLMProviders.MISTRAL, "https://api.mistral.ai/v1/", LOGGER) { private static readonly ILogger LOGGER = Program.LOGGER_FACTORY.CreateLogger(); @@ -38,24 +38,7 @@ public sealed class ProviderMistral() : BaseProvider("https://api.mistral.ai/v1/ var apiParameters = this.ParseAdditionalApiParameters(); // Build the list of messages: - var messages = await chatThread.Blocks.BuildMessages(async n => new TextMessage - { - Role = n.Role switch - { - ChatRole.USER => "user", - ChatRole.AI => "assistant", - ChatRole.AGENT => "assistant", - ChatRole.SYSTEM => "system", - - _ => "user", - }, - - Content = n.Content switch - { - ContentText text => await text.PrepareTextContentForAI(), - _ => string.Empty, - } - }); + var messages = await chatThread.Blocks.BuildMessagesUsingDirectImageUrlAsync(this.Provider, chatModel); // Prepare the Mistral HTTP chat request: var mistralChatRequest = JsonSerializer.Serialize(new ChatRequest diff --git a/app/MindWork AI Studio/Provider/Mistral/TextMessage.cs b/app/MindWork AI Studio/Provider/Mistral/TextMessage.cs deleted file mode 100644 index 9606a9d8..00000000 --- a/app/MindWork AI Studio/Provider/Mistral/TextMessage.cs +++ /dev/null @@ -1,13 +0,0 @@ -namespace AIStudio.Provider.Mistral; - -/// -/// Text chat message model. -/// -/// The text content of the message. -/// The role of the message. -public record TextMessage(string Content, string Role) : IMessage -{ - public TextMessage() : this(string.Empty, string.Empty) - { - } -} \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/NoProvider.cs b/app/MindWork AI Studio/Provider/NoProvider.cs index b87820ca..73d75d61 100644 --- a/app/MindWork AI Studio/Provider/NoProvider.cs +++ b/app/MindWork AI Studio/Provider/NoProvider.cs @@ -9,6 +9,8 @@ public class NoProvider : IProvider { #region Implementation of IProvider + public LLMProviders Provider => LLMProviders.NONE; + public string Id => "none"; public string InstanceName { get; set; } = "None"; diff --git a/app/MindWork AI Studio/Provider/OpenAI/ISubContent.cs b/app/MindWork AI Studio/Provider/OpenAI/ISubContent.cs new file mode 100644 index 00000000..165d07b1 --- /dev/null +++ b/app/MindWork AI Studio/Provider/OpenAI/ISubContent.cs @@ -0,0 +1,12 @@ +namespace AIStudio.Provider.OpenAI; + +/// +/// Contract for sub-content in multimodal messages. +/// +public interface ISubContent +{ + /// + /// The type of the sub-content. + /// + public SubContentType Type { get; init; } +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/OpenAI/ISubContentImageUrl.cs b/app/MindWork AI Studio/Provider/OpenAI/ISubContentImageUrl.cs new file mode 100644 index 00000000..b2f9b51d --- /dev/null +++ b/app/MindWork AI Studio/Provider/OpenAI/ISubContentImageUrl.cs @@ -0,0 +1,19 @@ +namespace AIStudio.Provider.OpenAI; + +/// +/// Contract for nested image URL sub-content. 
+/// +/// +/// Some providers use a nested object format for image URLs: +/// +/// { "type": "image_url", "image_url": { "url": "data:image/jpeg;base64,..." } } +/// +/// This interface represents the inner object with the "url" property. +/// +public interface ISubContentImageUrl +{ + /// + /// The URL or base64-encoded data URI of the image. + /// + public string Url { get; init; } +} diff --git a/app/MindWork AI Studio/Provider/OpenAI/MultimodalMessage.cs b/app/MindWork AI Studio/Provider/OpenAI/MultimodalMessage.cs new file mode 100644 index 00000000..8b7ff8e0 --- /dev/null +++ b/app/MindWork AI Studio/Provider/OpenAI/MultimodalMessage.cs @@ -0,0 +1,13 @@ +namespace AIStudio.Provider.OpenAI; + +/// +/// A multimodal chat message model that can contain various types of content. +/// +/// The list of sub-contents in the message. +/// The role of the message. +public record MultimodalMessage(List Content, string Role) : IMessage> +{ + public MultimodalMessage() : this([], string.Empty) + { + } +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs b/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs index b0da092d..b5a11e60 100644 --- a/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs +++ b/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs @@ -11,7 +11,7 @@ namespace AIStudio.Provider.OpenAI; /// /// The OpenAI provider. /// -public sealed class ProviderOpenAI() : BaseProvider("https://api.openai.com/v1/", LOGGER) +public sealed class ProviderOpenAI() : BaseProvider(LLMProviders.OPEN_AI, "https://api.openai.com/v1/", LOGGER) { private static readonly ILogger LOGGER = Program.LOGGER_FACTORY.CreateLogger(); @@ -59,7 +59,7 @@ public sealed class ProviderOpenAI() : BaseProvider("https://api.openai.com/v1/" }; // Read the model capabilities: - var modelCapabilities = ProviderExtensions.GetModelCapabilitiesOpenAI(chatModel); + var modelCapabilities = this.Provider.GetModelCapabilities(chatModel); // Check if we are using the Responses API or the Chat Completion API: var usingResponsesAPI = modelCapabilities.Contains(Capability.RESPONSES_API); @@ -90,9 +90,11 @@ public sealed class ProviderOpenAI() : BaseProvider("https://api.openai.com/v1/" var apiParameters = this.ParseAdditionalApiParameters("input", "store", "tools"); // Build the list of messages: - var messages = await chatThread.Blocks.BuildMessages(async n => new TextMessage - { - Role = n.Role switch + var messages = await chatThread.Blocks.BuildMessagesAsync( + this.Provider, chatModel, + + // OpenAI-specific role mapping: + role => role switch { ChatRole.USER => "user", ChatRole.AI => "assistant", @@ -102,12 +104,46 @@ public sealed class ProviderOpenAI() : BaseProvider("https://api.openai.com/v1/" _ => "user", }, - Content = n.Content switch + // OpenAI's text sub-content depends on the model, whether we are using + // the Responses API or the Chat Completion API: + text => usingResponsesAPI switch { - ContentText text => await text.PrepareTextContentForAI(), - _ => string.Empty, - } - }); + // Responses API uses INPUT_TEXT: + true => new SubContentInputText + { + Text = text, + }, + + // Chat Completion API uses TEXT: + false => new SubContentText + { + Text = text, + }, + }, + + // OpenAI's image sub-content depends on the model as well, + // whether we are using the Responses API or the Chat Completion API: + async attachment => usingResponsesAPI switch + { + // Responses API uses INPUT_IMAGE: + true => new SubContentInputImage + { + ImageUrl = await 
attachment.TryAsBase64(token: token) is (true, var base64Content) + ? $"data:{attachment.DetermineMimeType()};base64,{base64Content}" + : string.Empty, + }, + + // Chat Completion API uses IMAGE_URL: + false => new SubContentImageUrlNested + { + ImageUrl = new SubContentImageUrlData + { + Url = await attachment.TryAsBase64(token: token) is (true, var base64Content) + ? $"data:{attachment.DetermineMimeType()};base64,{base64Content}" + : string.Empty, + }, + } + }); // // Create the request: either for the Responses API or the Chat Completion API @@ -149,7 +185,7 @@ public sealed class ProviderOpenAI() : BaseProvider("https://api.openai.com/v1/" }, JSON_SERIALIZER_OPTIONS), }; - + async Task RequestBuilder() { // Build the HTTP post request: diff --git a/app/MindWork AI Studio/Provider/OpenAI/SubContentImageUrl.cs b/app/MindWork AI Studio/Provider/OpenAI/SubContentImageUrl.cs new file mode 100644 index 00000000..fe190fb8 --- /dev/null +++ b/app/MindWork AI Studio/Provider/OpenAI/SubContentImageUrl.cs @@ -0,0 +1,11 @@ +namespace AIStudio.Provider.OpenAI; + +/// +/// Image sub-content for multimodal messages. +/// +public record SubContentImageUrl(SubContentType Type, string ImageUrl) : ISubContent +{ + public SubContentImageUrl() : this(SubContentType.IMAGE_URL, string.Empty) + { + } +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/OpenAI/SubContentImageUrlData.cs b/app/MindWork AI Studio/Provider/OpenAI/SubContentImageUrlData.cs new file mode 100644 index 00000000..af0ffc9e --- /dev/null +++ b/app/MindWork AI Studio/Provider/OpenAI/SubContentImageUrlData.cs @@ -0,0 +1,17 @@ +namespace AIStudio.Provider.OpenAI; + +/// +/// Represents the inner object of a nested image URL sub-content. +/// +/// +/// This record is used when the provider expects the format: +/// +/// { "type": "image_url", "image_url": { "url": "data:image/jpeg;base64,..." } } +/// +/// This class represents the inner { "url": "..." } part. +/// +public record SubContentImageUrlData : ISubContentImageUrl +{ + /// + public string Url { get; init; } = string.Empty; +} diff --git a/app/MindWork AI Studio/Provider/OpenAI/SubContentImageUrlNested.cs b/app/MindWork AI Studio/Provider/OpenAI/SubContentImageUrlNested.cs new file mode 100644 index 00000000..297a73fe --- /dev/null +++ b/app/MindWork AI Studio/Provider/OpenAI/SubContentImageUrlNested.cs @@ -0,0 +1,18 @@ +namespace AIStudio.Provider.OpenAI; + +/// +/// Image sub-content for multimodal messages using nested URL format. +/// +/// +/// This record is used when the provider expects the format: +/// +/// { "type": "image_url", "image_url": { "url": "data:image/jpeg;base64,..." } } +/// +/// Used by LM Studio, VLLM, and other OpenAI-compatible providers. +/// +public record SubContentImageUrlNested(SubContentType Type, ISubContentImageUrl ImageUrl) : ISubContent +{ + public SubContentImageUrlNested() : this(SubContentType.IMAGE_URL, new SubContentImageUrlData()) + { + } +} diff --git a/app/MindWork AI Studio/Provider/OpenAI/SubContentInputImage.cs b/app/MindWork AI Studio/Provider/OpenAI/SubContentInputImage.cs new file mode 100644 index 00000000..21144952 --- /dev/null +++ b/app/MindWork AI Studio/Provider/OpenAI/SubContentInputImage.cs @@ -0,0 +1,14 @@ +namespace AIStudio.Provider.OpenAI; + +/// +/// Image input sub-content for multimodal messages. +/// +/// +/// Right now, this is used only by OpenAI in its responses API. 
+/// +public record SubContentInputImage(SubContentType Type, string ImageUrl) : ISubContent +{ + public SubContentInputImage() : this(SubContentType.INPUT_IMAGE, string.Empty) + { + } +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/OpenAI/SubContentInputText.cs b/app/MindWork AI Studio/Provider/OpenAI/SubContentInputText.cs new file mode 100644 index 00000000..30fb51d8 --- /dev/null +++ b/app/MindWork AI Studio/Provider/OpenAI/SubContentInputText.cs @@ -0,0 +1,14 @@ +namespace AIStudio.Provider.OpenAI; + +/// +/// Text input sub-content for multimodal messages. +/// +/// +/// Right now, this is used only by OpenAI in its responses API. +/// +public record SubContentInputText(SubContentType Type, string Text) : ISubContent +{ + public SubContentInputText() : this(SubContentType.INPUT_TEXT, string.Empty) + { + } +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/OpenAI/SubContentText.cs b/app/MindWork AI Studio/Provider/OpenAI/SubContentText.cs new file mode 100644 index 00000000..f94dd710 --- /dev/null +++ b/app/MindWork AI Studio/Provider/OpenAI/SubContentText.cs @@ -0,0 +1,11 @@ +namespace AIStudio.Provider.OpenAI; + +/// +/// Text sub-content for multimodal messages. +/// +public record SubContentText(SubContentType Type, string Text) : ISubContent +{ + public SubContentText() : this(SubContentType.TEXT, string.Empty) + { + } +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/OpenAI/TextMessage.cs b/app/MindWork AI Studio/Provider/OpenAI/TextMessage.cs index ceaeb73d..0e75f87f 100644 --- a/app/MindWork AI Studio/Provider/OpenAI/TextMessage.cs +++ b/app/MindWork AI Studio/Provider/OpenAI/TextMessage.cs @@ -1,7 +1,7 @@ namespace AIStudio.Provider.OpenAI; /// -/// Chat message model. +/// Standard text-based chat message model. /// /// The text content of the message. /// The role of the message. 
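Aside on the wire format: the sub-content records and polymorphic converters introduced above (SubContentText, SubContentImageUrlNested, SubContentConverter) together with the snake-case serializer options in BaseProvider determine how multimodal messages are serialized. The following is a minimal, self-contained sketch of that interplay; the type names only approximate the records from this diff, and the demo class, method, and sample data are illustrative assumptions, not part of the change set.

```csharp
using System;
using System.Collections.Generic;
using System.Text.Json;
using System.Text.Json.Serialization;

public enum SubContentType { TEXT, IMAGE_URL }

public interface ISubContent
{
    SubContentType Type { get; init; }
}

public record SubContentText(SubContentType Type, string Text) : ISubContent
{
    public SubContentText() : this(SubContentType.TEXT, string.Empty) { }
}

// Inner { "url": "..." } object of the nested image format.
public record SubContentImageUrlData(string Url);

public record SubContentImageUrlNested(SubContentType Type, SubContentImageUrlData ImageUrl) : ISubContent
{
    public SubContentImageUrlNested() : this(SubContentType.IMAGE_URL, new SubContentImageUrlData(string.Empty)) { }
}

// Polymorphic serialization: write the runtime type so all of its properties end up in the JSON.
// The concrete record does not match this converter (it is registered for the interface only),
// so the inner call falls back to default serialization and does not recurse.
public sealed class SubContentConverter : JsonConverter<ISubContent>
{
    public override ISubContent? Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) => null;

    public override void Write(Utf8JsonWriter writer, ISubContent value, JsonSerializerOptions options)
        => JsonSerializer.Serialize(writer, value, value.GetType(), options);
}

public static class NestedImageUrlDemo
{
    public static void Main()
    {
        // Mirrors the snake-case naming and enum handling configured in BaseProvider.
        var options = new JsonSerializerOptions
        {
            PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower,
            Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower), new SubContentConverter() },
        };

        var content = new List<ISubContent>
        {
            new SubContentText { Text = "Describe the attached image." },
            new SubContentImageUrlNested { ImageUrl = new SubContentImageUrlData("data:image/png;base64,AAAA") },
        };

        // Prints:
        // [{"type":"text","text":"Describe the attached image."},
        //  {"type":"image_url","image_url":{"url":"data:image/png;base64,AAAA"}}]
        Console.WriteLine(JsonSerializer.Serialize(content, options));
    }
}
```

Because the converter serializes the runtime type, the concrete record's properties (text, image_url, url) appear in the output instead of only the interface's type discriminator.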
diff --git a/app/MindWork AI Studio/Provider/OpenRouter/ProviderOpenRouter.cs b/app/MindWork AI Studio/Provider/OpenRouter/ProviderOpenRouter.cs index c0d6550b..9f2d3648 100644 --- a/app/MindWork AI Studio/Provider/OpenRouter/ProviderOpenRouter.cs +++ b/app/MindWork AI Studio/Provider/OpenRouter/ProviderOpenRouter.cs @@ -9,7 +9,7 @@ using AIStudio.Settings; namespace AIStudio.Provider.OpenRouter; -public sealed class ProviderOpenRouter() : BaseProvider("https://openrouter.ai/api/v1/", LOGGER) +public sealed class ProviderOpenRouter() : BaseProvider(LLMProviders.OPEN_ROUTER, "https://openrouter.ai/api/v1/", LOGGER) { private const string PROJECT_WEBSITE = "https://github.com/MindWorkAI/AI-Studio"; private const string PROJECT_NAME = "MindWork AI Studio"; @@ -43,24 +43,7 @@ public sealed class ProviderOpenRouter() : BaseProvider("https://openrouter.ai/a var apiParameters = this.ParseAdditionalApiParameters(); // Build the list of messages: - var messages = await chatThread.Blocks.BuildMessages(async n => new TextMessage - { - Role = n.Role switch - { - ChatRole.USER => "user", - ChatRole.AI => "assistant", - ChatRole.AGENT => "assistant", - ChatRole.SYSTEM => "system", - - _ => "user", - }, - - Content = n.Content switch - { - ContentText text => await text.PrepareTextContentForAI(), - _ => string.Empty, - } - }); + var messages = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel); // Prepare the OpenRouter HTTP chat request: var openRouterChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest diff --git a/app/MindWork AI Studio/Provider/Perplexity/ProviderPerplexity.cs b/app/MindWork AI Studio/Provider/Perplexity/ProviderPerplexity.cs index 51a156af..27101716 100644 --- a/app/MindWork AI Studio/Provider/Perplexity/ProviderPerplexity.cs +++ b/app/MindWork AI Studio/Provider/Perplexity/ProviderPerplexity.cs @@ -9,7 +9,7 @@ using AIStudio.Settings; namespace AIStudio.Provider.Perplexity; -public sealed class ProviderPerplexity() : BaseProvider("https://api.perplexity.ai/", LOGGER) +public sealed class ProviderPerplexity() : BaseProvider(LLMProviders.PERPLEXITY, "https://api.perplexity.ai/", LOGGER) { private static readonly ILogger LOGGER = Program.LOGGER_FACTORY.CreateLogger(); @@ -49,24 +49,7 @@ public sealed class ProviderPerplexity() : BaseProvider("https://api.perplexity. 
var apiParameters = this.ParseAdditionalApiParameters(); // Build the list of messages: - var messages = await chatThread.Blocks.BuildMessages(async n => new TextMessage() - { - Role = n.Role switch - { - ChatRole.USER => "user", - ChatRole.AI => "assistant", - ChatRole.AGENT => "assistant", - ChatRole.SYSTEM => "system", - - _ => "user", - }, - - Content = n.Content switch - { - ContentText text => await text.PrepareTextContentForAI(), - _ => string.Empty, - } - }); + var messages = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel); // Prepare the Perplexity HTTP chat request: var perplexityChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest diff --git a/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs b/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs index 5b5bda37..70228589 100644 --- a/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs +++ b/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs @@ -9,7 +9,7 @@ using AIStudio.Settings; namespace AIStudio.Provider.SelfHosted; -public sealed class ProviderSelfHosted(Host host, string hostname) : BaseProvider($"{hostname}{host.BaseURL()}", LOGGER) +public sealed class ProviderSelfHosted(Host host, string hostname) : BaseProvider(LLMProviders.SELF_HOSTED, $"{hostname}{host.BaseURL()}", LOGGER) { private static readonly ILogger LOGGER = Program.LOGGER_FACTORY.CreateLogger(); @@ -34,26 +34,15 @@ public sealed class ProviderSelfHosted(Host host, string hostname) : BaseProvide // Parse the API parameters: var apiParameters = this.ParseAdditionalApiParameters(); - - // Build the list of messages: - var messages = await chatThread.Blocks.BuildMessages(async n => new TextMessage + + // Build the list of messages. The image format depends on the host: + // - Ollama uses the direct image URL format: { "type": "image_url", "image_url": "data:..." } + // - LM Studio, vLLM, and llama.cpp use the nested image URL format: { "type": "image_url", "image_url": { "url": "data:..." } } + var messages = host switch { - Role = n.Role switch - { - ChatRole.USER => "user", - ChatRole.AI => "assistant", - ChatRole.AGENT => "assistant", - ChatRole.SYSTEM => "system", - - _ => "user", - }, - - Content = n.Content switch - { - ContentText text => await text.PrepareTextContentForAI(), - _ => string.Empty, - } - }); + Host.OLLAMA => await chatThread.Blocks.BuildMessagesUsingDirectImageUrlAsync(this.Provider, chatModel), + _ => await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel), + }; // Prepare the OpenAI HTTP chat request: var providerChatRequest = JsonSerializer.Serialize(new ChatRequest diff --git a/app/MindWork AI Studio/Provider/SelfHosted/TextMessage.cs b/app/MindWork AI Studio/Provider/SelfHosted/TextMessage.cs deleted file mode 100644 index 1479257d..00000000 --- a/app/MindWork AI Studio/Provider/SelfHosted/TextMessage.cs +++ /dev/null @@ -1,13 +0,0 @@ -namespace AIStudio.Provider.SelfHosted; - -/// -/// Chat message model. -/// -/// The text content of the message. -/// The role of the message. 
-public record TextMessage(string Content, string Role) : IMessage -{ - public TextMessage() : this(string.Empty, string.Empty) - { - } -} \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/SubContentConverter.cs b/app/MindWork AI Studio/Provider/SubContentConverter.cs new file mode 100644 index 00000000..cd257a3d --- /dev/null +++ b/app/MindWork AI Studio/Provider/SubContentConverter.cs @@ -0,0 +1,34 @@ +using System.Text.Json; +using System.Text.Json.Serialization; + +using AIStudio.Provider.OpenAI; + +namespace AIStudio.Provider; + +/// +/// Custom JSON converter for the ISubContent interface to handle polymorphic serialization. +/// +/// +/// This converter ensures that when serializing ISubContent objects, all properties +/// of the concrete implementation (e.g., SubContentText, SubContentImageUrl) are serialized, +/// not just the properties defined in the ISubContent interface. +/// +public sealed class SubContentConverter : JsonConverter +{ + private static readonly ILogger LOGGER = Program.LOGGER_FACTORY.CreateLogger(); + + public override ISubContent? Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) + { + // Deserialization is not needed for request objects, as sub-content is only serialized + // when sending requests to LLM providers. + LOGGER.LogError("Deserializing ISubContent is not supported. This converter is only used for serializing request messages."); + return null; + } + + public override void Write(Utf8JsonWriter writer, ISubContent value, JsonSerializerOptions options) + { + // Serialize the actual concrete type (e.g., SubContentText, SubContentImageUrl) instead of just the ISubContent interface. + // This ensures all properties of the concrete type are included in the JSON output. + JsonSerializer.Serialize(writer, value, value.GetType(), options); + } +} diff --git a/app/MindWork AI Studio/Provider/SubContentImageUrlConverter.cs b/app/MindWork AI Studio/Provider/SubContentImageUrlConverter.cs new file mode 100644 index 00000000..d6df6878 --- /dev/null +++ b/app/MindWork AI Studio/Provider/SubContentImageUrlConverter.cs @@ -0,0 +1,34 @@ +using System.Text.Json; +using System.Text.Json.Serialization; + +using AIStudio.Provider.OpenAI; + +namespace AIStudio.Provider; + +/// +/// Custom JSON converter for the ISubContentImageUrl interface to handle polymorphic serialization. +/// +/// +/// This converter ensures that when serializing ISubContentImageUrl objects, all properties +/// of the concrete implementation (e.g., SubContentImageUrlData) are serialized, +/// not just the properties defined in the ISubContentImageUrl interface. +/// +public sealed class SubContentImageUrlConverter : JsonConverter +{ + private static readonly ILogger LOGGER = Program.LOGGER_FACTORY.CreateLogger(); + + public override ISubContentImageUrl? Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) + { + // Deserialization is not needed for request objects, as sub-content image URLs are only serialized + // when sending requests to LLM providers. + LOGGER.LogError("Deserializing ISubContentImageUrl is not supported. This converter is only used for serializing request messages."); + return null; + } + + public override void Write(Utf8JsonWriter writer, ISubContentImageUrl value, JsonSerializerOptions options) + { + // Serialize the actual concrete type (e.g., SubContentImageUrlData) instead of just the ISubContentImageUrl interface. 
+ // This ensures all properties of the concrete type are included in the JSON output. + JsonSerializer.Serialize(writer, value, value.GetType(), options); + } +} diff --git a/app/MindWork AI Studio/Provider/SubContentType.cs b/app/MindWork AI Studio/Provider/SubContentType.cs new file mode 100644 index 00000000..cca2d802 --- /dev/null +++ b/app/MindWork AI Studio/Provider/SubContentType.cs @@ -0,0 +1,39 @@ +namespace AIStudio.Provider; + +/// +/// Sub content types for OpenAI-compatible API interactions when using multimodal messages. +/// +public enum SubContentType +{ + /// + /// Default type for user prompts in multimodal messages. This type is supported across all providers. + /// + TEXT, + + /// + /// Right now only supported by OpenAI and its responses API. Even other providers that support multimodal messages + /// and the responses API do not support this type. They use TEXT instead. + /// + INPUT_TEXT, + + /// + /// Right now only supported by OpenAI and its responses API. Even other providers that support multimodal messages + /// and the responses API do not support this type. They use IMAGE_URL instead. + /// + INPUT_IMAGE, + + /// + /// Default type for images in multimodal messages. This type is supported across all providers. + /// + IMAGE_URL, + + /// + /// The image type is used exclusively by Anthropic's messages API. + /// + IMAGE, + + /// + /// Right now only supported by OpenAI (responses & chat completion API), Google (chat completions API), and Mistral (chat completions API). + /// + INPUT_AUDIO, +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/X/ProviderX.cs b/app/MindWork AI Studio/Provider/X/ProviderX.cs index 27764c7a..db3f3a29 100644 --- a/app/MindWork AI Studio/Provider/X/ProviderX.cs +++ b/app/MindWork AI Studio/Provider/X/ProviderX.cs @@ -9,7 +9,7 @@ using AIStudio.Settings; namespace AIStudio.Provider.X; -public sealed class ProviderX() : BaseProvider("https://api.x.ai/v1/", LOGGER) +public sealed class ProviderX() : BaseProvider(LLMProviders.X, "https://api.x.ai/v1/", LOGGER) { private static readonly ILogger LOGGER = Program.LOGGER_FACTORY.CreateLogger(); @@ -40,24 +40,7 @@ public sealed class ProviderX() : BaseProvider("https://api.x.ai/v1/", LOGGER) var apiParameters = this.ParseAdditionalApiParameters(); // Build the list of messages: - var messages = await chatThread.Blocks.BuildMessages(async n => new TextMessage - { - Role = n.Role switch - { - ChatRole.USER => "user", - ChatRole.AI => "assistant", - ChatRole.AGENT => "assistant", - ChatRole.SYSTEM => "system", - - _ => "user", - }, - - Content = n.Content switch - { - ContentText text => await text.PrepareTextContentForAI(), - _ => string.Empty, - } - }); + var messages = await chatThread.Blocks.BuildMessagesUsingNestedImageUrlAsync(this.Provider, chatModel); // Prepare the xAI HTTP chat request: var xChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest diff --git a/app/MindWork AI Studio/Settings/DataModel/DataSourceERI_V1.cs b/app/MindWork AI Studio/Settings/DataModel/DataSourceERI_V1.cs index cc836bcf..cbc3839c 100644 --- a/app/MindWork AI Studio/Settings/DataModel/DataSourceERI_V1.cs +++ b/app/MindWork AI Studio/Settings/DataModel/DataSourceERI_V1.cs @@ -74,7 +74,9 @@ public readonly record struct DataSourceERI_V1 : IERIDataSource LatestUserPrompt = lastUserPrompt switch { ContentText text => text.Text, - ContentImage image => await image.AsBase64(token), + ContentImage image => await image.TryAsBase64(token) is (success: true, { } base64Image) + ?
base64Image + : string.Empty, _ => string.Empty }, diff --git a/app/MindWork AI Studio/Settings/ProviderExtensions.Alibaba.cs b/app/MindWork AI Studio/Settings/ProviderExtensions.Alibaba.cs index c7e1a20e..c57aeeed 100644 --- a/app/MindWork AI Studio/Settings/ProviderExtensions.Alibaba.cs +++ b/app/MindWork AI Studio/Settings/ProviderExtensions.Alibaba.cs @@ -4,7 +4,7 @@ namespace AIStudio.Settings; public static partial class ProviderExtensions { - public static List GetModelCapabilitiesAlibaba(Model model) + private static List GetModelCapabilitiesAlibaba(Model model) { var modelName = model.Id.ToLowerInvariant().AsSpan(); diff --git a/app/MindWork AI Studio/Settings/ProviderExtensions.Anthropic.cs b/app/MindWork AI Studio/Settings/ProviderExtensions.Anthropic.cs index 3bf2f0b5..64bc8753 100644 --- a/app/MindWork AI Studio/Settings/ProviderExtensions.Anthropic.cs +++ b/app/MindWork AI Studio/Settings/ProviderExtensions.Anthropic.cs @@ -4,7 +4,7 @@ namespace AIStudio.Settings; public static partial class ProviderExtensions { - public static List GetModelCapabilitiesAnthropic(Model model) + private static List GetModelCapabilitiesAnthropic(Model model) { var modelName = model.Id.ToLowerInvariant().AsSpan(); diff --git a/app/MindWork AI Studio/Settings/ProviderExtensions.DeepSeek.cs b/app/MindWork AI Studio/Settings/ProviderExtensions.DeepSeek.cs index 0d3428e9..9089596b 100644 --- a/app/MindWork AI Studio/Settings/ProviderExtensions.DeepSeek.cs +++ b/app/MindWork AI Studio/Settings/ProviderExtensions.DeepSeek.cs @@ -4,7 +4,7 @@ namespace AIStudio.Settings; public static partial class ProviderExtensions { - public static List GetModelCapabilitiesDeepSeek(Model model) + private static List GetModelCapabilitiesDeepSeek(Model model) { var modelName = model.Id.ToLowerInvariant().AsSpan(); diff --git a/app/MindWork AI Studio/Settings/ProviderExtensions.Google.cs b/app/MindWork AI Studio/Settings/ProviderExtensions.Google.cs index 379370c1..1931bc8f 100644 --- a/app/MindWork AI Studio/Settings/ProviderExtensions.Google.cs +++ b/app/MindWork AI Studio/Settings/ProviderExtensions.Google.cs @@ -4,7 +4,7 @@ namespace AIStudio.Settings; public static partial class ProviderExtensions { - public static List GetModelCapabilitiesGoogle(Model model) + private static List GetModelCapabilitiesGoogle(Model model) { var modelName = model.Id.ToLowerInvariant().AsSpan(); diff --git a/app/MindWork AI Studio/Settings/ProviderExtensions.Mistral.cs b/app/MindWork AI Studio/Settings/ProviderExtensions.Mistral.cs index 545dada8..3d0150c9 100644 --- a/app/MindWork AI Studio/Settings/ProviderExtensions.Mistral.cs +++ b/app/MindWork AI Studio/Settings/ProviderExtensions.Mistral.cs @@ -4,7 +4,7 @@ namespace AIStudio.Settings; public static partial class ProviderExtensions { - public static List GetModelCapabilitiesMistral(Model model) + private static List GetModelCapabilitiesMistral(Model model) { var modelName = model.Id.ToLowerInvariant().AsSpan(); diff --git a/app/MindWork AI Studio/Settings/ProviderExtensions.OpenAI.cs b/app/MindWork AI Studio/Settings/ProviderExtensions.OpenAI.cs index 4f86f377..a65c1534 100644 --- a/app/MindWork AI Studio/Settings/ProviderExtensions.OpenAI.cs +++ b/app/MindWork AI Studio/Settings/ProviderExtensions.OpenAI.cs @@ -4,7 +4,7 @@ namespace AIStudio.Settings; public static partial class ProviderExtensions { - public static List GetModelCapabilitiesOpenAI(Model model) + private static List GetModelCapabilitiesOpenAI(Model model) { var modelName = model.Id.ToLowerInvariant().AsSpan(); diff 
--git a/app/MindWork AI Studio/Settings/ProviderExtensions.OpenRouter.cs b/app/MindWork AI Studio/Settings/ProviderExtensions.OpenRouter.cs index c1479819..7677cca8 100644 --- a/app/MindWork AI Studio/Settings/ProviderExtensions.OpenRouter.cs +++ b/app/MindWork AI Studio/Settings/ProviderExtensions.OpenRouter.cs @@ -4,7 +4,7 @@ namespace AIStudio.Settings; public static partial class ProviderExtensions { - public static List GetModelCapabilitiesOpenRouter(Model model) + private static List GetModelCapabilitiesOpenRouter(Model model) { var modelName = model.Id.ToLowerInvariant().AsSpan(); diff --git a/app/MindWork AI Studio/Settings/ProviderExtensions.OpenSource.cs b/app/MindWork AI Studio/Settings/ProviderExtensions.OpenSource.cs index afd47cfd..fefa87dc 100644 --- a/app/MindWork AI Studio/Settings/ProviderExtensions.OpenSource.cs +++ b/app/MindWork AI Studio/Settings/ProviderExtensions.OpenSource.cs @@ -4,7 +4,7 @@ namespace AIStudio.Settings; public static partial class ProviderExtensions { - public static List GetModelCapabilitiesOpenSource(Model model) + private static List GetModelCapabilitiesOpenSource(Model model) { var modelName = model.Id.ToLowerInvariant().AsSpan(); @@ -102,6 +102,15 @@ public static partial class ProviderExtensions Capability.CHAT_COMPLETION_API, ]; + if(modelName.IndexOf("-vl-") is not -1) + return [ + Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT, + Capability.TEXT_OUTPUT, + + Capability.FUNCTION_CALLING, + Capability.CHAT_COMPLETION_API, + ]; + return [ Capability.TEXT_INPUT, Capability.TEXT_OUTPUT, Capability.CHAT_COMPLETION_API, @@ -159,7 +168,8 @@ public static partial class ProviderExtensions Capability.CHAT_COMPLETION_API, ]; - if (modelName.IndexOf("3.1") is not -1) + if (modelName.IndexOf("3.1") is not -1 || + modelName.IndexOf("3.2") is not -1) return [ Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT, @@ -241,6 +251,43 @@ public static partial class ProviderExtensions ]; } + // + // Z AI / GLM models: + // + if (modelName.IndexOf("glm") is not -1) + { + if(modelName.IndexOf("v") is not -1) + return + [ + Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT, + Capability.TEXT_OUTPUT, + + Capability.OPTIONAL_REASONING, + Capability.FUNCTION_CALLING, + Capability.CHAT_COMPLETION_API, + ]; + + if (modelName.IndexOf("glm-4-") is not -1) + return + [ + Capability.TEXT_INPUT, + Capability.TEXT_OUTPUT, + + Capability.FUNCTION_CALLING, + Capability.CHAT_COMPLETION_API, + ]; + + return + [ + Capability.TEXT_INPUT, + Capability.TEXT_OUTPUT, + + Capability.FUNCTION_CALLING, + Capability.OPTIONAL_REASONING, + Capability.CHAT_COMPLETION_API, + ]; + } + // Default: return [ Capability.TEXT_INPUT, Capability.TEXT_OUTPUT, diff --git a/app/MindWork AI Studio/Settings/ProviderExtensions.Perplexity.cs b/app/MindWork AI Studio/Settings/ProviderExtensions.Perplexity.cs index da873009..d73ba8c5 100644 --- a/app/MindWork AI Studio/Settings/ProviderExtensions.Perplexity.cs +++ b/app/MindWork AI Studio/Settings/ProviderExtensions.Perplexity.cs @@ -4,7 +4,7 @@ namespace AIStudio.Settings; public static partial class ProviderExtensions { - public static List GetModelCapabilitiesPerplexity(Model model) + private static List GetModelCapabilitiesPerplexity(Model model) { var modelName = model.Id.ToLowerInvariant().AsSpan(); diff --git a/app/MindWork AI Studio/Settings/ProviderExtensions.cs b/app/MindWork AI Studio/Settings/ProviderExtensions.cs index 984386d1..8ce0ab2a 100644 --- a/app/MindWork AI Studio/Settings/ProviderExtensions.cs +++ b/app/MindWork AI 
Studio/Settings/ProviderExtensions.cs @@ -4,26 +4,39 @@ namespace AIStudio.Settings; public static partial class ProviderExtensions { - public static List GetModelCapabilities(this Provider provider) => provider.UsedLLMProvider switch + /// + /// Get the capabilities of the model used by the configured provider. + /// + /// The configured provider. + /// The capabilities of the configured model. + public static List GetModelCapabilities(this Provider provider) => provider.UsedLLMProvider.GetModelCapabilities(provider.Model); + + /// + /// Get the capabilities of a model for a specific provider. + /// + /// The LLM provider. + /// The model to get the capabilities for. + /// >The capabilities of the model. + public static List GetModelCapabilities(this LLMProviders provider, Model model) => provider switch { - LLMProviders.OPEN_AI => GetModelCapabilitiesOpenAI(provider.Model), - LLMProviders.MISTRAL => GetModelCapabilitiesMistral(provider.Model), - LLMProviders.ANTHROPIC => GetModelCapabilitiesAnthropic(provider.Model), - LLMProviders.GOOGLE => GetModelCapabilitiesGoogle(provider.Model), - LLMProviders.X => GetModelCapabilitiesOpenSource(provider.Model), - LLMProviders.DEEP_SEEK => GetModelCapabilitiesDeepSeek(provider.Model), - LLMProviders.ALIBABA_CLOUD => GetModelCapabilitiesAlibaba(provider.Model), - LLMProviders.PERPLEXITY => GetModelCapabilitiesPerplexity(provider.Model), - LLMProviders.OPEN_ROUTER => GetModelCapabilitiesOpenRouter(provider.Model), + LLMProviders.OPEN_AI => GetModelCapabilitiesOpenAI(model), + LLMProviders.MISTRAL => GetModelCapabilitiesMistral(model), + LLMProviders.ANTHROPIC => GetModelCapabilitiesAnthropic(model), + LLMProviders.GOOGLE => GetModelCapabilitiesGoogle(model), + LLMProviders.X => GetModelCapabilitiesOpenSource(model), + LLMProviders.DEEP_SEEK => GetModelCapabilitiesDeepSeek(model), + LLMProviders.ALIBABA_CLOUD => GetModelCapabilitiesAlibaba(model), + LLMProviders.PERPLEXITY => GetModelCapabilitiesPerplexity(model), + LLMProviders.OPEN_ROUTER => GetModelCapabilitiesOpenRouter(model), - LLMProviders.GROQ => GetModelCapabilitiesOpenSource(provider.Model), - LLMProviders.FIREWORKS => GetModelCapabilitiesOpenSource(provider.Model), - LLMProviders.HUGGINGFACE => GetModelCapabilitiesOpenSource(provider.Model), + LLMProviders.GROQ => GetModelCapabilitiesOpenSource(model), + LLMProviders.FIREWORKS => GetModelCapabilitiesOpenSource(model), + LLMProviders.HUGGINGFACE => GetModelCapabilitiesOpenSource(model), - LLMProviders.HELMHOLTZ => GetModelCapabilitiesOpenSource(provider.Model), - LLMProviders.GWDG => GetModelCapabilitiesOpenSource(provider.Model), + LLMProviders.HELMHOLTZ => GetModelCapabilitiesOpenSource(model), + LLMProviders.GWDG => GetModelCapabilitiesOpenSource(model), - LLMProviders.SELF_HOSTED => GetModelCapabilitiesOpenSource(provider.Model), + LLMProviders.SELF_HOSTED => GetModelCapabilitiesOpenSource(model), _ => [] }; diff --git a/app/MindWork AI Studio/Tools/RAG/IRetrievalContextExtensions.cs b/app/MindWork AI Studio/Tools/RAG/IRetrievalContextExtensions.cs index ee5e6cb5..74ff4e58 100644 --- a/app/MindWork AI Studio/Tools/RAG/IRetrievalContextExtensions.cs +++ b/app/MindWork AI Studio/Tools/RAG/IRetrievalContextExtensions.cs @@ -81,7 +81,9 @@ public static class IRetrievalContextExtensions sb.AppendLine(); sb.AppendLine("Matched image content as base64-encoded data:"); sb.AppendLine("````"); - sb.AppendLine(await imageContext.AsBase64(token)); + sb.AppendLine(await imageContext.TryAsBase64(token) is (success: true, { } base64Image) + ? 
base64Image + : string.Empty); sb.AppendLine("````"); break; diff --git a/app/MindWork AI Studio/Tools/Validation/FileExtensionValidation.cs b/app/MindWork AI Studio/Tools/Validation/FileExtensionValidation.cs index 741e596a..991dcbdf 100644 --- a/app/MindWork AI Studio/Tools/Validation/FileExtensionValidation.cs +++ b/app/MindWork AI Studio/Tools/Validation/FileExtensionValidation.cs @@ -1,3 +1,5 @@ +using AIStudio.Provider; +using AIStudio.Settings; using AIStudio.Tools.PluginSystem; using AIStudio.Tools.Rust; @@ -10,15 +12,38 @@ public static class FileExtensionValidation { private static string TB(string fallbackEN) => I18N.I.T(fallbackEN, typeof(FileExtensionValidation).Namespace, nameof(FileExtensionValidation)); + /// + /// Defines the use cases for file extension validation. + /// + public enum UseCase + { + /// + /// No specific use case; general validation. + /// + NONE, + + /// + /// Validating for directly loading content into the UI. In this state, there might be no provider selected yet. + /// + DIRECTLY_LOADING_CONTENT, + + /// + /// Validating for attaching content to a message or prompt. + /// + ATTACHING_CONTENT, + } + /// /// Validates the file extension and sends appropriate MessageBus notifications when invalid. /// + /// The validation use case. /// The file path to validate. + /// The selected provider. /// True if valid, false if invalid (error/warning already sent via MessageBus). - public static async Task IsExtensionValidWithNotifyAsync(string filePath) + public static async Task IsExtensionValidWithNotifyAsync(UseCase useCae, string filePath, Settings.Provider? provider = null) { - var ext = Path.GetExtension(filePath).TrimStart('.'); - if (Array.Exists(FileTypeFilter.Executables.FilterExtensions, x => x.Equals(ext, StringComparison.OrdinalIgnoreCase))) + var ext = Path.GetExtension(filePath).TrimStart('.').ToLowerInvariant(); + if(FileTypeFilter.Executables.FilterExtensions.Contains(ext)) { await MessageBus.INSTANCE.SendError(new( Icons.Material.Filled.AppBlocking, @@ -26,15 +51,39 @@ public static class FileExtensionValidation return false; } - if (Array.Exists(FileTypeFilter.AllImages.FilterExtensions, x => x.Equals(ext, StringComparison.OrdinalIgnoreCase))) + var capabilities = provider?.GetModelCapabilities() ?? 
new(); + if (FileTypeFilter.AllImages.FilterExtensions.Contains(ext)) { - await MessageBus.INSTANCE.SendWarning(new( - Icons.Material.Filled.ImageNotSupported, - TB("Images are not supported yet"))); - return false; + switch (useCae) + { + // In this use case, we cannot guarantee that a provider is selected yet: + case UseCase.DIRECTLY_LOADING_CONTENT: + await MessageBus.INSTANCE.SendWarning(new( + Icons.Material.Filled.ImageNotSupported, + TB("Images are not supported at this place"))); + return false; + + // In this use case, we can check the provider capabilities: + case UseCase.ATTACHING_CONTENT when capabilities.Contains(Capability.SINGLE_IMAGE_INPUT) || + capabilities.Contains(Capability.MULTIPLE_IMAGE_INPUT): + return true; + + // We know that images are not supported: + case UseCase.ATTACHING_CONTENT: + await MessageBus.INSTANCE.SendWarning(new( + Icons.Material.Filled.ImageNotSupported, + TB("Images are not supported by the selected provider and model"))); + return false; + + default: + await MessageBus.INSTANCE.SendWarning(new( + Icons.Material.Filled.ImageNotSupported, + TB("Images are not supported yet"))); + return false; + } } - if (Array.Exists(FileTypeFilter.AllVideos.FilterExtensions, x => x.Equals(ext, StringComparison.OrdinalIgnoreCase))) + if(FileTypeFilter.AllVideos.FilterExtensions.Contains(ext)) { await MessageBus.INSTANCE.SendWarning(new( Icons.Material.Filled.FeaturedVideo, @@ -42,7 +91,7 @@ public static class FileExtensionValidation return false; } - if (Array.Exists(FileTypeFilter.AllAudio.FilterExtensions, x => x.Equals(ext, StringComparison.OrdinalIgnoreCase))) + if(FileTypeFilter.AllAudio.FilterExtensions.Contains(ext)) { await MessageBus.INSTANCE.SendWarning(new( Icons.Material.Filled.AudioFile, diff --git a/app/MindWork AI Studio/wwwroot/changelog/v0.9.55.md b/app/MindWork AI Studio/wwwroot/changelog/v0.9.55.md index 7f29c973..9dfc2fe8 100644 --- a/app/MindWork AI Studio/wwwroot/changelog/v0.9.55.md +++ b/app/MindWork AI Studio/wwwroot/changelog/v0.9.55.md @@ -2,16 +2,18 @@ - Added support for newer Mistral models (Mistral 3, Voxtral, and Magistral). - Added support for the new OpenAI model GPT 5.2. - Added support for OpenRouter as LLM and embedding provider. +- Added support for multimodal processing (documents and images for now), when the selected LLM supports it. - Added a description field to local data sources (preview feature) so that the data selection agent has more information about which data each local source contains when selecting data sources. -- Added the ability to use file attachments in chat. This is the initial implementation of this feature. We will continue to develop this feature and refine it further based on user feedback. Many thanks to Sabrina `Sabrina-devops` for this wonderful contribution. +- Added the ability to use file attachments (including images) in chat. This is the initial implementation of this feature. We will continue to develop this feature and refine it further based on user feedback. Many thanks to Sabrina `Sabrina-devops` for this wonderful contribution. - Improved the document analysis assistant (in preview) by adding descriptions to the different sections. -- Improved the document preview dialog for the document analysis assistant (in preview), providing Markdown and plain text views for attached files. +- Improved the document analysis assistant (in preview) by allowing users to use images as input files in addition to documents. 
+- Improved the document preview dialog for the document analysis assistant (in preview), providing Markdown, image and plain text views for attached files. - Improved the Pandoc handling for the document analysis assistant (in preview) and file attachments in chat. When Pandoc is not installed and users attempt to attach files, users are now prompted to install Pandoc first. - Improved the ID handling for configuration plugins. - Improved error handling, logging, and code quality. -- Improved error handling for Microsoft Word export. -- Improved file reading, e.g. for the translation, summarization, and legal assistants, by performing the Pandoc validation in the first step. This prevents unnecessary selection of files that cannot be processed. -- Improved the file selection for file attachments in chat and assistant file loading by filtering out audio files. Audio attachments are not yet supported. +- Improved error handling for the Microsoft Word export. +- Improved the file reading, e.g. for the translation, summarization, and legal assistants, by performing the Pandoc validation in the first step. This prevents unnecessary selection of files that cannot be processed. +- Improved the file selection for file attachments in chat and the assistant file loading by filtering out audio files. Audio attachments are not yet supported. - Improved the developer experience by automating localization updates in the filesystem for the selected language in the localization assistant. - Improved the file selection so that users can now select multiple files at the same time. This is useful, for example, for document analysis (in preview) or adding file attachments to the chat. - Fixed a bug in the local data sources info dialog (preview feature) for data directories that could cause the app to crash. The error was caused by a background thread producing data while the frontend attempted to display it.
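For the Anthropic-specific image sub-content added in this change set (SubContentImage, SubContentBase64Image, and SubContentImageSourceConverter), a similar minimal, self-contained sketch shows the block shape produced under the snake-case serializer options. The names only approximate the diff's records, and the demo class and sample data are illustrative assumptions.

```csharp
using System;
using System.Text.Json;
using System.Text.Json.Serialization;

public enum SubContentType { IMAGE }
public enum SubContentImageType { URL, BASE64 }

public interface ISubContentImageSource
{
    SubContentImageType Type { get; }
}

public record SubContentBase64Image : ISubContentImageSource
{
    public SubContentImageType Type => SubContentImageType.BASE64;
    public string MediaType { get; init; } = string.Empty;
    public string Data { get; init; } = string.Empty;
}

public record SubContentImage(SubContentType Type, ISubContentImageSource Source)
{
    public SubContentImage() : this(SubContentType.IMAGE, new SubContentBase64Image()) { }
}

// Same polymorphic approach as the converters in the diff: write the runtime type
// so the concrete source's properties end up in the JSON.
public sealed class ImageSourceConverter : JsonConverter<ISubContentImageSource>
{
    public override ISubContentImageSource? Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) => null;

    public override void Write(Utf8JsonWriter writer, ISubContentImageSource value, JsonSerializerOptions options)
        => JsonSerializer.Serialize(writer, value, value.GetType(), options);
}

public static class AnthropicImageDemo
{
    public static void Main()
    {
        var options = new JsonSerializerOptions
        {
            PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower,
            Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower), new ImageSourceConverter() },
        };

        var image = new SubContentImage
        {
            Source = new SubContentBase64Image { MediaType = "image/png", Data = "iVBORw0KGgo" },
        };

        // Prints:
        // {"type":"image","source":{"type":"base64","media_type":"image/png","data":"iVBORw0KGgo"}}
        Console.WriteLine(JsonSerializer.Serialize(image, options));
    }
}
```

The resulting block, with media_type and data nested under source, corresponds to the base64 image source format expected by Anthropic's messages API.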