From 4881b1a095a44a2d4c6d7f8aed3aab3e004ec779 Mon Sep 17 00:00:00 2001
From: Thorsten Sommer
Date: Sun, 28 Dec 2025 20:09:33 +0100
Subject: [PATCH] Add multimodal message support with content type enumeration
 and sub-content models

---
 .../Provider/OpenAI/ContentType.cs          | 34 +++++++++++++++++++
 .../Provider/OpenAI/ISubContent.cs          | 12 +++++++
 .../Provider/OpenAI/MultimodalMessage.cs    | 13 +++++++
 .../Provider/OpenAI/SubContentImageUrl.cs   | 11 ++++++
 .../Provider/OpenAI/SubContentInputImage.cs | 14 ++++++++
 .../Provider/OpenAI/SubContentInputText.cs  | 14 ++++++++
 .../Provider/OpenAI/SubContentText.cs       | 11 ++++++
 7 files changed, 109 insertions(+)
 create mode 100644 app/MindWork AI Studio/Provider/OpenAI/ContentType.cs
 create mode 100644 app/MindWork AI Studio/Provider/OpenAI/ISubContent.cs
 create mode 100644 app/MindWork AI Studio/Provider/OpenAI/MultimodalMessage.cs
 create mode 100644 app/MindWork AI Studio/Provider/OpenAI/SubContentImageUrl.cs
 create mode 100644 app/MindWork AI Studio/Provider/OpenAI/SubContentInputImage.cs
 create mode 100644 app/MindWork AI Studio/Provider/OpenAI/SubContentInputText.cs
 create mode 100644 app/MindWork AI Studio/Provider/OpenAI/SubContentText.cs

diff --git a/app/MindWork AI Studio/Provider/OpenAI/ContentType.cs b/app/MindWork AI Studio/Provider/OpenAI/ContentType.cs
new file mode 100644
index 00000000..beba8cf8
--- /dev/null
+++ b/app/MindWork AI Studio/Provider/OpenAI/ContentType.cs
@@ -0,0 +1,34 @@
+namespace AIStudio.Provider.OpenAI;
+
+/// <summary>
+/// Content types for OpenAI API interactions when using multimodal messages.
+/// </summary>
+public enum ContentType
+{
+    /// <summary>
+    /// Default type for user prompts in multimodal messages. This type is supported across all providers.
+    /// </summary>
+    TEXT,
+
+    /// <summary>
+    /// Right now, this is only supported by OpenAI and its responses API. Even other providers that support multimodal messages
+    /// and the responses API do not support this type. They use TEXT instead.
+    /// </summary>
+    INPUT_TEXT,
+
+    /// <summary>
+    /// Right now, this is only supported by OpenAI and its responses API. Even other providers that support multimodal messages
+    /// and the responses API do not support this type. They use IMAGE_URL instead.
+    /// </summary>
+    INPUT_IMAGE,
+
+    /// <summary>
+    /// Right now, this is only supported by OpenAI (responses & chat completions API), Google (chat completions API), and Mistral (chat completions API).
+    /// </summary>
+    INPUT_AUDIO,
+
+    /// <summary>
+    /// Default type for images in multimodal messages. This type is supported across all providers.
+    /// </summary>
+    IMAGE_URL,
+}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Provider/OpenAI/ISubContent.cs b/app/MindWork AI Studio/Provider/OpenAI/ISubContent.cs
new file mode 100644
index 00000000..dd375b5a
--- /dev/null
+++ b/app/MindWork AI Studio/Provider/OpenAI/ISubContent.cs
@@ -0,0 +1,12 @@
+namespace AIStudio.Provider.OpenAI;
+
+/// <summary>
+/// Contract for sub-content in multimodal messages.
+/// </summary>
+public interface ISubContent
+{
+    /// <summary>
+    /// The type of the sub-content.
+    /// </summary>
+    public ContentType Type { get; init; }
+}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Provider/OpenAI/MultimodalMessage.cs b/app/MindWork AI Studio/Provider/OpenAI/MultimodalMessage.cs
new file mode 100644
index 00000000..8b7ff8e0
--- /dev/null
+++ b/app/MindWork AI Studio/Provider/OpenAI/MultimodalMessage.cs
@@ -0,0 +1,13 @@
+namespace AIStudio.Provider.OpenAI;
+
+/// <summary>
+/// A multimodal chat message model that can contain various types of content.
+/// </summary>
+/// <param name="Content">The list of sub-contents in the message.</param>
+/// <param name="Role">The role of the message.</param>
+public record MultimodalMessage(List<ISubContent> Content, string Role) : IMessage<List<ISubContent>>
+{
+    public MultimodalMessage() : this([], string.Empty)
+    {
+    }
+}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Provider/OpenAI/SubContentImageUrl.cs b/app/MindWork AI Studio/Provider/OpenAI/SubContentImageUrl.cs
new file mode 100644
index 00000000..87860e03
--- /dev/null
+++ b/app/MindWork AI Studio/Provider/OpenAI/SubContentImageUrl.cs
@@ -0,0 +1,11 @@
+namespace AIStudio.Provider.OpenAI;
+
+/// <summary>
+/// Image sub-content for multimodal messages.
+/// </summary>
+public record SubContentImageUrl(ContentType Type, string ImageUrl) : ISubContent
+{
+    public SubContentImageUrl() : this(ContentType.IMAGE_URL, string.Empty)
+    {
+    }
+}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Provider/OpenAI/SubContentInputImage.cs b/app/MindWork AI Studio/Provider/OpenAI/SubContentInputImage.cs
new file mode 100644
index 00000000..4d631f38
--- /dev/null
+++ b/app/MindWork AI Studio/Provider/OpenAI/SubContentInputImage.cs
@@ -0,0 +1,14 @@
+namespace AIStudio.Provider.OpenAI;
+
+/// <summary>
+/// Image input sub-content for multimodal messages.
+/// </summary>
+/// <remarks>
+/// Right now, this is used only by OpenAI in its responses API.
+/// </remarks>
+public record SubContentInputImage(ContentType Type, string ImageUrl) : ISubContent
+{
+    public SubContentInputImage() : this(ContentType.INPUT_IMAGE, string.Empty)
+    {
+    }
+}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Provider/OpenAI/SubContentInputText.cs b/app/MindWork AI Studio/Provider/OpenAI/SubContentInputText.cs
new file mode 100644
index 00000000..167d2dd3
--- /dev/null
+++ b/app/MindWork AI Studio/Provider/OpenAI/SubContentInputText.cs
@@ -0,0 +1,14 @@
+namespace AIStudio.Provider.OpenAI;
+
+/// <summary>
+/// Text input sub-content for multimodal messages.
+/// </summary>
+/// <remarks>
+/// Right now, this is used only by OpenAI in its responses API.
+/// </remarks>
+public record SubContentInputText(ContentType Type, string Text) : ISubContent
+{
+    public SubContentInputText() : this(ContentType.INPUT_TEXT, string.Empty)
+    {
+    }
+}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Provider/OpenAI/SubContentText.cs b/app/MindWork AI Studio/Provider/OpenAI/SubContentText.cs
new file mode 100644
index 00000000..57e0f9ad
--- /dev/null
+++ b/app/MindWork AI Studio/Provider/OpenAI/SubContentText.cs
@@ -0,0 +1,11 @@
+namespace AIStudio.Provider.OpenAI;
+
+/// <summary>
+/// Text sub-content for multimodal messages.
+/// </summary>
+public record SubContentText(ContentType Type, string Text) : ISubContent
+{
+    public SubContentText() : this(ContentType.TEXT, string.Empty)
+    {
+    }
+}
\ No newline at end of file
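
Note (not part of the patch): a minimal usage sketch of how the new records could be composed into multimodal user messages. The lowercase "user" role string, the example URL, and the prompt text are illustrative assumptions; the provider code that consumes and serializes these models is not part of this change.

    using System;
    using AIStudio.Provider.OpenAI;

    // Compose one user message that mixes text and an image reference.
    // TEXT and IMAGE_URL are the broadly supported content types; the
    // INPUT_* variants target OpenAI's responses API only.
    var chatMessage = new MultimodalMessage(
        Content:
        [
            new SubContentText(ContentType.TEXT, "What is shown in this picture?"),
            new SubContentImageUrl(ContentType.IMAGE_URL, "https://example.com/photo.png"),
        ],
        Role: "user"); // "user" is the conventional OpenAI role string; assumed here, not defined by this patch.

    // The same prompt expressed for OpenAI's responses API, which uses the INPUT_* types.
    var responsesMessage = new MultimodalMessage(
        [
            new SubContentInputText(ContentType.INPUT_TEXT, "What is shown in this picture?"),
            new SubContentInputImage(ContentType.INPUT_IMAGE, "https://example.com/photo.png"),
        ],
        "user");

    // Each sub-content carries its own ContentType, so downstream code can
    // branch on part.Type when building the provider-specific payload.
    foreach (var part in chatMessage.Content)
        Console.WriteLine(part.Type);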