From d6af5a3afdcac77fdfacd81f5927d5be4d24dcb8 Mon Sep 17 00:00:00 2001 From: Thorsten Sommer Date: Sun, 11 May 2025 12:51:35 +0200 Subject: [PATCH] Add model capabilities (#450) --- .../AlibabaCloud/ProviderAlibabaCloud.cs | 71 +++++++++ .../Provider/Anthropic/ProviderAnthropic.cs | 27 ++++ .../Provider/BaseProvider.cs | 3 + .../Provider/CapabilitiesOpenSource.cs | 150 ++++++++++++++++++ app/MindWork AI Studio/Provider/Capability.cs | 97 +++++++++++ .../Provider/DeepSeek/ProviderDeepSeek.cs | 21 +++ .../Provider/Fireworks/ProviderFireworks.cs | 2 + .../Provider/GWDG/ProviderGWDG.cs | 2 + .../Provider/Google/ProviderGoogle.cs | 82 ++++++++++ .../Provider/Groq/ProviderGroq.cs | 2 + .../Provider/Helmholtz/ProviderHelmholtz.cs | 2 + .../HuggingFace/ProviderHuggingFace.cs | 2 + app/MindWork AI Studio/Provider/IProvider.cs | 7 + .../Provider/Mistral/ProviderMistral.cs | 48 +++++- app/MindWork AI Studio/Provider/NoProvider.cs | 2 + .../Provider/OpenAI/ProviderOpenAI.cs | 65 +++++++- .../Provider/SelfHosted/ProviderSelfHosted.cs | 7 +- .../Provider/X/ProviderX.cs | 2 + 18 files changed, 587 insertions(+), 5 deletions(-) create mode 100644 app/MindWork AI Studio/Provider/CapabilitiesOpenSource.cs create mode 100644 app/MindWork AI Studio/Provider/Capability.cs diff --git a/app/MindWork AI Studio/Provider/AlibabaCloud/ProviderAlibabaCloud.cs b/app/MindWork AI Studio/Provider/AlibabaCloud/ProviderAlibabaCloud.cs index dfd807df..fb38cc4f 100644 --- a/app/MindWork AI Studio/Provider/AlibabaCloud/ProviderAlibabaCloud.cs +++ b/app/MindWork AI Studio/Provider/AlibabaCloud/ProviderAlibabaCloud.cs @@ -139,7 +139,78 @@ public sealed class ProviderAlibabaCloud(ILogger logger) : BaseProvider("https:/ return this.LoadModels(["text-embedding-"], token, apiKeyProvisional).ContinueWith(t => t.Result.Concat(additionalModels).OrderBy(x => x.Id).AsEnumerable(), token); } + + /// <inheritdoc /> + public override IReadOnlyCollection GetModelCapabilities(Model model) + { + var modelName = 
model.Id.ToLowerInvariant().AsSpan(); + + // Qwen models (the model ID is lower-cased above, so all checks are case-insensitive): + if (modelName.StartsWith("qwen")) + { + // Check for omni models: + if (modelName.IndexOf("omni") is not -1) + return + [ + Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT, + Capability.AUDIO_INPUT, Capability.SPEECH_INPUT, + Capability.VIDEO_INPUT, + Capability.TEXT_OUTPUT, Capability.SPEECH_OUTPUT + ]; + + // Check for Qwen 3: + if(modelName.StartsWith("qwen3")) + return + [ + Capability.TEXT_INPUT, + Capability.TEXT_OUTPUT, + + Capability.OPTIONAL_REASONING, Capability.FUNCTION_CALLING + ]; + + if(modelName.IndexOf("-vl-") is not -1) // IDs containing "-vl-" also accept images: + return + [ + Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT, + Capability.TEXT_OUTPUT, + ]; + } + + // QwQ models: + if (modelName.StartsWith("qwq")) + { + return + [ + Capability.TEXT_INPUT, + Capability.TEXT_OUTPUT, + + Capability.ALWAYS_REASONING, Capability.FUNCTION_CALLING + ]; + } + + // QVQ models: + if (modelName.StartsWith("qvq")) + { + return + [ + Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT, + Capability.TEXT_OUTPUT, + + Capability.ALWAYS_REASONING + ]; + } + + // Default to text input and output (also reached by Qwen models that match none of the checks above): + return + [ + Capability.TEXT_INPUT, + Capability.TEXT_OUTPUT, + + Capability.FUNCTION_CALLING + ]; + } + #endregion private async Task> LoadModels(string[] prefixes, CancellationToken token, string? 
apiKeyProvisional = null) diff --git a/app/MindWork AI Studio/Provider/Anthropic/ProviderAnthropic.cs b/app/MindWork AI Studio/Provider/Anthropic/ProviderAnthropic.cs index 7693d21f..75c424d4 100644 --- a/app/MindWork AI Studio/Provider/Anthropic/ProviderAnthropic.cs +++ b/app/MindWork AI Studio/Provider/Anthropic/ProviderAnthropic.cs @@ -110,6 +110,33 @@ public sealed class ProviderAnthropic(ILogger logger) : BaseProvider("https://ap return Task.FromResult(Enumerable.Empty()); } + public override IReadOnlyCollection GetModelCapabilities(Model model) + { + var modelName = model.Id.ToLowerInvariant().AsSpan(); + + // Claude 3.7 is able to do reasoning (must be checked before the broader "claude-3-" prefix below): + if(modelName.StartsWith("claude-3-7")) + return [ + Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT, + Capability.TEXT_OUTPUT, + + Capability.OPTIONAL_REASONING, Capability.FUNCTION_CALLING]; + + // All other 3.x models are able to process text and images as input: + if(modelName.StartsWith("claude-3-")) + return [ + Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT, + Capability.TEXT_OUTPUT, + + Capability.FUNCTION_CALLING]; + + // Any other model is able to process text only (function calling is still reported): + return [ + Capability.TEXT_INPUT, + Capability.TEXT_OUTPUT, + Capability.FUNCTION_CALLING]; + } + #endregion private async Task> LoadModels(CancellationToken token, string? apiKeyProvisional = null) diff --git a/app/MindWork AI Studio/Provider/BaseProvider.cs b/app/MindWork AI Studio/Provider/BaseProvider.cs index d24ca38d..32c0e621 100644 --- a/app/MindWork AI Studio/Provider/BaseProvider.cs +++ b/app/MindWork AI Studio/Provider/BaseProvider.cs @@ -76,6 +76,9 @@ public abstract class BaseProvider : IProvider, ISecretId /// public abstract Task> GetEmbeddingModels(string? 
apiKeyProvisional = null, CancellationToken token = default); + + /// <inheritdoc /> + public abstract IReadOnlyCollection GetModelCapabilities(Model model); #endregion diff --git a/app/MindWork AI Studio/Provider/CapabilitiesOpenSource.cs b/app/MindWork AI Studio/Provider/CapabilitiesOpenSource.cs new file mode 100644 index 00000000..806b1d5b --- /dev/null +++ b/app/MindWork AI Studio/Provider/CapabilitiesOpenSource.cs @@ -0,0 +1,150 @@ +namespace AIStudio.Provider; + +public static class CapabilitiesOpenSource +{ + public static IReadOnlyCollection GetCapabilities(Model model) + { + var modelName = model.Id.ToLowerInvariant().AsSpan(); + + // + // Checking for names in the case of open source models is a hard task. + // Let's assume we want to check for the llama 3.1 405b model. + // + // Here is an incomplete list of how providers name this model: + // - Fireworks: accounts/fireworks/models/llama-v3p1-405b-instruct + // - Hugging Face -> Nebius AI Studio: meta-llama/Meta-Llama-3.1-405B-Instruct + // - Groq: llama-3.1-405b-instruct + // - LM Studio: llama-3.1-405b-instruct + // - Helmholtz Blablador: 1 - Llama3 405 the best general model + // - GWDG: Llama 3.1 405B Instruct + // + + // + // Meta llama models: + // + if (modelName.IndexOf("llama") is not -1) + { + if (modelName.IndexOf("llama4") is not -1 || + modelName.IndexOf("llama 4") is not -1 || + modelName.IndexOf("llama-4") is not -1 || + modelName.IndexOf("llama-v4") is not -1) + return + [ + Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT, + Capability.TEXT_OUTPUT, + + Capability.FUNCTION_CALLING, + ]; + + // The old vision models cannot do function calling: + if (modelName.IndexOf("vision") is not -1) + return [Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT, Capability.TEXT_OUTPUT]; + + // + // All models >= 3.1 are able to do function calling (detected by a dotted "3." or a "v3p" version in the name): + // + if (modelName.IndexOf("llama3.") is not -1 || + modelName.IndexOf("llama 3.") is not -1 || + modelName.IndexOf("llama-3.") is not -1 || + 
modelName.IndexOf("llama-v3p") is not -1) + return + [ + Capability.TEXT_INPUT, + Capability.TEXT_OUTPUT, + + Capability.FUNCTION_CALLING, + ]; + + // All other llama models can only do text input and output: + return [Capability.TEXT_INPUT, Capability.TEXT_OUTPUT]; + } + + // + // DeepSeek models: + // + if (modelName.IndexOf("deepseek") is not -1) + { + if(modelName.IndexOf("deepseek-r1") is not -1 || + modelName.IndexOf("deepseek r1") is not -1) + return [Capability.TEXT_INPUT, Capability.TEXT_OUTPUT, Capability.ALWAYS_REASONING]; + + return [Capability.TEXT_INPUT, Capability.TEXT_OUTPUT]; + } + + // + // Qwen models (including QwQ): + // + if (modelName.IndexOf("qwen") is not -1 || modelName.IndexOf("qwq") is not -1) + { + if (modelName.IndexOf("qwq") is not -1) + return [Capability.TEXT_INPUT, Capability.TEXT_OUTPUT, Capability.ALWAYS_REASONING]; + + return [Capability.TEXT_INPUT, Capability.TEXT_OUTPUT]; + } + + // + // Mistral models (including Pixtral): + // + if (modelName.IndexOf("mistral") is not -1 || + modelName.IndexOf("pixtral") is not -1) + { + if(modelName.IndexOf("pixtral") is not -1) + return + [ + Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT, + Capability.TEXT_OUTPUT, + Capability.FUNCTION_CALLING + ]; + + if (modelName.IndexOf("3.1") is not -1) + return + [ + Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT, + Capability.TEXT_OUTPUT, + Capability.FUNCTION_CALLING + ]; + + // Default for all other Mistral models: + return + [ + Capability.TEXT_INPUT, + Capability.TEXT_OUTPUT, + Capability.FUNCTION_CALLING + ]; + } + + // + // Grok models: + // + if (modelName.IndexOf("grok") is not -1) + { + if(modelName.IndexOf("-vision-") is not -1) + return + [ + Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT, + Capability.TEXT_OUTPUT, + ]; + + if(modelName.StartsWith("grok-3-mini")) + return + [ + Capability.TEXT_INPUT, + Capability.TEXT_OUTPUT, + + Capability.ALWAYS_REASONING, Capability.FUNCTION_CALLING, + ]; + + if(modelName.StartsWith("grok-3")) + return + [ + Capability.TEXT_INPUT, 
+ Capability.TEXT_OUTPUT, + + Capability.FUNCTION_CALLING, + ]; + } + + // Default for every model not matched above: + return [Capability.TEXT_INPUT, Capability.TEXT_OUTPUT]; + } +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/Capability.cs b/app/MindWork AI Studio/Provider/Capability.cs new file mode 100644 index 00000000..047ec67b --- /dev/null +++ b/app/MindWork AI Studio/Provider/Capability.cs @@ -0,0 +1,97 @@ +namespace AIStudio.Provider; + +/// <summary> +/// Represents the capabilities of an AI model. +/// </summary> +public enum Capability +{ + /// <summary> + /// No capabilities specified. + /// </summary> + NONE, + + /// <summary> + /// We don't know what the AI model can do. + /// </summary> + UNKNOWN, + + /// <summary> + /// The AI model accepts text as input. + /// </summary> + TEXT_INPUT, + + /// <summary> + /// The AI model accepts audio as input, such as music or sound. + /// </summary> + AUDIO_INPUT, + + /// <summary> + /// The AI model accepts a single image as input, such as one photo or drawing. + /// </summary> + SINGLE_IMAGE_INPUT, + + /// <summary> + /// The AI model accepts multiple images as input, such as multiple photos or drawings. + /// </summary> + MULTIPLE_IMAGE_INPUT, + + /// <summary> + /// The AI model accepts speech as input. + /// </summary> + SPEECH_INPUT, + + /// <summary> + /// The AI model accepts video as input, such as video files or streams. + /// </summary> + VIDEO_INPUT, + + /// <summary> + /// The AI model can generate text output. + /// </summary> + TEXT_OUTPUT, + + /// <summary> + /// The AI model can generate audio output, such as music or sound. + /// </summary> + AUDIO_OUTPUT, + + /// <summary> + /// The AI model can generate image output, such as photos or drawings. + /// </summary> + IMAGE_OUTPUT, + + /// <summary> + /// The AI model can generate speech output. + /// </summary> + SPEECH_OUTPUT, + + /// <summary> + /// The AI model can generate video output. + /// </summary> + VIDEO_OUTPUT, + + /// <summary> + /// The AI model can perform reasoning tasks, but reasoning is optional. + /// </summary> + OPTIONAL_REASONING, + + /// <summary> + /// The AI model always performs reasoning. + /// </summary> + ALWAYS_REASONING, + + /// <summary> + /// The AI model can embed information or data. + /// </summary> + EMBEDDING, + + /// <summary> + /// The AI model can operate in real time. 
+ /// </summary> + REALTIME, + + /// <summary> + /// The AI model can perform function calling, such as invoking APIs or executing functions. + /// </summary> + FUNCTION_CALLING, +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/DeepSeek/ProviderDeepSeek.cs b/app/MindWork AI Studio/Provider/DeepSeek/ProviderDeepSeek.cs index a906ebad..57f74f4c 100644 --- a/app/MindWork AI Studio/Provider/DeepSeek/ProviderDeepSeek.cs +++ b/app/MindWork AI Studio/Provider/DeepSeek/ProviderDeepSeek.cs @@ -105,6 +105,27 @@ public sealed class ProviderDeepSeek(ILogger logger) : BaseProvider("https://api { return Task.FromResult(Enumerable.Empty()); } + + public override IReadOnlyCollection GetModelCapabilities(Model model) + { + var modelName = model.Id.ToLowerInvariant().AsSpan(); + + if(modelName.IndexOf("reasoner") is not -1) // IDs containing "reasoner" are reported as always reasoning: + return + [ + Capability.TEXT_INPUT, + Capability.TEXT_OUTPUT, + + Capability.ALWAYS_REASONING, + ]; + + return + [ + Capability.TEXT_INPUT, + Capability.TEXT_OUTPUT, + ]; + } + #endregion diff --git a/app/MindWork AI Studio/Provider/Fireworks/ProviderFireworks.cs b/app/MindWork AI Studio/Provider/Fireworks/ProviderFireworks.cs index 66817fc2..22164e18 100644 --- a/app/MindWork AI Studio/Provider/Fireworks/ProviderFireworks.cs +++ b/app/MindWork AI Studio/Provider/Fireworks/ProviderFireworks.cs @@ -106,6 +106,8 @@ public class ProviderFireworks(ILogger logger) : BaseProvider("https://api.firew { return Task.FromResult(Enumerable.Empty()); } + + public override IReadOnlyCollection GetModelCapabilities(Model model) => CapabilitiesOpenSource.GetCapabilities(model); #endregion } \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/GWDG/ProviderGWDG.cs b/app/MindWork AI Studio/Provider/GWDG/ProviderGWDG.cs index c0562a69..ad41804d 100644 --- a/app/MindWork AI Studio/Provider/GWDG/ProviderGWDG.cs +++ b/app/MindWork AI Studio/Provider/GWDG/ProviderGWDG.cs @@ -107,6 +107,8 @@ public sealed class ProviderGWDG(ILogger logger) : BaseProvider("https://chat-ai 
var models = await this.LoadModels(token, apiKeyProvisional); return models.Where(model => model.Id.StartsWith("e5-", StringComparison.InvariantCultureIgnoreCase)); } + + public override IReadOnlyCollection GetModelCapabilities(Model model) => CapabilitiesOpenSource.GetCapabilities(model); #endregion diff --git a/app/MindWork AI Studio/Provider/Google/ProviderGoogle.cs b/app/MindWork AI Studio/Provider/Google/ProviderGoogle.cs index d4a7dc65..aa46a071 100644 --- a/app/MindWork AI Studio/Provider/Google/ProviderGoogle.cs +++ b/app/MindWork AI Studio/Provider/Google/ProviderGoogle.cs @@ -118,7 +118,89 @@ public class ProviderGoogle(ILogger logger) : BaseProvider("https://generativela model.Name.StartsWith("models/text-embedding-", StringComparison.InvariantCultureIgnoreCase)) .Select(n => new Provider.Model(n.Name.Replace("models/", string.Empty), n.DisplayName)); } + + public override IReadOnlyCollection GetModelCapabilities(Provider.Model model) + { + var modelName = model.Id.ToLowerInvariant().AsSpan(); + + if (modelName.IndexOf("gemini-") is not -1) + { + // Reasoning models (Gemini 2.5): + if (modelName.IndexOf("gemini-2.5") is not -1) + return + [ + Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT, Capability.AUDIO_INPUT, + Capability.SPEECH_INPUT, Capability.VIDEO_INPUT, + + Capability.TEXT_OUTPUT, + + Capability.ALWAYS_REASONING, Capability.FUNCTION_CALLING, + ]; + + // Image generation (must be checked before the generic "-2.0-flash-" pattern below): + if(modelName.IndexOf("-2.0-flash-preview-image-") is not -1) + return + [ + Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT, Capability.AUDIO_INPUT, + Capability.SPEECH_INPUT, Capability.VIDEO_INPUT, + + Capability.TEXT_OUTPUT, Capability.IMAGE_OUTPUT, + ]; + + // Realtime model (must be checked before the generic "-2.0-flash-" pattern below): + if(modelName.IndexOf("-2.0-flash-live-") is not -1) + return + [ + Capability.TEXT_INPUT, Capability.AUDIO_INPUT, Capability.SPEECH_INPUT, + Capability.VIDEO_INPUT, + + Capability.TEXT_OUTPUT, Capability.SPEECH_OUTPUT, + + Capability.FUNCTION_CALLING, + ]; + + // The 2.0 flash models cannot call 
functions. NOTE(review): the pattern requires a trailing dash, so a bare "gemini-2.0-flash" ID falls through to the Gemini default below, which reports function calling; confirm this is intended: + if(modelName.IndexOf("-2.0-flash-") is not -1) + return + [ + Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT, Capability.AUDIO_INPUT, + Capability.SPEECH_INPUT, Capability.VIDEO_INPUT, + + Capability.TEXT_OUTPUT, + ]; + + // The old 1.0 pro vision model: + if(modelName.IndexOf("pro-vision") is not -1) + return + [ + Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT, + + Capability.TEXT_OUTPUT, + ]; + + // Default to all other Gemini models: + return + [ + Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT, Capability.AUDIO_INPUT, + Capability.SPEECH_INPUT, Capability.VIDEO_INPUT, + + Capability.TEXT_OUTPUT, + + Capability.FUNCTION_CALLING, + ]; + } + + // Default for all other models (IDs that do not contain "gemini-"): + return + [ + Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT, + + Capability.TEXT_OUTPUT, + + Capability.FUNCTION_CALLING, + ]; + } + #endregion private async Task LoadModels(CancellationToken token, string? apiKeyProvisional = null) diff --git a/app/MindWork AI Studio/Provider/Groq/ProviderGroq.cs b/app/MindWork AI Studio/Provider/Groq/ProviderGroq.cs index ddf5c002..30d81ed0 100644 --- a/app/MindWork AI Studio/Provider/Groq/ProviderGroq.cs +++ b/app/MindWork AI Studio/Provider/Groq/ProviderGroq.cs @@ -109,6 +109,8 @@ public class ProviderGroq(ILogger logger) : BaseProvider("https://api.groq.com/o { return Task.FromResult(Enumerable.Empty()); } + + public override IReadOnlyCollection GetModelCapabilities(Model model) => CapabilitiesOpenSource.GetCapabilities(model); #endregion diff --git a/app/MindWork AI Studio/Provider/Helmholtz/ProviderHelmholtz.cs b/app/MindWork AI Studio/Provider/Helmholtz/ProviderHelmholtz.cs index b8450503..09a95387 100644 --- a/app/MindWork AI Studio/Provider/Helmholtz/ProviderHelmholtz.cs +++ b/app/MindWork AI Studio/Provider/Helmholtz/ProviderHelmholtz.cs @@ -111,6 +111,8 @@ public sealed class ProviderHelmholtz(ILogger logger) : BaseProvider("https://ap model.Id.StartsWith("text-", 
StringComparison.InvariantCultureIgnoreCase) || model.Id.Contains("gritlm", StringComparison.InvariantCultureIgnoreCase)); } + + public override IReadOnlyCollection GetModelCapabilities(Model model) => CapabilitiesOpenSource.GetCapabilities(model); #endregion diff --git a/app/MindWork AI Studio/Provider/HuggingFace/ProviderHuggingFace.cs b/app/MindWork AI Studio/Provider/HuggingFace/ProviderHuggingFace.cs index e98de1f9..659a8ca9 100644 --- a/app/MindWork AI Studio/Provider/HuggingFace/ProviderHuggingFace.cs +++ b/app/MindWork AI Studio/Provider/HuggingFace/ProviderHuggingFace.cs @@ -110,6 +110,8 @@ public sealed class ProviderHuggingFace : BaseProvider { return Task.FromResult(Enumerable.Empty()); } + + public override IReadOnlyCollection GetModelCapabilities(Model model) => CapabilitiesOpenSource.GetCapabilities(model); #endregion } \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/IProvider.cs b/app/MindWork AI Studio/Provider/IProvider.cs index 2256dff5..86a60913 100644 --- a/app/MindWork AI Studio/Provider/IProvider.cs +++ b/app/MindWork AI Studio/Provider/IProvider.cs @@ -63,4 +63,11 @@ public interface IProvider /// The cancellation token. /// The list of embedding models. public Task> GetEmbeddingModels(string? apiKeyProvisional = null, CancellationToken token = default); + + /// <summary> + /// Get the capabilities of a model. + /// </summary> + /// <param name="model">The model to get the capabilities for.</param> + /// <returns>The capabilities of the model.</returns> + public IReadOnlyCollection GetModelCapabilities(Model model); } \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/Mistral/ProviderMistral.cs b/app/MindWork AI Studio/Provider/Mistral/ProviderMistral.cs index 1039ab45..ed87d12f 100644 --- a/app/MindWork AI Studio/Provider/Mistral/ProviderMistral.cs +++ b/app/MindWork AI Studio/Provider/Mistral/ProviderMistral.cs @@ -121,7 +121,53 @@ public sealed class ProviderMistral(ILogger logger) : BaseProvider("https://api. 
{ return Task.FromResult(Enumerable.Empty()); } - + + public override IReadOnlyCollection GetModelCapabilities(Provider.Model model) + { + var modelName = model.Id.ToLowerInvariant().AsSpan(); + + // Pixtral models are able to process images: + if (modelName.IndexOf("pixtral") is not -1) + return + [ + Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT, + Capability.TEXT_OUTPUT, + + Capability.FUNCTION_CALLING, + ]; + + // Mistral medium: + if (modelName.IndexOf("mistral-medium-") is not -1) + return + [ + Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT, + Capability.TEXT_OUTPUT, + + Capability.FUNCTION_CALLING, + ]; + + // Mistral small: + if (modelName.IndexOf("mistral-small-") is not -1) + return + [ + Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT, + Capability.TEXT_OUTPUT, + + Capability.FUNCTION_CALLING, + ]; + + // Mistral saba (text only, no function calling reported): + if (modelName.IndexOf("mistral-saba-") is not -1) + return + [ + Capability.TEXT_INPUT, + Capability.TEXT_OUTPUT, + ]; + + // Default: fall back to the shared name-based detection for open source models: + return CapabilitiesOpenSource.GetCapabilities(model); + } + #endregion private async Task LoadModelList(string? 
apiKeyProvisional, CancellationToken token) diff --git a/app/MindWork AI Studio/Provider/NoProvider.cs b/app/MindWork AI Studio/Provider/NoProvider.cs index ce3fe31f..983ab875 100644 --- a/app/MindWork AI Studio/Provider/NoProvider.cs +++ b/app/MindWork AI Studio/Provider/NoProvider.cs @@ -31,5 +31,7 @@ public class NoProvider : IProvider yield break; } + public IReadOnlyCollection GetModelCapabilities(Model model) => [ Capability.NONE ]; + #endregion } \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs b/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs index 7a586bf6..b5f8f818 100644 --- a/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs +++ b/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs @@ -142,7 +142,70 @@ public sealed class ProviderOpenAI(ILogger logger) : BaseProvider("https://api.o { return this.LoadModels(["text-embedding-"], token, apiKeyProvisional); } - + + public override IReadOnlyCollection GetModelCapabilities(Model model) + { + var modelName = model.Id.ToLowerInvariant().AsSpan(); + + if (modelName.StartsWith("o1-mini")) // Must be checked before the broader "o1" prefix below: + return + [ + Capability.TEXT_INPUT, + Capability.TEXT_OUTPUT, + + Capability.ALWAYS_REASONING, + ]; + + if (modelName.StartsWith("o3-mini")) // Must be checked before the broader "o3" prefix below: + return + [ + Capability.TEXT_INPUT, + Capability.TEXT_OUTPUT, + + Capability.ALWAYS_REASONING, Capability.FUNCTION_CALLING + ]; + + if (modelName.StartsWith("o4-mini") || modelName.StartsWith("o1") || modelName.StartsWith("o3")) + return + [ + Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT, + Capability.TEXT_OUTPUT, + + Capability.ALWAYS_REASONING, Capability.FUNCTION_CALLING + ]; + + if(modelName.StartsWith("gpt-3.5")) + return + [ + Capability.TEXT_INPUT, + Capability.TEXT_OUTPUT, + ]; + + if(modelName.StartsWith("gpt-4-turbo")) // Must be checked before the broader "gpt-4-" prefix below: + return + [ + Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT, + Capability.TEXT_OUTPUT, + + Capability.FUNCTION_CALLING + ]; + + if(modelName is "gpt-4" || 
modelName.StartsWith("gpt-4-")) + return + [ + Capability.TEXT_INPUT, + Capability.TEXT_OUTPUT, + ]; + + return + [ + Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT, + Capability.TEXT_OUTPUT, + + Capability.FUNCTION_CALLING, + ]; + } + #endregion private async Task> LoadModels(string[] prefixes, CancellationToken token, string? apiKeyProvisional = null) diff --git a/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs b/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs index 4ba45c6b..a2e997f9 100644 --- a/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs +++ b/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs @@ -87,8 +87,7 @@ public sealed class ProviderSelfHosted(ILogger logger, Host host, string hostnam yield break; } #pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously - - + public override async Task> GetTextModels(string? apiKeyProvisional = null, CancellationToken token = default) { try @@ -139,7 +138,9 @@ public sealed class ProviderSelfHosted(ILogger logger, Host host, string hostnam return []; } } - + + public override IReadOnlyCollection GetModelCapabilities(Provider.Model model) => CapabilitiesOpenSource.GetCapabilities(model); + #endregion private async Task> LoadModels(string[] ignorePhrases, string[] filterPhrases, CancellationToken token, string? apiKeyProvisional = null) diff --git a/app/MindWork AI Studio/Provider/X/ProviderX.cs b/app/MindWork AI Studio/Provider/X/ProviderX.cs index 0292a501..884c1007 100644 --- a/app/MindWork AI Studio/Provider/X/ProviderX.cs +++ b/app/MindWork AI Studio/Provider/X/ProviderX.cs @@ -110,6 +110,8 @@ public sealed class ProviderX(ILogger logger) : BaseProvider("https://api.x.ai/v { return Task.FromResult>([]); } + + public override IReadOnlyCollection GetModelCapabilities(Model model) => CapabilitiesOpenSource.GetCapabilities(model); #endregion