From d380c3131a6df899ec6eb276f0b4c02cfc995e9d Mon Sep 17 00:00:00 2001 From: Nils Kruthoff Date: Tue, 19 May 2026 22:26:20 +0200 Subject: [PATCH] added response api and set as default next to the chat completion api --- .../Provider/BaseProvider.cs | 12 +- .../Provider/OpenAI/ProviderOpenAI.cs | 197 +++++++++++++++++- .../Provider/OpenAI/ProviderToolAdapters.cs | 35 ++++ .../Provider/OpenAI/ResponsesAPIRequest.cs | 8 +- .../OpenAI/ResponsesFunctionCallItem.cs | 15 ++ .../OpenAI/ResponsesFunctionCallOutputItem.cs | 13 ++ .../Provider/OpenAI/ResponsesFunctionTool.cs | 19 ++ .../Provider/OpenAI/ResponsesResponse.cs | 62 ++++++ .../Tools/ToolCallingSystem/ToolRegistry.cs | 3 +- .../wwwroot/changelog/v26.3.1.md | 1 + documentation/Tools.md | 34 +++ 11 files changed, 374 insertions(+), 25 deletions(-) create mode 100644 app/MindWork AI Studio/Provider/OpenAI/ProviderToolAdapters.cs create mode 100644 app/MindWork AI Studio/Provider/OpenAI/ResponsesFunctionCallItem.cs create mode 100644 app/MindWork AI Studio/Provider/OpenAI/ResponsesFunctionCallOutputItem.cs create mode 100644 app/MindWork AI Studio/Provider/OpenAI/ResponsesFunctionTool.cs create mode 100644 app/MindWork AI Studio/Provider/OpenAI/ResponsesResponse.cs diff --git a/app/MindWork AI Studio/Provider/BaseProvider.cs b/app/MindWork AI Studio/Provider/BaseProvider.cs index 7b44eb9c..08821c3b 100644 --- a/app/MindWork AI Studio/Provider/BaseProvider.cs +++ b/app/MindWork AI Studio/Provider/BaseProvider.cs @@ -634,17 +634,7 @@ public abstract class BaseProvider : IProvider, ISecretId if (runnableTools.Count > 0) { - var providerTools = runnableTools.Select(x => (object)new - { - type = "function", - function = new - { - name = x.Definition.Function.Name, - description = x.Definition.Function.Description, - parameters = x.Definition.Function.Parameters, - strict = x.Definition.Function.Strict, - } - }).ToList(); + var providerTools = runnableTools.Select(x => 
ProviderToolAdapters.ToChatCompletionTool(x.Definition)).ToList(); var internalMessages = new List(); var toolCallCount = 0; diff --git a/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs b/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs index 3d7d280e..db48525c 100644 --- a/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs +++ b/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs @@ -5,7 +5,12 @@ using System.Text.Json; using AIStudio.Chat; using AIStudio.Settings; +using AIStudio.Tools.PluginSystem; +using AIStudio.Tools.Rust; using AIStudio.Tools.ToolCallingSystem; +using AIStudio.Tools.Services; + +using Microsoft.Extensions.DependencyInjection; namespace AIStudio.Provider.OpenAI; @@ -15,6 +20,7 @@ namespace AIStudio.Provider.OpenAI; public sealed class ProviderOpenAI() : BaseProvider(LLMProviders.OPEN_AI, "https://api.openai.com/v1/", LOGGER) { private static readonly ILogger LOGGER = Program.LOGGER_FACTORY.CreateLogger(); + private static string TB(string fallbackEN) => I18N.I.T(fallbackEN, typeof(ProviderOpenAI).Namespace, nameof(ProviderOpenAI)); #region Implementation of IProvider @@ -64,12 +70,6 @@ public sealed class ProviderOpenAI() : BaseProvider(LLMProviders.OPEN_AI, "https // Check if we are using the Responses API or the Chat Completion API: var usingResponsesAPI = modelCapabilities.Contains(Capability.RESPONSES_API); - var useChatCompletionsForTools = - chatThread.RuntimeSelectedToolIds.Count > 0 && - modelCapabilities.Contains(Capability.CHAT_COMPLETION_API) && - modelCapabilities.Contains(Capability.FUNCTION_CALLING); - if (useChatCompletionsForTools) - usingResponsesAPI = false; // Prepare the request path based on the API we are using: var requestPath = usingResponsesAPI ? 
"responses" : "chat/completions"; @@ -82,7 +82,7 @@ public sealed class ProviderOpenAI() : BaseProvider(LLMProviders.OPEN_AI, "https var providerConfidence = this.Provider.GetConfidence(settingsManager).Level; var minimumWebSearchConfidence = settingsManager.GetMinimumProviderConfidenceForTool(ToolSelectionRules.WEB_SEARCH_TOOL_ID); var isWebSearchAllowed = ToolSelectionRules.IsProviderConfidenceAllowed(providerConfidence, minimumWebSearchConfidence); - IList providerTools = modelCapabilities.Contains(Capability.WEB_SEARCH) && isWebSearchAllowed + IList providerTools = modelCapabilities.Contains(Capability.WEB_SEARCH) && isWebSearchAllowed ? [ ProviderTools.WEB_SEARCH ] : []; @@ -166,6 +166,43 @@ public sealed class ProviderOpenAI() : BaseProvider(LLMProviders.OPEN_AI, "https ? $"data:{attachment.DetermineMimeType()};base64,{base64Content}" : string.Empty, }); + + var baseInput = new List { systemPrompt }; + baseInput.AddRange(messages.Cast()); + + var toolRegistry = Program.SERVICE_PROVIDER.GetService(); + var toolExecutor = Program.SERVICE_PROVIDER.GetService(); + var currentAssistantContent = chatThread.Blocks.LastOrDefault(x => x.Role is ChatRole.AI)?.Content as ContentText; + currentAssistantContent?.ToolInvocations.Clear(); + + IReadOnlyList<(ToolDefinition Definition, IToolImplementation Implementation)> runnableTools = toolRegistry is null + ? 
[]
+            : await toolRegistry.GetRunnableToolsAsync(
+                chatThread.RuntimeComponent,
+                chatThread.RuntimeSelectedToolIds,
+                modelCapabilities,
+                providerConfidence,
+                settingsManager.IsToolSelectionVisible(chatThread.RuntimeComponent));
+
+        if (usingResponsesAPI && toolExecutor is not null && runnableTools.Count > 0)
+        {
+            await foreach (var content in this.StreamResponsesWithLocalTools(
+                               chatModel,
+                               baseInput,
+                               apiParameters,
+                               runnableTools,
+                               toolExecutor,
+                               currentAssistantContent,
+                               requestedSecret,
+                               providerConfidence,
+                               token))
+                yield return content;
+
+            yield break;
+        }
+
+        if (runnableTools.Count > 0)
+            providerTools = [];

         //
         // Create the request: either for the Responses API or the Chat Completion API
@@ -191,7 +228,7 @@ public sealed class ProviderOpenAI() : BaseProvider(LLMProviders.OPEN_AI, "https
                 Model = chatModel.Id,

                 // All messages go into the input field:
-                Input = [systemPrompt, ..messages],
+                Input = baseInput,

                 // Right now, we only support streaming completions:
                 Stream = true,
@@ -200,7 +237,7 @@ public sealed class ProviderOpenAI() : BaseProvider(LLMProviders.OPEN_AI, "https
                 Store = false,

                 // Tools we want to use:
-                ProviderTools = providerTools,
+                Tools = providerTools,

                 // Additional API parameters:
                 AdditionalApiParameters = apiParameters
@@ -230,6 +267,160 @@ public sealed class ProviderOpenAI() : BaseProvider(LLMProviders.OPEN_AI, "https
             yield return content;
     }

+    /// <summary>
+    /// Runs the tool-calling loop against the Responses API: sends non-streaming requests,
+    /// executes the function calls the model asks for through the tool executor, and appends
+    /// the results to the input until the model answers without requesting another tool.
+    /// </summary>
+    private async IAsyncEnumerable<ContentStreamChunk> StreamResponsesWithLocalTools(
+        Model chatModel,
+        IList<object> baseInput,
+        IDictionary<string, object> apiParameters,
+        IReadOnlyList<(ToolDefinition Definition, IToolImplementation Implementation)> runnableTools,
+        ToolExecutor toolExecutor,
+        ContentText? currentAssistantContent,
+        RequestedSecret requestedSecret,
+        ConfidenceLevel providerConfidence,
+        [EnumeratorCancellation] CancellationToken token)
+    {
+        var providerTools = runnableTools
+            .Select(x => (object)ProviderToolAdapters.ToResponsesTool(x.Definition))
+            .ToList();
+        var internalItems = new List<object>();
+        var toolCallCount = 0;
+
+        while (true)
+        {
+            var requestDto = new ResponsesAPIRequest
+            {
+                Model = chatModel.Id,
+                Input = [..baseInput, ..internalItems],
+                // The complete response object is parsed below, so streaming is turned off here:
+                Stream = false,
+                Store = false,
+                Tools = providerTools,
+                AdditionalApiParameters = apiParameters,
+            };
+            var response = await this.ExecuteResponsesRequest(requestDto, requestedSecret, token);
+            if (response is null)
+            {
+                if (currentAssistantContent is not null)
+                {
+                    currentAssistantContent.ToolRuntimeStatus = new();
+                    await currentAssistantContent.StreamingEvent();
+                }
+
+                yield break;
+            }
+
+            var functionCalls = response.GetFunctionCalls();
+            if (functionCalls.Count == 0)
+            {
+                if (currentAssistantContent is not null)
+                {
+                    currentAssistantContent.ToolRuntimeStatus = new();
+                    await currentAssistantContent.StreamingEvent();
+                }
+
+                var textOutput = response.GetTextOutput();
+                if (!string.IsNullOrWhiteSpace(textOutput))
+                    yield return new ContentStreamChunk(textOutput, []);
+                else if (toolCallCount > 0)
+                    yield return new ContentStreamChunk(TB("The model completed the tool call but did not return a final answer."), []);
+
+                yield break;
+            }
+
+            if (currentAssistantContent is not null)
+            {
+                currentAssistantContent.ToolRuntimeStatus = new ToolRuntimeStatus
+                {
+                    IsRunning = true,
+                    ToolNames = functionCalls
+                        .Select(x => runnableTools.FirstOrDefault(tool => tool.Definition.Function.Name.Equals(x.Name, StringComparison.Ordinal)).Implementation?.GetDisplayName() ?? x.Name)
+                        .ToList(),
+                };
+                await currentAssistantContent.StreamingEvent();
+            }
+
+            // Feed the model's output items back as input; the function_call_output items
+            // added below refer to them by call id:
+            foreach (var outputItem in response.Output)
+                internalItems.Add(outputItem);
+
+            foreach (var functionCall in functionCalls)
+            {
+                toolCallCount++;
+                if (toolCallCount > 10)
+                {
+                    var limitMessage = TB("Tool calling stopped because the maximum of 10 tool calls was reached.");
+                    currentAssistantContent?.ToolInvocations.Add(new ToolInvocationTrace
+                    {
+                        Order = toolCallCount,
+                        ToolId = functionCall.Name,
+                        ToolName = functionCall.Name,
+                        ToolCallId = functionCall.CallId,
+                        Status = ToolInvocationTraceStatus.BLOCKED,
+                        StatusMessage = limitMessage,
+                        Result = limitMessage,
+                    });
+
+                    if (currentAssistantContent is not null)
+                    {
+                        currentAssistantContent.ToolRuntimeStatus = new();
+                        await currentAssistantContent.StreamingEvent();
+                    }
+
+                    yield return new ContentStreamChunk(limitMessage, []);
+                    yield break;
+                }
+
+                var (toolContent, trace) = await toolExecutor.ExecuteAsync(
+                    functionCall.CallId,
+                    functionCall.Name,
+                    functionCall.Arguments,
+                    runnableTools,
+                    providerConfidence,
+                    toolCallCount,
+                    token);
+
+                currentAssistantContent?.ToolInvocations.Add(trace);
+                internalItems.Add(new ResponsesFunctionCallOutputItem
+                {
+                    CallId = functionCall.CallId,
+                    Output = toolContent,
+                });
+            }
+
+            if (currentAssistantContent is not null)
+                await currentAssistantContent.StreamingEvent();
+        }
+    }
+
+    /// <summary>
+    /// Sends a single Responses API request and deserializes the body. Returns null (after
+    /// logging and notifying the user) when the status code does not indicate success.
+    /// </summary>
+    private async Task<ResponsesResponse?> ExecuteResponsesRequest(ResponsesAPIRequest requestDto, RequestedSecret requestedSecret, CancellationToken token)
+    {
+        using var request = new HttpRequestMessage(HttpMethod.Post, "responses");
+        request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", await requestedSecret.Secret.Decrypt(ENCRYPTION));
+        request.Content = new StringContent(JsonSerializer.Serialize(requestDto, JSON_SERIALIZER_OPTIONS), Encoding.UTF8, "application/json");
+
+        using var response = await this.httpClient.SendAsync(request, token);
+        if (!response.IsSuccessStatusCode)
+        {
+            var responseBody = await response.Content.ReadAsStringAsync(token);
+            LOGGER.LogError("Tool calling Responses API request failed with status code {ResponseStatusCode} and body: '{ResponseBody}'.", response.StatusCode, responseBody);
+            await MessageBus.INSTANCE.SendError(new(
+                Icons.Material.Filled.Build,
+                string.Format(TB("The tool calling request failed with status code {0}. See the logs for details."), (int)response.StatusCode)));
+            return null;
+        }
+
+        return await response.Content.ReadFromJsonAsync<ResponsesResponse>(JSON_SERIALIZER_OPTIONS, token);
+    }
+
     #pragma warning disable CS1998 // Async method lacks 'await' operators and will run synchronously

     ///
diff --git a/app/MindWork AI Studio/Provider/OpenAI/ProviderToolAdapters.cs b/app/MindWork AI Studio/Provider/OpenAI/ProviderToolAdapters.cs
new file mode 100644
index 00000000..5c426ae1
--- /dev/null
+++ b/app/MindWork AI Studio/Provider/OpenAI/ProviderToolAdapters.cs
@@ -0,0 +1,35 @@
+using AIStudio.Tools.ToolCallingSystem;
+
+namespace AIStudio.Provider.OpenAI;
+
+/// <summary>
+/// Converts the canonical AI Studio tool definition into provider-specific wire shapes.
+/// </summary>
+public static class ProviderToolAdapters
+{
+    /// <summary>
+    /// Builds the nested function tool shape used by Chat Completions compatible APIs.
+    /// </summary>
+    public static object ToChatCompletionTool(ToolDefinition definition) => new
+    {
+        type = "function",
+        function = new
+        {
+            name = definition.Function.Name,
+            description = definition.Function.Description,
+            parameters = definition.Function.Parameters,
+            strict = definition.Function.Strict,
+        }
+    };
+
+    ///
+    /// Builds the flat function tool shape used by the OpenAI Responses API.
+ /// + public static ResponsesFunctionTool ToResponsesTool(ToolDefinition definition) => new() + { + Name = definition.Function.Name, + Description = definition.Function.Description, + Parameters = definition.Function.Parameters, + Strict = definition.Function.Strict, + }; +} diff --git a/app/MindWork AI Studio/Provider/OpenAI/ResponsesAPIRequest.cs b/app/MindWork AI Studio/Provider/OpenAI/ResponsesAPIRequest.cs index 739ad7ad..148edc79 100644 --- a/app/MindWork AI Studio/Provider/OpenAI/ResponsesAPIRequest.cs +++ b/app/MindWork AI Studio/Provider/OpenAI/ResponsesAPIRequest.cs @@ -6,16 +6,16 @@ namespace AIStudio.Provider.OpenAI; /// The request body for the Responses API. /// /// Which model to use. -/// The chat messages. +/// The chat messages and Responses API input items. /// Whether to stream the response. /// Whether to store the response on the server (usually OpenAI's infrastructure). -/// The provider-side tools to use for the request. +/// The provider-side tools and local function tools to use for the request. public record ResponsesAPIRequest( string Model, - IList Input, + IList Input, bool Stream, bool Store, - [property: JsonPropertyName("tools")] IList ProviderTools) + IList Tools) { public ResponsesAPIRequest() : this(string.Empty, [], true, false, []) { diff --git a/app/MindWork AI Studio/Provider/OpenAI/ResponsesFunctionCallItem.cs b/app/MindWork AI Studio/Provider/OpenAI/ResponsesFunctionCallItem.cs new file mode 100644 index 00000000..25114a76 --- /dev/null +++ b/app/MindWork AI Studio/Provider/OpenAI/ResponsesFunctionCallItem.cs @@ -0,0 +1,15 @@ +namespace AIStudio.Provider.OpenAI; + +/// +/// A function call item returned by the OpenAI Responses API. 
+///
+public sealed record ResponsesFunctionCallItem
+{
+    public string Type { get; init; } = string.Empty;
+
+    public string CallId { get; init; } = string.Empty;
+
+    public string Name { get; init; } = string.Empty;
+
+    public string Arguments { get; init; } = string.Empty;
+}
diff --git a/app/MindWork AI Studio/Provider/OpenAI/ResponsesFunctionCallOutputItem.cs b/app/MindWork AI Studio/Provider/OpenAI/ResponsesFunctionCallOutputItem.cs
new file mode 100644
index 00000000..19e9bedb
--- /dev/null
+++ b/app/MindWork AI Studio/Provider/OpenAI/ResponsesFunctionCallOutputItem.cs
@@ -0,0 +1,16 @@
+namespace AIStudio.Provider.OpenAI;
+
+/// <summary>
+/// A local function result item sent back to the OpenAI Responses API.
+/// </summary>
+public sealed record ResponsesFunctionCallOutputItem
+{
+    /// <summary>The item type; defaults to "function_call_output".</summary>
+    public string Type { get; init; } = "function_call_output";
+
+    /// <summary>The call id of the function call this result belongs to.</summary>
+    public string CallId { get; init; } = string.Empty;
+
+    /// <summary>The textual result produced by the local tool.</summary>
+    public string Output { get; init; } = string.Empty;
+}
diff --git a/app/MindWork AI Studio/Provider/OpenAI/ResponsesFunctionTool.cs b/app/MindWork AI Studio/Provider/OpenAI/ResponsesFunctionTool.cs
new file mode 100644
index 00000000..fe9f5dc0
--- /dev/null
+++ b/app/MindWork AI Studio/Provider/OpenAI/ResponsesFunctionTool.cs
@@ -0,0 +1,24 @@
+using System.Text.Json;
+
+namespace AIStudio.Provider.OpenAI;
+
+/// <summary>
+/// The flat function tool definition shape expected by the OpenAI Responses API.
+/// </summary>
+public sealed record ResponsesFunctionTool
+{
+    /// <summary>The tool type; defaults to "function".</summary>
+    public string Type { get; init; } = "function";
+
+    /// <summary>The function name.</summary>
+    public string Name { get; init; } = string.Empty;
+
+    /// <summary>The function description.</summary>
+    public string Description { get; init; } = string.Empty;
+
+    /// <summary>The function parameters as a raw JSON element.</summary>
+    public JsonElement Parameters { get; init; }
+
+    /// <summary>The strict flag of the function definition.</summary>
+    public bool Strict { get; init; }
+}
diff --git a/app/MindWork AI Studio/Provider/OpenAI/ResponsesResponse.cs b/app/MindWork AI Studio/Provider/OpenAI/ResponsesResponse.cs
new file mode 100644
index 00000000..69682e22
--- /dev/null
+++ b/app/MindWork AI Studio/Provider/OpenAI/ResponsesResponse.cs
@@ -0,0 +1,76 @@
+using System.Text.Json;
+
+namespace AIStudio.Provider.OpenAI;
+
+/// <summary>
+/// Non-streaming OpenAI Responses API result used during local tool execution.
+/// </summary>
+public sealed record ResponsesResponse
+{
+    public string Id { get; init; } = string.Empty;
+
+    public string Model { get; init; } = string.Empty;
+
+    /// <summary>Optional text output; preferred by <see cref="GetTextOutput"/> when it is not blank.</summary>
+    public string? OutputText { get; init; }
+
+    /// <summary>The raw output items of the response.</summary>
+    public IList<JsonElement> Output { get; init; } = [];
+
+    /// <summary>
+    /// Extracts the output items of type "function_call" and drops those without a call id or name.
+    /// </summary>
+    public IReadOnlyList<ResponsesFunctionCallItem> GetFunctionCalls() => this.Output
+        .Where(x => ReadString(x, "type").Equals("function_call", StringComparison.Ordinal))
+        .Select(x => new ResponsesFunctionCallItem
+        {
+            Type = ReadString(x, "type"),
+            CallId = ReadString(x, "call_id"),
+            Name = ReadString(x, "name"),
+            Arguments = ReadString(x, "arguments"),
+        })
+        .Where(x => !string.IsNullOrWhiteSpace(x.CallId) && !string.IsNullOrWhiteSpace(x.Name))
+        .ToList();
+
+    /// <summary>
+    /// Returns <see cref="OutputText"/> when it is not blank; otherwise concatenates the text of all
+    /// "output_text" content parts found in the "message" output items.
+    /// </summary>
+    public string GetTextOutput()
+    {
+        if (!string.IsNullOrWhiteSpace(this.OutputText))
+            return this.OutputText;
+
+        return string.Concat(this.Output
+            .Where(x => ReadString(x, "type").Equals("message", StringComparison.Ordinal))
+            .SelectMany(ReadContentItems)
+            .Where(x => ReadString(x, "type").Equals("output_text", StringComparison.Ordinal))
+            .Select(x => ReadString(x, "text")));
+    }
+
+    /// <summary>
+    /// Enumerates the elements of the item's "content" array; yields nothing when the item is not
+    /// an object or has no such array.
+    /// </summary>
+    private static IEnumerable<JsonElement> ReadContentItems(JsonElement outputItem)
+    {
+        if (outputItem.ValueKind is not JsonValueKind.Object ||
+            !outputItem.TryGetProperty("content", out var content) ||
+            content.ValueKind is not JsonValueKind.Array)
+            yield break;
+
+        foreach (var contentItem in content.EnumerateArray())
+            yield return contentItem;
+    }
+
+    /// <summary>
+    /// Reads a string property; returns an empty string when the item is not an object, the
+    /// property is missing, or its value is not a JSON string.
+    /// </summary>
+    private static string ReadString(JsonElement item, string propertyName)
+    {
+        if (item.ValueKind is not JsonValueKind.Object ||
+            !item.TryGetProperty(propertyName, out var property) ||
+            property.ValueKind is not JsonValueKind.String)
+            return string.Empty;
+
+        return property.GetString() ?? string.Empty;
+    }
+}
diff --git a/app/MindWork AI Studio/Tools/ToolCallingSystem/ToolRegistry.cs b/app/MindWork AI Studio/Tools/ToolCallingSystem/ToolRegistry.cs
index 9b95162f..7c77ed1e 100644
--- a/app/MindWork AI Studio/Tools/ToolCallingSystem/ToolRegistry.cs
+++ b/app/MindWork AI Studio/Tools/ToolCallingSystem/ToolRegistry.cs
@@ -122,7 +122,8 @@ public sealed class ToolRegistry
         if (!isToolSelectionVisible)
             return [];

-        if (!modelCapabilities.Contains(Capability.CHAT_COMPLETION_API) || !modelCapabilities.Contains(Capability.FUNCTION_CALLING))
+        if (!modelCapabilities.Contains(Capability.FUNCTION_CALLING) ||
+            (!modelCapabilities.Contains(Capability.CHAT_COMPLETION_API) && !modelCapabilities.Contains(Capability.RESPONSES_API)))
             return [];

         var selectedToolIdSet = ToolSelectionRules.NormalizeSelection(selectedToolIds);
diff --git a/app/MindWork AI Studio/wwwroot/changelog/v26.3.1.md b/app/MindWork AI Studio/wwwroot/changelog/v26.3.1.md
index 26bb30b0..a434a0f9 100644
--- a/app/MindWork AI Studio/wwwroot/changelog/v26.3.1.md
+++ b/app/MindWork AI Studio/wwwroot/changelog/v26.3.1.md
@@ -28,6 +28,7 @@
 - Fixed an issue where assistants hidden via configuration plugins still appear in "Send to ..." menus. Thanks, Gunnar, for reporting this issue.
 - Fixed an issue with voice recording where AI Studio could log errors and keep the feature available even though required parts failed to initialize. Voice recording is now disabled automatically for the current session in that case.
- Fixed an issue where the app could turn white or appear invisible in certain chats after HTML-like content was shown. Thanks, Inga, for reporting this issue and providing some context on how to reproduce it. +- Fixed an issue where tools could not be used with OpenAI models that use the Responses API. - Fixed security issues in the native app runtime by strengthening how AI Studio creates and protects the secret values used for its internal secure connection. - Updated several security-sensitive Rust dependencies in the native runtime to address known vulnerabilities. - Updated .NET to v9.0.14 diff --git a/documentation/Tools.md b/documentation/Tools.md index 24269dfd..c589ce59 100644 --- a/documentation/Tools.md +++ b/documentation/Tools.md @@ -15,6 +15,40 @@ At startup, `ToolRegistry` reads all JSON definitions and matches each definitio The provider only sees tools that are available for the current component, selected by the user or defaults, supported by the model, configured correctly, and allowed by the provider confidence rules. +## Provider API Shapes + +The JSON definition in `wwwroot/tool_definitions` is the single source of truth for a tool. Do not create separate tool definition files for different provider APIs. Provider-specific request shapes are generated in code from the same `ToolDefinition`. + +Chat Completions compatible APIs use a nested function shape: + +```json +{ + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location.", + "parameters": {}, + "strict": true + } +} +``` + +The OpenAI Responses API uses a flat function shape: + +```json +{ + "type": "function", + "name": "get_current_weather", + "description": "Get the current weather in a given location.", + "parameters": {}, + "strict": true +} +``` + +Keep this difference contained in provider adapter code. `ProviderToolAdapters` maps a canonical `ToolDefinition` to the Chat Completions or Responses wire shape. 
Tool implementations should not know which provider API shape was used. + +Tool result handling also differs by API. The Chat Completions API returns tool calls in `message.tool_calls` and receives results as `role: "tool"` messages. The Responses API returns `function_call` output items and receives results as `function_call_output` input items correlated by `call_id`. Both paths still execute local tools through `ToolExecutor`, so validation, provider confidence checks, trace formatting, and blocked-call behavior stay shared. + ## Definition File Create one JSON file per tool under `wwwroot/tool_definitions`. The file describes the user-visible tool metadata, optional settings, and the function schema sent to the model.