diff --git a/app/MindWork AI Studio/Agents/AgentBase.cs b/app/MindWork AI Studio/Agents/AgentBase.cs index 2b9239d1..4636908c 100644 --- a/app/MindWork AI Studio/Agents/AgentBase.cs +++ b/app/MindWork AI Studio/Agents/AgentBase.cs @@ -73,7 +73,6 @@ public abstract class AgentBase(ILogger logger, SettingsManager setti WorkspaceId = Guid.Empty, ChatId = Guid.NewGuid(), Name = string.Empty, - Seed = this.RNG.Next(), SystemPrompt = systemPrompt, Blocks = [], }; diff --git a/app/MindWork AI Studio/Assistants/AssistantBase.razor.cs b/app/MindWork AI Studio/Assistants/AssistantBase.razor.cs index 12ad2757..c0d86219 100644 --- a/app/MindWork AI Studio/Assistants/AssistantBase.razor.cs +++ b/app/MindWork AI Studio/Assistants/AssistantBase.razor.cs @@ -20,9 +20,6 @@ public abstract partial class AssistantBase : AssistantLowerBase wher [Inject] protected IJSRuntime JsRuntime { get; init; } = null!; - - [Inject] - protected ThreadSafeRandom RNG { get; init; } = null!; [Inject] protected ISnackbar Snackbar { get; init; } = null!; @@ -199,7 +196,6 @@ public abstract partial class AssistantBase : AssistantLowerBase wher WorkspaceId = Guid.Empty, ChatId = Guid.NewGuid(), Name = string.Format(this.TB("Assistant - {0}"), this.Title), - Seed = this.RNG.Next(), Blocks = [], }; } @@ -215,7 +211,6 @@ public abstract partial class AssistantBase : AssistantLowerBase wher WorkspaceId = workspaceId, ChatId = chatId, Name = name, - Seed = this.RNG.Next(), Blocks = [], }; diff --git a/app/MindWork AI Studio/Chat/ChatThread.cs b/app/MindWork AI Studio/Chat/ChatThread.cs index 0193ce28..48ac119a 100644 --- a/app/MindWork AI Studio/Chat/ChatThread.cs +++ b/app/MindWork AI Studio/Chat/ChatThread.cs @@ -59,11 +59,6 @@ public sealed record ChatThread /// The name of the chat thread. Usually generated by an AI model or manually edited by the user. /// public string Name { get; set; } = string.Empty; - - /// - /// The seed for the chat thread. Some providers use this to generate deterministic results. - /// - public int Seed { get; init; } /// /// The current system prompt for the chat thread. diff --git a/app/MindWork AI Studio/Components/ChatComponent.razor.cs b/app/MindWork AI Studio/Components/ChatComponent.razor.cs index e75e0ca9..849127e4 100644 --- a/app/MindWork AI Studio/Components/ChatComponent.razor.cs +++ b/app/MindWork AI Studio/Components/ChatComponent.razor.cs @@ -34,9 +34,6 @@ public partial class ChatComponent : MSGComponentBase, IAsyncDisposable [Inject] private ILogger Logger { get; set; } = null!; - [Inject] - private ThreadSafeRandom RNG { get; init; } = null!; - [Inject] private IDialogService DialogService { get; init; } = null!; @@ -436,7 +433,6 @@ public partial class ChatComponent : MSGComponentBase, IAsyncDisposable ChatId = Guid.NewGuid(), DataSourceOptions = this.earlyDataSourceOptions, Name = this.ExtractThreadName(this.userInput), - Seed = this.RNG.Next(), Blocks = this.currentChatTemplate == ChatTemplate.NO_CHAT_TEMPLATE ? [] : this.currentChatTemplate.ExampleConversation.Select(x => x.DeepClone()).ToList(), }; @@ -674,7 +670,6 @@ public partial class ChatComponent : MSGComponentBase, IAsyncDisposable WorkspaceId = this.currentWorkspaceId, ChatId = Guid.NewGuid(), Name = string.Empty, - Seed = this.RNG.Next(), Blocks = this.currentChatTemplate == ChatTemplate.NO_CHAT_TEMPLATE ? 
[] : this.currentChatTemplate.ExampleConversation.Select(x => x.DeepClone()).ToList(), }; } diff --git a/app/MindWork AI Studio/Components/Workspaces.razor.cs b/app/MindWork AI Studio/Components/Workspaces.razor.cs index 7fc51877..f3564e65 100644 --- a/app/MindWork AI Studio/Components/Workspaces.razor.cs +++ b/app/MindWork AI Studio/Components/Workspaces.razor.cs @@ -16,9 +16,6 @@ public partial class Workspaces : MSGComponentBase [Inject] private IDialogService DialogService { get; init; } = null!; - [Inject] - private ThreadSafeRandom RNG { get; init; } = null!; - [Inject] private ILogger Logger { get; init; } = null!; @@ -576,7 +573,6 @@ public partial class Workspaces : MSGComponentBase WorkspaceId = workspaceId, ChatId = Guid.NewGuid(), Name = string.Empty, - Seed = this.RNG.Next(), SystemPrompt = SystemPrompts.DEFAULT, Blocks = [], }; diff --git a/app/MindWork AI Studio/Pages/Writer.razor.cs b/app/MindWork AI Studio/Pages/Writer.razor.cs index 8bd80016..f9f65db4 100644 --- a/app/MindWork AI Studio/Pages/Writer.razor.cs +++ b/app/MindWork AI Studio/Pages/Writer.razor.cs @@ -77,7 +77,6 @@ public partial class Writer : MSGComponentBase WorkspaceId = Guid.Empty, ChatId = Guid.NewGuid(), Name = string.Empty, - Seed = 798798, SystemPrompt = """ You are an assistant who helps with writing documents. You receive a sample from a document as input. As output, you provide how the begun sentence could diff --git a/app/MindWork AI Studio/Provider/AlibabaCloud/ProviderAlibabaCloud.cs b/app/MindWork AI Studio/Provider/AlibabaCloud/ProviderAlibabaCloud.cs index 22d79441..c04cb0cf 100644 --- a/app/MindWork AI Studio/Provider/AlibabaCloud/ProviderAlibabaCloud.cs +++ b/app/MindWork AI Studio/Provider/AlibabaCloud/ProviderAlibabaCloud.cs @@ -36,7 +36,7 @@ public sealed class ProviderAlibabaCloud(ILogger logger) : BaseProvider("https:/ }; // Prepare the AlibabaCloud HTTP chat request: - var alibabaCloudChatRequest = JsonSerializer.Serialize(new ChatRequest + var alibabaCloudChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest { Model = chatModel.Id, @@ -77,7 +77,7 @@ public sealed class ProviderAlibabaCloud(ILogger logger) : BaseProvider("https:/ return request; } - await foreach (var content in this.StreamChatCompletionInternal("AlibabaCloud", RequestBuilder, token)) + await foreach (var content in this.StreamChatCompletionInternal("AlibabaCloud", RequestBuilder, token)) yield return content; } @@ -156,7 +156,9 @@ public sealed class ProviderAlibabaCloud(ILogger logger) : BaseProvider("https:/ Capability.AUDIO_INPUT, Capability.SPEECH_INPUT, Capability.VIDEO_INPUT, - Capability.TEXT_OUTPUT, Capability.SPEECH_OUTPUT + Capability.TEXT_OUTPUT, Capability.SPEECH_OUTPUT, + + Capability.CHAT_COMPLETION_API, ]; // Check for Qwen 3: @@ -166,7 +168,8 @@ public sealed class ProviderAlibabaCloud(ILogger logger) : BaseProvider("https:/ Capability.TEXT_INPUT, Capability.TEXT_OUTPUT, - Capability.OPTIONAL_REASONING, Capability.FUNCTION_CALLING + Capability.OPTIONAL_REASONING, Capability.FUNCTION_CALLING, + Capability.CHAT_COMPLETION_API, ]; if(modelName.IndexOf("-vl-") is not -1) @@ -174,6 +177,8 @@ public sealed class ProviderAlibabaCloud(ILogger logger) : BaseProvider("https:/ [ Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT, Capability.TEXT_OUTPUT, + + Capability.CHAT_COMPLETION_API, ]; } @@ -185,7 +190,8 @@ public sealed class ProviderAlibabaCloud(ILogger logger) : BaseProvider("https:/ Capability.TEXT_INPUT, Capability.TEXT_OUTPUT, - Capability.ALWAYS_REASONING, 
Capability.FUNCTION_CALLING + Capability.ALWAYS_REASONING, Capability.FUNCTION_CALLING, + Capability.CHAT_COMPLETION_API, ]; } @@ -197,7 +203,8 @@ public sealed class ProviderAlibabaCloud(ILogger logger) : BaseProvider("https:/ Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT, Capability.TEXT_OUTPUT, - Capability.ALWAYS_REASONING + Capability.ALWAYS_REASONING, + Capability.CHAT_COMPLETION_API, ]; } @@ -207,7 +214,8 @@ public sealed class ProviderAlibabaCloud(ILogger logger) : BaseProvider("https:/ Capability.TEXT_INPUT, Capability.TEXT_OUTPUT, - Capability.FUNCTION_CALLING + Capability.FUNCTION_CALLING, + Capability.CHAT_COMPLETION_API, ]; } diff --git a/app/MindWork AI Studio/Provider/Anthropic/ProviderAnthropic.cs b/app/MindWork AI Studio/Provider/Anthropic/ProviderAnthropic.cs index a09564df..676a4365 100644 --- a/app/MindWork AI Studio/Provider/Anthropic/ProviderAnthropic.cs +++ b/app/MindWork AI Studio/Provider/Anthropic/ProviderAnthropic.cs @@ -72,7 +72,7 @@ public sealed class ProviderAnthropic(ILogger logger) : BaseProvider("https://ap return request; } - await foreach (var content in this.StreamChatCompletionInternal("Anthropic", RequestBuilder, token)) + await foreach (var content in this.StreamChatCompletionInternal("Anthropic", RequestBuilder, token)) yield return content; } @@ -122,7 +122,9 @@ public sealed class ProviderAnthropic(ILogger logger) : BaseProvider("https://ap Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT, Capability.TEXT_OUTPUT, - Capability.OPTIONAL_REASONING, Capability.FUNCTION_CALLING]; + Capability.OPTIONAL_REASONING, Capability.FUNCTION_CALLING, + Capability.CHAT_COMPLETION_API, + ]; // Claude 3.7 is able to do reasoning: if(modelName.StartsWith("claude-3-7")) @@ -130,7 +132,9 @@ public sealed class ProviderAnthropic(ILogger logger) : BaseProvider("https://ap Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT, Capability.TEXT_OUTPUT, - Capability.OPTIONAL_REASONING, Capability.FUNCTION_CALLING]; + Capability.OPTIONAL_REASONING, Capability.FUNCTION_CALLING, + Capability.CHAT_COMPLETION_API, + ]; // All other 3.x models are able to process text and images as input: if(modelName.StartsWith("claude-3-")) @@ -138,13 +142,17 @@ public sealed class ProviderAnthropic(ILogger logger) : BaseProvider("https://ap Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT, Capability.TEXT_OUTPUT, - Capability.FUNCTION_CALLING]; + Capability.FUNCTION_CALLING, + Capability.CHAT_COMPLETION_API, + ]; // Any other model is able to process text only: return [ Capability.TEXT_INPUT, Capability.TEXT_OUTPUT, - Capability.FUNCTION_CALLING]; + Capability.FUNCTION_CALLING, + Capability.CHAT_COMPLETION_API, + ]; } #endregion diff --git a/app/MindWork AI Studio/Provider/Anthropic/ResponseStreamLine.cs b/app/MindWork AI Studio/Provider/Anthropic/ResponseStreamLine.cs index c74e13ea..9b69ce4a 100644 --- a/app/MindWork AI Studio/Provider/Anthropic/ResponseStreamLine.cs +++ b/app/MindWork AI Studio/Provider/Anthropic/ResponseStreamLine.cs @@ -14,6 +14,21 @@ public readonly record struct ResponseStreamLine(string Type, int Index, Delta D /// public ContentStreamChunk GetContent() => new(this.Delta.Text, []); + + #region Implementation of IAnnotationStreamLine + + // + // Please note: Anthropic's API does not currently support sources in their + // OpenAI-compatible response stream. 
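// Sketch: the generic arguments of these StreamChatCompletionInternal calls are
// not visible in this rendering of the diff. Judging from the where-clauses of
// the new BaseProvider method further below, the signature and a matching call
// site presumably read as follows; the concrete Anthropic type arguments are
// assumptions:
//
//     protected async IAsyncEnumerable<ContentStreamChunk>
//         StreamChatCompletionInternal<TDelta, TAnnotation>(
//             string providerName,
//             Func<Task<HttpRequestMessage>> requestBuilder,
//             CancellationToken token = default)
//         where TDelta : IResponseStreamLine
//         where TAnnotation : IAnnotationStreamLine
//
//     await foreach (var content in this.StreamChatCompletionInternal<ResponseStreamLine, NoChatCompletionAnnotationStreamLine>("Anthropic", RequestBuilder, token))
//         yield return content;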
+ // + + /// + public bool ContainsSources() => false; + + /// + public IList GetSources() => []; + + #endregion } /// diff --git a/app/MindWork AI Studio/Provider/BaseProvider.cs b/app/MindWork AI Studio/Provider/BaseProvider.cs index cc81ab3c..4d41da0b 100644 --- a/app/MindWork AI Studio/Provider/BaseProvider.cs +++ b/app/MindWork AI Studio/Provider/BaseProvider.cs @@ -3,6 +3,7 @@ using System.Runtime.CompilerServices; using System.Text.Json; using AIStudio.Chat; +using AIStudio.Provider.OpenAI; using AIStudio.Settings; using AIStudio.Tools.PluginSystem; using AIStudio.Tools.Services; @@ -39,6 +40,7 @@ public abstract class BaseProvider : IProvider, ISecretId protected static readonly JsonSerializerOptions JSON_SERIALIZER_OPTIONS = new() { PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower, + Converters = { new AnnotationConverter() } }; /// @@ -123,10 +125,12 @@ public abstract class BaseProvider : IProvider, ISecretId break; } + var errorBody = await nextResponse.Content.ReadAsStringAsync(token); if (nextResponse.StatusCode is HttpStatusCode.Forbidden) { await MessageBus.INSTANCE.SendError(new(Icons.Material.Filled.Block, string.Format(TB("Tried to communicate with the LLM provider '{0}'. You might not be able to use this provider from your location. The provider message is: '{1}'"), this.InstanceName, nextResponse.ReasonPhrase))); this.logger.LogError($"Failed request with status code {nextResponse.StatusCode} (message = '{nextResponse.ReasonPhrase}')."); + this.logger.LogDebug($"Error body: {errorBody}"); errorMessage = nextResponse.ReasonPhrase; break; } @@ -135,6 +139,7 @@ public abstract class BaseProvider : IProvider, ISecretId { await MessageBus.INSTANCE.SendError(new(Icons.Material.Filled.CloudOff, string.Format(TB("Tried to communicate with the LLM provider '{0}'. The required message format might be changed. The provider message is: '{1}'"), this.InstanceName, nextResponse.ReasonPhrase))); this.logger.LogError($"Failed request with status code {nextResponse.StatusCode} (message = '{nextResponse.ReasonPhrase}')."); + this.logger.LogDebug($"Error body: {errorBody}"); errorMessage = nextResponse.ReasonPhrase; break; } @@ -143,6 +148,7 @@ public abstract class BaseProvider : IProvider, ISecretId { await MessageBus.INSTANCE.SendError(new(Icons.Material.Filled.CloudOff, string.Format(TB("Tried to communicate with the LLM provider '{0}'. Something was not found. The provider message is: '{1}'"), this.InstanceName, nextResponse.ReasonPhrase))); this.logger.LogError($"Failed request with status code {nextResponse.StatusCode} (message = '{nextResponse.ReasonPhrase}')."); + this.logger.LogDebug($"Error body: {errorBody}"); errorMessage = nextResponse.ReasonPhrase; break; } @@ -151,6 +157,7 @@ public abstract class BaseProvider : IProvider, ISecretId { await MessageBus.INSTANCE.SendError(new(Icons.Material.Filled.Key, string.Format(TB("Tried to communicate with the LLM provider '{0}'. The API key might be invalid. The provider message is: '{1}'"), this.InstanceName, nextResponse.ReasonPhrase))); this.logger.LogError($"Failed request with status code {nextResponse.StatusCode} (message = '{nextResponse.ReasonPhrase}')."); + this.logger.LogDebug($"Error body: {errorBody}"); errorMessage = nextResponse.ReasonPhrase; break; } @@ -159,6 +166,7 @@ public abstract class BaseProvider : IProvider, ISecretId { await MessageBus.INSTANCE.SendError(new(Icons.Material.Filled.CloudOff, string.Format(TB("Tried to communicate with the LLM provider '{0}'. The server might be down or having issues. 
The provider message is: '{1}'"), this.InstanceName, nextResponse.ReasonPhrase))); this.logger.LogError($"Failed request with status code {nextResponse.StatusCode} (message = '{nextResponse.ReasonPhrase}')."); + this.logger.LogDebug($"Error body: {errorBody}"); errorMessage = nextResponse.ReasonPhrase; break; } @@ -167,6 +175,7 @@ public abstract class BaseProvider : IProvider, ISecretId { await MessageBus.INSTANCE.SendError(new(Icons.Material.Filled.CloudOff, string.Format(TB("Tried to communicate with the LLM provider '{0}'. The provider is overloaded. The message is: '{1}'"), this.InstanceName, nextResponse.ReasonPhrase))); this.logger.LogError($"Failed request with status code {nextResponse.StatusCode} (message = '{nextResponse.ReasonPhrase}')."); + this.logger.LogDebug($"Error body: {errorBody}"); errorMessage = nextResponse.ReasonPhrase; break; } @@ -189,8 +198,20 @@ public abstract class BaseProvider : IProvider, ISecretId return new HttpRateLimitedStreamResult(true, false, string.Empty, response); } - protected async IAsyncEnumerable StreamChatCompletionInternal(string providerName, Func> requestBuilder, [EnumeratorCancellation] CancellationToken token = default) where T : struct, IResponseStreamLine + /// + /// Streams the chat completion from the provider using the Chat Completion API. + /// + /// The name of the provider. + /// A function that builds the request. + /// The cancellation token to use. + /// The type of the delta lines inside the stream. + /// The type of the annotation lines inside the stream. + /// The stream of content chunks. + protected async IAsyncEnumerable StreamChatCompletionInternal(string providerName, Func> requestBuilder, [EnumeratorCancellation] CancellationToken token = default) where TDelta : IResponseStreamLine where TAnnotation : IAnnotationStreamLine { + // Check if annotations are supported: + var annotationSupported = typeof(TAnnotation) != typeof(NoResponsesAnnotationStreamLine) && typeof(TAnnotation) != typeof(NoChatCompletionAnnotationStreamLine); + StreamReader? streamReader = null; try { @@ -217,7 +238,9 @@ public abstract class BaseProvider : IProvider, ISecretId if (streamReader is null) yield break; + // // Read the stream, line by line: + // while (true) { try @@ -240,7 +263,9 @@ public abstract class BaseProvider : IProvider, ISecretId yield break; } + // // Read the next line: + // string? line; try { @@ -266,28 +291,233 @@ public abstract class BaseProvider : IProvider, ISecretId if (line.StartsWith("data: [DONE]", StringComparison.InvariantCulture)) yield break; - T providerResponse; + // + // Process annotation lines: + // + if (annotationSupported && line.Contains(""" + "annotations":[ + """, StringComparison.InvariantCulture)) + { + TAnnotation? providerResponse; + + try + { + // We know that the line starts with "data: ". Hence, we can + // skip the first 6 characters to get the JSON data after that. + var jsonData = line[6..]; + + // Deserialize the JSON data: + providerResponse = JsonSerializer.Deserialize(jsonData, JSON_SERIALIZER_OPTIONS); + + if (providerResponse is null) + continue; + } + catch + { + // Skip invalid JSON data: + continue; + } + + // Skip empty responses: + if (!providerResponse.ContainsSources()) + continue; + + // Yield the response: + yield return new(string.Empty, providerResponse.GetSources()); + } + + // + // Process delta lines: + // + else + { + TDelta? providerResponse; + try + { + // We know that the line starts with "data: ". 
Hence, we can + // skip the first 6 characters to get the JSON data after that. + var jsonData = line[6..]; + + // Deserialize the JSON data: + providerResponse = JsonSerializer.Deserialize(jsonData, JSON_SERIALIZER_OPTIONS); + + if (providerResponse is null) + continue; + } + catch + { + // Skip invalid JSON data: + continue; + } + + // Skip empty responses: + if (!providerResponse.ContainsContent()) + continue; + + // Yield the response: + yield return providerResponse.GetContent(); + } + } + + streamReader.Dispose(); + } + + /// + /// Streams the chat completion from the provider using the Responses API. + /// + /// The name of the provider. + /// A function that builds the request. + /// The cancellation token to use. + /// The type of the delta lines inside the stream. + /// The type of the annotation lines inside the stream. + /// The stream of content chunks. + protected async IAsyncEnumerable StreamResponsesInternal(string providerName, Func> requestBuilder, [EnumeratorCancellation] CancellationToken token = default) where TDelta : IResponseStreamLine where TAnnotation : IAnnotationStreamLine + { + // Check if annotations are supported: + var annotationSupported = typeof(TAnnotation) != typeof(NoResponsesAnnotationStreamLine) && typeof(TAnnotation) != typeof(NoChatCompletionAnnotationStreamLine); + + StreamReader? streamReader = null; + try + { + // Send the request using exponential backoff: + var responseData = await this.SendRequest(requestBuilder, token); + if(responseData.IsFailedAfterAllRetries) + { + this.logger.LogError($"The {providerName} responses call failed: {responseData.ErrorMessage}"); + yield break; + } + + // Open the response stream: + var providerStream = await responseData.Response!.Content.ReadAsStreamAsync(token); + + // Add a stream reader to read the stream, line by line: + streamReader = new StreamReader(providerStream); + } + catch(Exception e) + { + await MessageBus.INSTANCE.SendError(new(Icons.Material.Filled.Stream, string.Format(TB("Tried to communicate with the LLM provider '{0}'. There were some problems with the request. The provider message is: '{1}'"), this.InstanceName, e.Message))); + this.logger.LogError($"Failed to stream responses from {providerName} '{this.InstanceName}': {e.Message}"); + } + + if (streamReader is null) + yield break; + + // + // Read the stream, line by line: + // + while (true) + { try { - // We know that the line starts with "data: ". Hence, we can - // skip the first 6 characters to get the JSON data after that. - var jsonData = line[6..]; - - // Deserialize the JSON data: - providerResponse = JsonSerializer.Deserialize(jsonData, JSON_SERIALIZER_OPTIONS); + if(streamReader.EndOfStream) + break; } - catch + catch (Exception e) { - // Skip invalid JSON data: - continue; + await MessageBus.INSTANCE.SendError(new(Icons.Material.Filled.Stream, string.Format(TB("Tried to stream the LLM provider '{0}' answer. There were some problems with the stream. The message is: '{1}'"), this.InstanceName, e.Message))); + this.logger.LogWarning($"Failed to read the end-of-stream state from {providerName} '{this.InstanceName}': {e.Message}"); + break; + } + + // Check if the token is canceled: + if (token.IsCancellationRequested) + { + this.logger.LogWarning($"The user canceled the responses for {providerName} '{this.InstanceName}'."); + streamReader.Close(); + yield break; } - // Skip empty responses: - if (!providerResponse.ContainsContent()) - continue; + // + // Read the next line: + // + string? 
line; + try + { + line = await streamReader.ReadLineAsync(token); + } + catch (Exception e) + { + await MessageBus.INSTANCE.SendError(new(Icons.Material.Filled.Stream, string.Format(TB("Tried to stream the LLM provider '{0}' answer. Was not able to read the stream. The message is: '{1}'"), this.InstanceName, e.Message))); + this.logger.LogError($"Failed to read the stream from {providerName} '{this.InstanceName}': {e.Message}"); + break; + } - // Yield the response: - yield return providerResponse.GetContent(); + // Skip empty lines: + if (string.IsNullOrWhiteSpace(line)) + continue; + + // Check if the line is the end of the stream: + if (line.StartsWith("event: response.completed", StringComparison.InvariantCulture)) + yield break; + + // + // Find delta lines: + // + if (line.StartsWith(""" + data: {"type":"response.output_text.delta" + """, StringComparison.InvariantCulture)) + { + TDelta? providerResponse; + try + { + // We know that the line starts with "data: ". Hence, we can + // skip the first 6 characters to get the JSON data after that. + var jsonData = line[6..]; + + // Deserialize the JSON data: + providerResponse = JsonSerializer.Deserialize(jsonData, JSON_SERIALIZER_OPTIONS); + + if (providerResponse is null) + continue; + } + catch + { + // Skip invalid JSON data: + continue; + } + + // Skip empty responses: + if (!providerResponse.ContainsContent()) + continue; + + // Yield the response: + yield return providerResponse.GetContent(); + } + + // + // Find annotation added lines: + // + else if (annotationSupported && line.StartsWith( + """ + data: {"type":"response.output_text.annotation.added" + """, StringComparison.InvariantCulture)) + { + TAnnotation? providerResponse; + try + { + // We know that the line starts with "data: ". Hence, we can + // skip the first 6 characters to get the JSON data after that. 
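// Sketch: sample stream lines as matched by the prefix checks in
// StreamResponsesInternal; the payload fields beyond "type" are assumed for
// illustration:
//
//     data: {"type":"response.output_text.delta","delta":"Hello"}
//     data: {"type":"response.output_text.annotation.added","annotation":{"type":"url_citation","title":"Example","url":"https://example.com"}}
//     event: response.completed
//
// The first shape feeds the delta branch, the second feeds the annotation
// branch, and the completed event terminates the read loop.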
+ var jsonData = line[6..]; + + // Deserialize the JSON data: + providerResponse = JsonSerializer.Deserialize(jsonData, JSON_SERIALIZER_OPTIONS); + + if (providerResponse is null) + continue; + } + catch + { + // Skip invalid JSON data: + continue; + } + + // Skip empty responses: + if (!providerResponse.ContainsSources()) + continue; + + // Yield the response: + yield return new(string.Empty, providerResponse.GetSources()); + } } streamReader.Dispose(); diff --git a/app/MindWork AI Studio/Provider/CapabilitiesOpenSource.cs b/app/MindWork AI Studio/Provider/CapabilitiesOpenSource.cs index 806b1d5b..1444ec34 100644 --- a/app/MindWork AI Studio/Provider/CapabilitiesOpenSource.cs +++ b/app/MindWork AI Studio/Provider/CapabilitiesOpenSource.cs @@ -34,11 +34,17 @@ public static class CapabilitiesOpenSource Capability.TEXT_OUTPUT, Capability.FUNCTION_CALLING, + Capability.CHAT_COMPLETION_API, ]; // The old vision models cannot do function calling: if (modelName.IndexOf("vision") is not -1) - return [Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT, Capability.TEXT_OUTPUT]; + return [ + Capability.TEXT_INPUT, + Capability.MULTIPLE_IMAGE_INPUT, + Capability.TEXT_OUTPUT, + Capability.CHAT_COMPLETION_API, + ]; // // All models >= 3.1 are able to do function calling: @@ -53,10 +59,14 @@ public static class CapabilitiesOpenSource Capability.TEXT_OUTPUT, Capability.FUNCTION_CALLING, + Capability.CHAT_COMPLETION_API, ]; // All other llama models can only do text input and output: - return [Capability.TEXT_INPUT, Capability.TEXT_OUTPUT]; + return [ + Capability.TEXT_INPUT, Capability.TEXT_OUTPUT, + Capability.CHAT_COMPLETION_API, + ]; } // @@ -66,9 +76,16 @@ public static class CapabilitiesOpenSource { if(modelName.IndexOf("deepseek-r1") is not -1 || modelName.IndexOf("deepseek r1") is not -1) - return [Capability.TEXT_INPUT, Capability.TEXT_OUTPUT, Capability.ALWAYS_REASONING]; + return [ + Capability.TEXT_INPUT, Capability.TEXT_OUTPUT, + Capability.ALWAYS_REASONING, + Capability.CHAT_COMPLETION_API, + ]; - return [Capability.TEXT_INPUT, Capability.TEXT_OUTPUT]; + return [ + Capability.TEXT_INPUT, Capability.TEXT_OUTPUT, + Capability.CHAT_COMPLETION_API, + ]; } // @@ -77,9 +94,16 @@ public static class CapabilitiesOpenSource if (modelName.IndexOf("qwen") is not -1 || modelName.IndexOf("qwq") is not -1) { if (modelName.IndexOf("qwq") is not -1) - return [Capability.TEXT_INPUT, Capability.TEXT_OUTPUT, Capability.ALWAYS_REASONING]; + return [ + Capability.TEXT_INPUT, Capability.TEXT_OUTPUT, + Capability.ALWAYS_REASONING, + Capability.CHAT_COMPLETION_API, + ]; - return [Capability.TEXT_INPUT, Capability.TEXT_OUTPUT]; + return [ + Capability.TEXT_INPUT, Capability.TEXT_OUTPUT, + Capability.CHAT_COMPLETION_API, + ]; } // @@ -93,7 +117,8 @@ public static class CapabilitiesOpenSource [ Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT, Capability.TEXT_OUTPUT, - Capability.FUNCTION_CALLING + Capability.FUNCTION_CALLING, + Capability.CHAT_COMPLETION_API, ]; if (modelName.IndexOf("3.1") is not -1) @@ -101,7 +126,8 @@ public static class CapabilitiesOpenSource [ Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT, Capability.TEXT_OUTPUT, - Capability.FUNCTION_CALLING + Capability.FUNCTION_CALLING, + Capability.CHAT_COMPLETION_API, ]; // Default: @@ -109,7 +135,8 @@ public static class CapabilitiesOpenSource [ Capability.TEXT_INPUT, Capability.TEXT_OUTPUT, - Capability.FUNCTION_CALLING + Capability.FUNCTION_CALLING, + Capability.CHAT_COMPLETION_API, ]; } @@ -123,6 +150,7 @@ public static class 
CapabilitiesOpenSource [ Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT, Capability.TEXT_OUTPUT, + Capability.CHAT_COMPLETION_API, ]; if(modelName.StartsWith("grok-3-mini")) @@ -132,6 +160,7 @@ public static class CapabilitiesOpenSource Capability.TEXT_OUTPUT, Capability.ALWAYS_REASONING, Capability.FUNCTION_CALLING, + Capability.CHAT_COMPLETION_API, ]; if(modelName.StartsWith("grok-3")) @@ -141,10 +170,41 @@ public static class CapabilitiesOpenSource Capability.TEXT_OUTPUT, Capability.FUNCTION_CALLING, + Capability.CHAT_COMPLETION_API, + ]; + } + + // + // OpenAI models: + // + if (modelName.IndexOf("gpt-oss") is not -1 || + modelName.IndexOf("gpt-3.5") is not -1) + { + if(modelName.IndexOf("gpt-oss") is not -1) + return + [ + Capability.TEXT_INPUT, + Capability.TEXT_OUTPUT, + + Capability.FUNCTION_CALLING, + Capability.WEB_SEARCH, + Capability.CHAT_COMPLETION_API, + ]; + + if(modelName.IndexOf("gpt-3.5") is not -1) + return + [ + Capability.TEXT_INPUT, + Capability.TEXT_OUTPUT, + + Capability.CHAT_COMPLETION_API, ]; } // Default: - return [Capability.TEXT_INPUT, Capability.TEXT_OUTPUT]; + return [ + Capability.TEXT_INPUT, Capability.TEXT_OUTPUT, + Capability.CHAT_COMPLETION_API, + ]; } } \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/Capability.cs b/app/MindWork AI Studio/Provider/Capability.cs index 047ec67b..97f56de2 100644 --- a/app/MindWork AI Studio/Provider/Capability.cs +++ b/app/MindWork AI Studio/Provider/Capability.cs @@ -94,4 +94,19 @@ public enum Capability /// The AI model can perform function calling, such as invoking APIs or executing functions. /// FUNCTION_CALLING, + + /// + /// The AI model can perform web search to retrieve information from the internet. + /// + WEB_SEARCH, + + /// + /// The AI model is used via the Chat Completion API. + /// + CHAT_COMPLETION_API, + + /// + /// The AI model is used via the Responses API. 
+ /// + RESPONSES_API, } \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/DeepSeek/ProviderDeepSeek.cs b/app/MindWork AI Studio/Provider/DeepSeek/ProviderDeepSeek.cs index c7ab556f..127fc493 100644 --- a/app/MindWork AI Studio/Provider/DeepSeek/ProviderDeepSeek.cs +++ b/app/MindWork AI Studio/Provider/DeepSeek/ProviderDeepSeek.cs @@ -35,7 +35,7 @@ public sealed class ProviderDeepSeek(ILogger logger) : BaseProvider("https://api }; // Prepare the DeepSeek HTTP chat request: - var deepSeekChatRequest = JsonSerializer.Serialize(new ChatRequest + var deepSeekChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest { Model = chatModel.Id, @@ -76,7 +76,7 @@ public sealed class ProviderDeepSeek(ILogger logger) : BaseProvider("https://api return request; } - await foreach (var content in this.StreamChatCompletionInternal("DeepSeek", RequestBuilder, token)) + await foreach (var content in this.StreamChatCompletionInternal("DeepSeek", RequestBuilder, token)) yield return content; } @@ -117,12 +117,14 @@ public sealed class ProviderDeepSeek(ILogger logger) : BaseProvider("https://api Capability.TEXT_OUTPUT, Capability.ALWAYS_REASONING, + Capability.CHAT_COMPLETION_API, ]; return [ Capability.TEXT_INPUT, Capability.TEXT_OUTPUT, + Capability.CHAT_COMPLETION_API, ]; } diff --git a/app/MindWork AI Studio/Provider/Fireworks/ProviderFireworks.cs b/app/MindWork AI Studio/Provider/Fireworks/ProviderFireworks.cs index 880804e0..a2aa95c1 100644 --- a/app/MindWork AI Studio/Provider/Fireworks/ProviderFireworks.cs +++ b/app/MindWork AI Studio/Provider/Fireworks/ProviderFireworks.cs @@ -4,6 +4,7 @@ using System.Text; using System.Text.Json; using AIStudio.Chat; +using AIStudio.Provider.OpenAI; using AIStudio.Settings; namespace AIStudio.Provider.Fireworks; @@ -77,7 +78,7 @@ public class ProviderFireworks(ILogger logger) : BaseProvider("https://api.firew return request; } - await foreach (var content in this.StreamChatCompletionInternal("Fireworks", RequestBuilder, token)) + await foreach (var content in this.StreamChatCompletionInternal("Fireworks", RequestBuilder, token)) yield return content; } diff --git a/app/MindWork AI Studio/Provider/Fireworks/ResponseStreamLine.cs b/app/MindWork AI Studio/Provider/Fireworks/ResponseStreamLine.cs index e9da7a53..4fd0bcaa 100644 --- a/app/MindWork AI Studio/Provider/Fireworks/ResponseStreamLine.cs +++ b/app/MindWork AI Studio/Provider/Fireworks/ResponseStreamLine.cs @@ -15,6 +15,20 @@ public readonly record struct ResponseStreamLine(string Id, string Object, uint /// public ContentStreamChunk GetContent() => new(this.Choices[0].Delta.Content, []); + + #region Implementation of IAnnotationStreamLine + + // + // Currently, Fireworks does not provide source citations in their response stream. 
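// Sketch: each model's capability list now carries exactly one of the two new
// API flags. ProviderOpenAI (further below in this diff) turns them into an
// endpoint choice like this:
//
//     var modelCapabilities = this.GetModelCapabilities(chatModel);
//     var usingResponsesAPI = modelCapabilities.Contains(Capability.RESPONSES_API);
//     var requestPath = usingResponsesAPI ? "responses" : "chat/completions";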
+ // + + /// + public bool ContainsSources() => false; + + /// + public IList GetSources() => []; + + #endregion } /// diff --git a/app/MindWork AI Studio/Provider/GWDG/ProviderGWDG.cs b/app/MindWork AI Studio/Provider/GWDG/ProviderGWDG.cs index b9a997d6..86a25f89 100644 --- a/app/MindWork AI Studio/Provider/GWDG/ProviderGWDG.cs +++ b/app/MindWork AI Studio/Provider/GWDG/ProviderGWDG.cs @@ -35,7 +35,7 @@ public sealed class ProviderGWDG(ILogger logger) : BaseProvider("https://chat-ai }; // Prepare the GWDG HTTP chat request: - var gwdgChatRequest = JsonSerializer.Serialize(new ChatRequest + var gwdgChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest { Model = chatModel.Id, @@ -76,7 +76,7 @@ public sealed class ProviderGWDG(ILogger logger) : BaseProvider("https://chat-ai return request; } - await foreach (var content in this.StreamChatCompletionInternal("GWDG", RequestBuilder, token)) + await foreach (var content in this.StreamChatCompletionInternal("GWDG", RequestBuilder, token)) yield return content; } diff --git a/app/MindWork AI Studio/Provider/Google/ProviderGoogle.cs b/app/MindWork AI Studio/Provider/Google/ProviderGoogle.cs index 7819614f..0322842a 100644 --- a/app/MindWork AI Studio/Provider/Google/ProviderGoogle.cs +++ b/app/MindWork AI Studio/Provider/Google/ProviderGoogle.cs @@ -78,7 +78,7 @@ public class ProviderGoogle(ILogger logger) : BaseProvider("https://generativela return request; } - await foreach (var content in this.StreamChatCompletionInternal("Google", RequestBuilder, token)) + await foreach (var content in this.StreamChatCompletionInternal("Google", RequestBuilder, token)) yield return content; } @@ -136,6 +136,7 @@ public class ProviderGoogle(ILogger logger) : BaseProvider("https://generativela Capability.TEXT_OUTPUT, Capability.ALWAYS_REASONING, Capability.FUNCTION_CALLING, + Capability.CHAT_COMPLETION_API, ]; // Image generation: @@ -146,6 +147,7 @@ public class ProviderGoogle(ILogger logger) : BaseProvider("https://generativela Capability.SPEECH_INPUT, Capability.VIDEO_INPUT, Capability.TEXT_OUTPUT, Capability.IMAGE_OUTPUT, + Capability.CHAT_COMPLETION_API, ]; // Realtime model: @@ -158,6 +160,7 @@ public class ProviderGoogle(ILogger logger) : BaseProvider("https://generativela Capability.TEXT_OUTPUT, Capability.SPEECH_OUTPUT, Capability.FUNCTION_CALLING, + Capability.CHAT_COMPLETION_API, ]; // The 2.0 flash models cannot call functions: @@ -168,6 +171,7 @@ public class ProviderGoogle(ILogger logger) : BaseProvider("https://generativela Capability.SPEECH_INPUT, Capability.VIDEO_INPUT, Capability.TEXT_OUTPUT, + Capability.CHAT_COMPLETION_API, ]; // The old 1.0 pro vision model: @@ -177,6 +181,7 @@ public class ProviderGoogle(ILogger logger) : BaseProvider("https://generativela Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT, Capability.TEXT_OUTPUT, + Capability.CHAT_COMPLETION_API, ]; // Default to all other Gemini models: @@ -188,6 +193,7 @@ public class ProviderGoogle(ILogger logger) : BaseProvider("https://generativela Capability.TEXT_OUTPUT, Capability.FUNCTION_CALLING, + Capability.CHAT_COMPLETION_API, ]; } @@ -199,6 +205,7 @@ public class ProviderGoogle(ILogger logger) : BaseProvider("https://generativela Capability.TEXT_OUTPUT, Capability.FUNCTION_CALLING, + Capability.CHAT_COMPLETION_API, ]; } diff --git a/app/MindWork AI Studio/Provider/Groq/ProviderGroq.cs b/app/MindWork AI Studio/Provider/Groq/ProviderGroq.cs index 8729b1d5..62f974c3 100644 --- a/app/MindWork AI Studio/Provider/Groq/ProviderGroq.cs +++ b/app/MindWork AI 
Studio/Provider/Groq/ProviderGroq.cs @@ -60,8 +60,6 @@ public class ProviderGroq(ILogger logger) : BaseProvider("https://api.groq.com/o _ => string.Empty, } }).ToList()], - - Seed = chatThread.Seed, // Right now, we only support streaming completions: Stream = true, @@ -80,7 +78,7 @@ public class ProviderGroq(ILogger logger) : BaseProvider("https://api.groq.com/o return request; } - await foreach (var content in this.StreamChatCompletionInternal("Groq", RequestBuilder, token)) + await foreach (var content in this.StreamChatCompletionInternal("Groq", RequestBuilder, token)) yield return content; } diff --git a/app/MindWork AI Studio/Provider/Helmholtz/ProviderHelmholtz.cs b/app/MindWork AI Studio/Provider/Helmholtz/ProviderHelmholtz.cs index bc8a3832..51f9a4c3 100644 --- a/app/MindWork AI Studio/Provider/Helmholtz/ProviderHelmholtz.cs +++ b/app/MindWork AI Studio/Provider/Helmholtz/ProviderHelmholtz.cs @@ -35,7 +35,7 @@ public sealed class ProviderHelmholtz(ILogger logger) : BaseProvider("https://ap }; // Prepare the Helmholtz HTTP chat request: - var helmholtzChatRequest = JsonSerializer.Serialize(new ChatRequest + var helmholtzChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest { Model = chatModel.Id, @@ -76,7 +76,7 @@ public sealed class ProviderHelmholtz(ILogger logger) : BaseProvider("https://ap return request; } - await foreach (var content in this.StreamChatCompletionInternal("Helmholtz", RequestBuilder, token)) + await foreach (var content in this.StreamChatCompletionInternal("Helmholtz", RequestBuilder, token)) yield return content; } diff --git a/app/MindWork AI Studio/Provider/HuggingFace/ProviderHuggingFace.cs b/app/MindWork AI Studio/Provider/HuggingFace/ProviderHuggingFace.cs index f0b312b9..118545ed 100644 --- a/app/MindWork AI Studio/Provider/HuggingFace/ProviderHuggingFace.cs +++ b/app/MindWork AI Studio/Provider/HuggingFace/ProviderHuggingFace.cs @@ -40,7 +40,7 @@ public sealed class ProviderHuggingFace : BaseProvider }; // Prepare the HuggingFace HTTP chat request: - var huggingfaceChatRequest = JsonSerializer.Serialize(new ChatRequest + var huggingfaceChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest { Model = chatModel.Id, @@ -81,7 +81,7 @@ public sealed class ProviderHuggingFace : BaseProvider return request; } - await foreach (var content in this.StreamChatCompletionInternal("HuggingFace", RequestBuilder, token)) + await foreach (var content in this.StreamChatCompletionInternal("HuggingFace", RequestBuilder, token)) yield return content; } diff --git a/app/MindWork AI Studio/Provider/IAnnotationStreamLine.cs b/app/MindWork AI Studio/Provider/IAnnotationStreamLine.cs new file mode 100644 index 00000000..5d95d5bf --- /dev/null +++ b/app/MindWork AI Studio/Provider/IAnnotationStreamLine.cs @@ -0,0 +1,19 @@ +namespace AIStudio.Provider; + +/// +/// A contract for a line in a response stream that can provide annotations such as sources. +/// +public interface IAnnotationStreamLine +{ + /// + /// Checks if the response line contains any sources. + /// + /// True when the response line contains sources, false otherwise. + public bool ContainsSources(); + + /// + /// Gets the sources of the response line. + /// + /// The sources of the response line. 
+ public IList GetSources(); +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/IResponseStreamLine.cs b/app/MindWork AI Studio/Provider/IResponseStreamLine.cs index 366b9884..76ae56fe 100644 --- a/app/MindWork AI Studio/Provider/IResponseStreamLine.cs +++ b/app/MindWork AI Studio/Provider/IResponseStreamLine.cs @@ -1,6 +1,9 @@ namespace AIStudio.Provider; -public interface IResponseStreamLine +/// +/// A contract for a streamed response line that may contain content and annotations. +/// +public interface IResponseStreamLine : IAnnotationStreamLine { /// /// Checks if the response line contains any content. @@ -13,16 +16,4 @@ public interface IResponseStreamLine /// /// The content of the response line. public ContentStreamChunk GetContent(); - - /// - /// Checks if the response line contains any sources. - /// - /// True when the response line contains sources, false otherwise. - public bool ContainsSources() => false; - - /// - /// Gets the sources of the response line. - /// - /// The sources of the response line. - public IList GetSources() => []; } \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/Mistral/ProviderMistral.cs b/app/MindWork AI Studio/Provider/Mistral/ProviderMistral.cs index db094210..2f1ece18 100644 --- a/app/MindWork AI Studio/Provider/Mistral/ProviderMistral.cs +++ b/app/MindWork AI Studio/Provider/Mistral/ProviderMistral.cs @@ -58,8 +58,6 @@ public sealed class ProviderMistral(ILogger logger) : BaseProvider("https://api. _ => string.Empty, } }).ToList()], - - RandomSeed = chatThread.Seed, // Right now, we only support streaming completions: Stream = true, @@ -79,7 +77,7 @@ public sealed class ProviderMistral(ILogger logger) : BaseProvider("https://api. return request; } - await foreach (var content in this.StreamChatCompletionInternal("Mistral", RequestBuilder, token)) + await foreach (var content in this.StreamChatCompletionInternal("Mistral", RequestBuilder, token)) yield return content; } @@ -134,6 +132,7 @@ public sealed class ProviderMistral(ILogger logger) : BaseProvider("https://api. Capability.TEXT_OUTPUT, Capability.FUNCTION_CALLING, + Capability.CHAT_COMPLETION_API, ]; // Mistral medium: @@ -144,6 +143,7 @@ public sealed class ProviderMistral(ILogger logger) : BaseProvider("https://api. Capability.TEXT_OUTPUT, Capability.FUNCTION_CALLING, + Capability.CHAT_COMPLETION_API, ]; // Mistral small: @@ -154,6 +154,7 @@ public sealed class ProviderMistral(ILogger logger) : BaseProvider("https://api. Capability.TEXT_OUTPUT, Capability.FUNCTION_CALLING, + Capability.CHAT_COMPLETION_API, ]; // Mistral saba: @@ -162,6 +163,7 @@ public sealed class ProviderMistral(ILogger logger) : BaseProvider("https://api. [ Capability.TEXT_INPUT, Capability.TEXT_OUTPUT, + Capability.CHAT_COMPLETION_API, ]; // Default: diff --git a/app/MindWork AI Studio/Provider/NoChatCompletionAnnotationStreamLine.cs b/app/MindWork AI Studio/Provider/NoChatCompletionAnnotationStreamLine.cs new file mode 100644 index 00000000..e9e53239 --- /dev/null +++ b/app/MindWork AI Studio/Provider/NoChatCompletionAnnotationStreamLine.cs @@ -0,0 +1,15 @@ +namespace AIStudio.Provider; + +/// +/// A marker record indicating that no chat completion annotation line is expected in that stream. 
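// Sketch: this marker record and its Responses API counterpart just below let
// BaseProvider disable the annotation branch at the type level instead of
// probing every line at runtime. The matching check in
// StreamChatCompletionInternal above reads:
//
//     var annotationSupported = typeof(TAnnotation) != typeof(NoResponsesAnnotationStreamLine)
//                            && typeof(TAnnotation) != typeof(NoChatCompletionAnnotationStreamLine);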
+/// +public sealed record NoChatCompletionAnnotationStreamLine : IAnnotationStreamLine +{ + #region Implementation of IAnnotationStreamLine + + public bool ContainsSources() => false; + + public IList GetSources() => []; + + #endregion +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/NoResponsesAnnotationStreamLine.cs b/app/MindWork AI Studio/Provider/NoResponsesAnnotationStreamLine.cs new file mode 100644 index 00000000..b9587dda --- /dev/null +++ b/app/MindWork AI Studio/Provider/NoResponsesAnnotationStreamLine.cs @@ -0,0 +1,15 @@ +namespace AIStudio.Provider; + +/// +/// A marker record indicating that no annotation line is expected in that Responses API stream. +/// +public sealed record NoResponsesAnnotationStreamLine : IAnnotationStreamLine +{ + #region Implementation of IAnnotationStreamLine + + public bool ContainsSources() => false; + + public IList GetSources() => []; + + #endregion +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/OpenAI/AnnotatingUnknown.cs b/app/MindWork AI Studio/Provider/OpenAI/AnnotatingUnknown.cs new file mode 100644 index 00000000..8581f2f5 --- /dev/null +++ b/app/MindWork AI Studio/Provider/OpenAI/AnnotatingUnknown.cs @@ -0,0 +1,7 @@ +namespace AIStudio.Provider.OpenAI; + +/// +/// Represents an unknown annotation type. +/// +/// The type of the unknown annotation. +public sealed record AnnotatingUnknown(string Type) : Annotation(Type); \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/OpenAI/Annotation.cs b/app/MindWork AI Studio/Provider/OpenAI/Annotation.cs new file mode 100644 index 00000000..4ac1e714 --- /dev/null +++ b/app/MindWork AI Studio/Provider/OpenAI/Annotation.cs @@ -0,0 +1,10 @@ +namespace AIStudio.Provider.OpenAI; + +/// +/// Base class for different types of annotations. +/// +/// +/// We use this base class to represent various annotation types for all types of LLM providers. +/// +/// The type of the annotation. +public abstract record Annotation(string Type); \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/OpenAI/AnnotationConverter.cs b/app/MindWork AI Studio/Provider/OpenAI/AnnotationConverter.cs new file mode 100644 index 00000000..acdb69b2 --- /dev/null +++ b/app/MindWork AI Studio/Provider/OpenAI/AnnotationConverter.cs @@ -0,0 +1,62 @@ +using System.Text.Json; +using System.Text.Json.Serialization; + +namespace AIStudio.Provider.OpenAI; + +/// +/// Custom JSON converter for the annotation class to handle polymorphic deserialization. +/// +/// +/// We use this converter for chat completion API and responses API annotation deserialization. +/// +public sealed class AnnotationConverter : JsonConverter +{ + public override Annotation? Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) + { + using var doc = JsonDocument.ParseValue(ref reader); + var root = doc.RootElement; + + if (!root.TryGetProperty("type", out var typeElement)) + return null; + + var type = typeElement.GetString(); + var rawText = root.GetRawText(); + + Annotation? 
annotation; + switch (type) + { + case "url_citation": + + // Let's check the responses API data type first: + var responsesAnnotation = JsonSerializer.Deserialize(rawText, options); + + // If it fails, let's try the chat completion API data type: + if(responsesAnnotation is null || string.IsNullOrWhiteSpace(responsesAnnotation.Title) || string.IsNullOrWhiteSpace(responsesAnnotation.URL)) + { + // Try chat completion API data type: + var chatCompletionAnnotation = JsonSerializer.Deserialize(rawText, options); + + // If both fail, we return the unknown type: + if(chatCompletionAnnotation is null) + annotation = new AnnotatingUnknown(type); + else + annotation = chatCompletionAnnotation; + } + else + annotation = responsesAnnotation; + + break; + + default: + annotation = new AnnotatingUnknown(type ?? "unknown"); + break; + } + + return annotation; + } + + public override void Write(Utf8JsonWriter writer, Annotation value, JsonSerializerOptions options) + { + JsonSerializer.Serialize(writer, value, value.GetType(), options); + } +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/OpenAI/ChatCompletionAPIRequest.cs b/app/MindWork AI Studio/Provider/OpenAI/ChatCompletionAPIRequest.cs new file mode 100644 index 00000000..21236284 --- /dev/null +++ b/app/MindWork AI Studio/Provider/OpenAI/ChatCompletionAPIRequest.cs @@ -0,0 +1,18 @@ +namespace AIStudio.Provider.OpenAI; + +/// +/// The OpenAI's legacy chat completion request model. +/// +/// Which model to use for chat completion. +/// The chat messages. +/// Whether to stream the chat completion. +public record ChatCompletionAPIRequest( + string Model, + IList Messages, + bool Stream +) +{ + public ChatCompletionAPIRequest() : this(string.Empty, [], true) + { + } +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/OpenAI/ChatCompletionAnnotatingURL.cs b/app/MindWork AI Studio/Provider/OpenAI/ChatCompletionAnnotatingURL.cs new file mode 100644 index 00000000..9800d5ae --- /dev/null +++ b/app/MindWork AI Studio/Provider/OpenAI/ChatCompletionAnnotatingURL.cs @@ -0,0 +1,16 @@ +namespace AIStudio.Provider.OpenAI; + +/// +/// Data structure for URL annotation in chat completions. +/// +/// +/// Although this class is not directly intended for the Responses API, it is +/// used there as a fallback solution. One day, one of the open source LLM +/// drivers may use this data structure for their responses API. +/// +/// The type of annotation, typically "url_citation". +/// The URL citation details. +public sealed record ChatCompletionAnnotatingURL( + string Type, + ChatCompletionUrlCitationData UrlCitation +) : Annotation(Type); \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/OpenAI/ChatCompletionAnnotationChoice.cs b/app/MindWork AI Studio/Provider/OpenAI/ChatCompletionAnnotationChoice.cs new file mode 100644 index 00000000..430da40f --- /dev/null +++ b/app/MindWork AI Studio/Provider/OpenAI/ChatCompletionAnnotationChoice.cs @@ -0,0 +1,8 @@ +namespace AIStudio.Provider.OpenAI; + +/// +/// Data structure representing a choice in a chat completion annotation response. +/// +/// The index of the choice. +/// The delta information for the choice. 
+public record ChatCompletionAnnotationChoice(int Index, ChatCompletionAnnotationDelta Delta); \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/OpenAI/ChatCompletionAnnotationDelta.cs b/app/MindWork AI Studio/Provider/OpenAI/ChatCompletionAnnotationDelta.cs new file mode 100644 index 00000000..87ab13b8 --- /dev/null +++ b/app/MindWork AI Studio/Provider/OpenAI/ChatCompletionAnnotationDelta.cs @@ -0,0 +1,7 @@ +namespace AIStudio.Provider.OpenAI; + +/// +/// Data structure representing annotation deltas in chat completions. +/// +/// The list of annotations, which can be null. +public record ChatCompletionAnnotationDelta(IList? Annotations); \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/OpenAI/ChatCompletionAnnotationStreamLine.cs b/app/MindWork AI Studio/Provider/OpenAI/ChatCompletionAnnotationStreamLine.cs new file mode 100644 index 00000000..2637473c --- /dev/null +++ b/app/MindWork AI Studio/Provider/OpenAI/ChatCompletionAnnotationStreamLine.cs @@ -0,0 +1,57 @@ +namespace AIStudio.Provider.OpenAI; + +/// +/// Represents a line of a chat completion annotation stream. +/// +/// The unique identifier of the chat completion. +/// The type of object returned, typically "chat.completion". +/// The creation timestamp of the chat completion in Unix epoch format. +/// The model used for the chat completion. +/// The system fingerprint associated with the chat completion. +/// The list of choices returned in the chat completion. +public record ChatCompletionAnnotationStreamLine(string Id, string Object, uint Created, string Model, string SystemFingerprint, IList Choices) : IAnnotationStreamLine +{ + #region Implementation of IAnnotationStreamLine + + /// + public bool ContainsSources() => this.Choices.Any(choice => choice.Delta.Annotations is not null && choice.Delta.Annotations.Any(annotation => annotation is not AnnotatingUnknown)); + + /// + public IList GetSources() + { + var sources = new List(); + foreach (var choice in this.Choices) + { + if (choice.Delta.Annotations is null) + continue; + + // Iterate through all annotations: + foreach (var annotation in choice.Delta.Annotations) + { + // Check if the annotation is of the expected type and extract the source information: + if (annotation is ChatCompletionAnnotatingURL urlAnnotation) + sources.Add(new Source(urlAnnotation.UrlCitation.Title, urlAnnotation.UrlCitation.URL)); + + // + // Check for the unexpected annotation type of the Responses API. + // + // This seems weird at first. But there are two possibilities why this could happen: + // - Anyone of the open source providers such as ollama, LM Studio, etc. could + // implement & use the Responses API data structures for annotations in their + // chat completion endpoint. + // + // - Our custom JSON converter checks for the Responses API data type first. If it + // fails, it checks for the chat completion API data type. So, when the Responses + // API data type is valid, it will be deserialized into that type, even though + // we are calling the chat completion endpoint. 
+ // + if (annotation is ResponsesAnnotatingUrlCitationData citationData) + sources.Add(new Source(citationData.Title, citationData.URL)); + } + } + + return sources; + } + + #endregion +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/OpenAI/ChatCompletionChoice.cs b/app/MindWork AI Studio/Provider/OpenAI/ChatCompletionChoice.cs new file mode 100644 index 00000000..136cc367 --- /dev/null +++ b/app/MindWork AI Studio/Provider/OpenAI/ChatCompletionChoice.cs @@ -0,0 +1,13 @@ +namespace AIStudio.Provider.OpenAI; + +/// +/// Data model for a choice made by the AI. +/// +/// The index of the choice. +/// The delta text of the choice. +public record ChatCompletionChoice(int Index, ChatCompletionDelta Delta) +{ + public ChatCompletionChoice() : this(0, new (string.Empty)) + { + } +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/OpenAI/ChatCompletionDelta.cs b/app/MindWork AI Studio/Provider/OpenAI/ChatCompletionDelta.cs new file mode 100644 index 00000000..6154cfbe --- /dev/null +++ b/app/MindWork AI Studio/Provider/OpenAI/ChatCompletionDelta.cs @@ -0,0 +1,12 @@ +namespace AIStudio.Provider.OpenAI; + +/// +/// The delta text of a choice. +/// +/// The content of the delta text. +public record ChatCompletionDelta(string Content) +{ + public ChatCompletionDelta() : this(string.Empty) + { + } +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/OpenAI/ChatCompletionDeltaStreamLine.cs b/app/MindWork AI Studio/Provider/OpenAI/ChatCompletionDeltaStreamLine.cs new file mode 100644 index 00000000..1db13ba9 --- /dev/null +++ b/app/MindWork AI Studio/Provider/OpenAI/ChatCompletionDeltaStreamLine.cs @@ -0,0 +1,45 @@ +namespace AIStudio.Provider.OpenAI; + +/// +/// Data model for a delta line in the chat completion response stream. +/// +/// The id of the response. +/// The object describing the response. +/// The timestamp of the response. +/// The model used for the response. +/// The system fingerprint; together with the seed, this allows you to reproduce the response. +/// The choices made by the AI. +public record ChatCompletionDeltaStreamLine(string Id, string Object, uint Created, string Model, string SystemFingerprint, IList Choices) : IResponseStreamLine +{ + public ChatCompletionDeltaStreamLine() : this(string.Empty, string.Empty, 0, string.Empty, string.Empty, []) + { + } + + /// + public bool ContainsContent() => this.Choices.Count > 0; + + /// + public ContentStreamChunk GetContent() => new(this.Choices[0].Delta.Content, []); + + #region Implementation of IAnnotationStreamLine + + // + // Please note that there are multiple options where LLM providers might stream sources: + // + // - As part of the delta content while streaming. That would be part of this class. + // - By using a dedicated stream event and data structure. That would be another class implementing IResponseStreamLine. + // + // Right now, OpenAI uses the latter approach, so we don't have any sources here. And + // because no other provider does it yet, we don't have any implementation here either. + // + // One example where sources are part of the delta content is the Perplexity provider. 
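// Sketch: the two "url_citation" payload shapes distinguished by the fallback
// described above, reconstructed from the record definitions in this diff
// (field values are illustrative; the index fields on the Responses shape are
// assumed):
//
//     Responses API shape, with title and URL flat on the annotation itself:
//         {"type":"url_citation","title":"Example","url":"https://example.com"}
//
//     Chat Completion API shape, with the details nested under "url_citation":
//         {"type":"url_citation","url_citation":{"start_index":0,"end_index":20,"title":"Example","url":"https://example.com"}}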
+ // + + /// + public bool ContainsSources() => false; + + /// + public IList GetSources() => []; + + #endregion +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/OpenAI/ChatCompletionUrlCitationData.cs b/app/MindWork AI Studio/Provider/OpenAI/ChatCompletionUrlCitationData.cs new file mode 100644 index 00000000..0aa4613d --- /dev/null +++ b/app/MindWork AI Studio/Provider/OpenAI/ChatCompletionUrlCitationData.cs @@ -0,0 +1,14 @@ +namespace AIStudio.Provider.OpenAI; + +/// +/// Represents citation data for a URL in a chat completion response. +/// +/// The end index of the citation in the response text. +/// The start index of the citation in the response text. +/// The title of the cited source. +/// The URL of the cited source. +public sealed record ChatCompletionUrlCitationData( + int EndIndex, + int StartIndex, + string Title, + string URL); \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/OpenAI/ChatRequest.cs b/app/MindWork AI Studio/Provider/OpenAI/ChatRequest.cs deleted file mode 100644 index 79f89fd7..00000000 --- a/app/MindWork AI Studio/Provider/OpenAI/ChatRequest.cs +++ /dev/null @@ -1,21 +0,0 @@ -using System.ComponentModel.DataAnnotations; - -namespace AIStudio.Provider.OpenAI; - -/// -/// The OpenAI chat request model. -/// -/// Which model to use for chat completion. -/// The chat messages. -/// Whether to stream the chat completion. -/// The seed for the chat completion. -/// The frequency penalty for the chat completion. -public readonly record struct ChatRequest( - string Model, - IList Messages, - bool Stream, - int Seed, - - [Range(-2.0f, 2.0f)] - float FrequencyPenalty -); \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs b/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs index cc89d1b2..1aad315b 100644 --- a/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs +++ b/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs @@ -30,16 +30,20 @@ public sealed class ProviderOpenAI(ILogger logger) : BaseProvider("https://api.o yield break; // Unfortunately, OpenAI changed the name of the system prompt based on the model. - // All models that start with "o" (the omni aka reasoning models) and all GPT4o models - // have the system prompt named "developer". All other models have the system prompt - // named "system". We need to check this to get the correct system prompt. + // All models that start with "o" (the omni aka reasoning models), all GPT4o models, + // and all newer models have the system prompt named "developer". All other models + // have the system prompt named "system". We need to check this to get the correct + // system prompt. // // To complicate it even more: The early versions of reasoning models, which are released // before the 17th of December 2024, have no system prompt at all. We need to check this // as well. // Apply the basic rule first: - var systemPromptRole = chatModel.Id.StartsWith('o') || chatModel.Id.Contains("4o") ? "developer" : "system"; + var systemPromptRole = + chatModel.Id.StartsWith('o') || + chatModel.Id.StartsWith("gpt-5", StringComparison.Ordinal) || + chatModel.Id.Contains("4o") ? 
"developer" : "system"; // Check if the model is an early version of the reasoning models: systemPromptRole = chatModel.Id switch @@ -51,53 +55,113 @@ public sealed class ProviderOpenAI(ILogger logger) : BaseProvider("https://api.o _ => systemPromptRole, }; - - this.logger.LogInformation($"Using the system prompt role '{systemPromptRole}' for model '{chatModel.Id}'."); + // Read the model capabilities: + var modelCapabilities = this.GetModelCapabilities(chatModel); + + // Check if we are using the Responses API or the Chat Completion API: + var usingResponsesAPI = modelCapabilities.Contains(Capability.RESPONSES_API); + + // Prepare the request path based on the API we are using: + var requestPath = usingResponsesAPI ? "responses" : "chat/completions"; + + this.logger.LogInformation("Using the system prompt role '{SystemPromptRole}' and the '{RequestPath}' API for model '{ChatModelId}'.", systemPromptRole, requestPath, chatModel.Id); + // Prepare the system prompt: var systemPrompt = new Message { Role = systemPromptRole, Content = chatThread.PrepareSystemPrompt(settingsManager, chatThread, this.logger), }; - - // Prepare the OpenAI HTTP chat request: - var openAIChatRequest = JsonSerializer.Serialize(new ChatRequest + + // + // Prepare the tools we want to use: + // + IList tools = modelCapabilities.Contains(Capability.WEB_SEARCH) switch { - Model = chatModel.Id, - - // Build the messages: - // - First of all the system prompt - // - Then none-empty user and AI messages - Messages = [systemPrompt, ..chatThread.Blocks.Where(n => n.ContentType is ContentType.TEXT && !string.IsNullOrWhiteSpace((n.Content as ContentText)?.Text)).Select(n => new Message + true => [ Tools.WEB_SEARCH ], + _ => [] + }; + + // + // Create the request: either for the Responses API or the Chat Completion API + // + var openAIChatRequest = usingResponsesAPI switch + { + // Chat Completion API request: + false => JsonSerializer.Serialize(new ChatCompletionAPIRequest { - Role = n.Role switch - { - ChatRole.USER => "user", - ChatRole.AI => "assistant", - ChatRole.AGENT => "assistant", - ChatRole.SYSTEM => systemPromptRole, - - _ => "user", - }, - - Content = n.Content switch - { - ContentText text => text.Text, - _ => string.Empty, - } - }).ToList()], - - Seed = chatThread.Seed, + Model = chatModel.Id, - // Right now, we only support streaming completions: - Stream = true, - }, JSON_SERIALIZER_OPTIONS); + // Build the messages: + // - First of all the system prompt + // - Then none-empty user and AI messages + Messages = [systemPrompt, ..chatThread.Blocks.Where(n => n.ContentType is ContentType.TEXT && !string.IsNullOrWhiteSpace((n.Content as ContentText)?.Text)).Select(n => new Message + { + Role = n.Role switch + { + ChatRole.USER => "user", + ChatRole.AI => "assistant", + ChatRole.AGENT => "assistant", + ChatRole.SYSTEM => systemPromptRole, + + _ => "user", + }, + + Content = n.Content switch + { + ContentText text => text.Text, + _ => string.Empty, + } + }).ToList()], + + // Right now, we only support streaming completions: + Stream = true, + }, JSON_SERIALIZER_OPTIONS), + + // Responses API request: + true => JsonSerializer.Serialize(new ResponsesAPIRequest + { + Model = chatModel.Id, + + // Build the messages: + // - First of all the system prompt + // - Then none-empty user and AI messages + Input = [systemPrompt, ..chatThread.Blocks.Where(n => n.ContentType is ContentType.TEXT && !string.IsNullOrWhiteSpace((n.Content as ContentText)?.Text)).Select(n => new Message + { + Role = n.Role switch + { + ChatRole.USER => 
"user", + ChatRole.AI => "assistant", + ChatRole.AGENT => "assistant", + ChatRole.SYSTEM => systemPromptRole, + + _ => "user", + }, + + Content = n.Content switch + { + ContentText text => text.Text, + _ => string.Empty, + } + }).ToList()], + + // Right now, we only support streaming completions: + Stream = true, + + // We do not want to store any data on OpenAI's servers: + Store = false, + + // Tools we want to use: + Tools = tools, + + }, JSON_SERIALIZER_OPTIONS), + }; async Task RequestBuilder() { // Build the HTTP post request: - var request = new HttpRequestMessage(HttpMethod.Post, "chat/completions"); + var request = new HttpRequestMessage(HttpMethod.Post, requestPath); // Set the authorization header: request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", await requestedSecret.Secret.Decrypt(ENCRYPTION)); @@ -106,29 +170,35 @@ public sealed class ProviderOpenAI(ILogger logger) : BaseProvider("https://api.o request.Content = new StringContent(openAIChatRequest, Encoding.UTF8, "application/json"); return request; } + + if (usingResponsesAPI) + await foreach (var content in this.StreamResponsesInternal("OpenAI", RequestBuilder, token)) + yield return content; - await foreach (var content in this.StreamChatCompletionInternal("OpenAI", RequestBuilder, token)) - yield return content; + else + await foreach (var content in this.StreamChatCompletionInternal("OpenAI", RequestBuilder, token)) + yield return content; } #pragma warning disable CS1998 // Async method lacks 'await' operators and will run synchronously + /// public override async IAsyncEnumerable StreamImageCompletion(Model imageModel, string promptPositive, string promptNegative = FilterOperator.String.Empty, ImageURL referenceImageURL = default, [EnumeratorCancellation] CancellationToken token = default) { yield break; } + #pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously /// public override async Task> GetTextModels(string? 
     {
-        var models = await this.LoadModels(["gpt-", "o1-", "o3-", "o4-"], token, apiKeyProvisional);
+        var models = await this.LoadModels(["chatgpt-", "gpt-", "o1-", "o3-", "o4-"], token, apiKeyProvisional);
         return models.Where(model =>
             !model.Id.Contains("image", StringComparison.OrdinalIgnoreCase) &&
             !model.Id.Contains("realtime", StringComparison.OrdinalIgnoreCase) &&
             !model.Id.Contains("audio", StringComparison.OrdinalIgnoreCase) &&
             !model.Id.Contains("tts", StringComparison.OrdinalIgnoreCase) &&
-            !model.Id.Contains("transcribe", StringComparison.OrdinalIgnoreCase) &&
-            !model.Id.Contains("o1-pro", StringComparison.OrdinalIgnoreCase));
+            !model.Id.Contains("transcribe", StringComparison.OrdinalIgnoreCase));
     }
 
     /// <inheritdoc/>
@@ -147,6 +217,26 @@ public sealed class ProviderOpenAI(ILogger logger) : BaseProvider("https://api.o
     {
         var modelName = model.Id.ToLowerInvariant().AsSpan();
 
+        if (modelName is "gpt-4o-search-preview")
+            return
+            [
+                Capability.TEXT_INPUT,
+                Capability.TEXT_OUTPUT,
+
+                Capability.WEB_SEARCH,
+                Capability.CHAT_COMPLETION_API,
+            ];
+
+        if (modelName is "gpt-4o-mini-search-preview")
+            return
+            [
+                Capability.TEXT_INPUT,
+                Capability.TEXT_OUTPUT,
+
+                Capability.WEB_SEARCH,
+                Capability.CHAT_COMPLETION_API,
+            ];
+
         if (modelName.StartsWith("o1-mini"))
             return
             [
@@ -154,32 +244,63 @@ public sealed class ProviderOpenAI(ILogger logger) : BaseProvider("https://api.o
                 Capability.TEXT_OUTPUT,
 
                 Capability.ALWAYS_REASONING,
+                Capability.CHAT_COMPLETION_API,
             ];
 
+        if(modelName is "gpt-3.5-turbo")
+            return
+            [
+                Capability.TEXT_INPUT,
+                Capability.TEXT_OUTPUT,
+                Capability.RESPONSES_API,
+            ];
+
+        if(modelName.StartsWith("gpt-3.5"))
+            return
+            [
+                Capability.TEXT_INPUT,
+                Capability.TEXT_OUTPUT,
+                Capability.CHAT_COMPLETION_API,
+            ];
+
+        if (modelName.StartsWith("chatgpt-4o-"))
+            return
+            [
+                Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT,
+                Capability.TEXT_OUTPUT,
+                Capability.RESPONSES_API,
+            ];
+
         if (modelName.StartsWith("o3-mini"))
             return
             [
                 Capability.TEXT_INPUT,
                 Capability.TEXT_OUTPUT,
 
-                Capability.ALWAYS_REASONING, Capability.FUNCTION_CALLING
+                Capability.ALWAYS_REASONING, Capability.FUNCTION_CALLING,
+                Capability.RESPONSES_API,
             ];
 
-        if (modelName.StartsWith("o4-mini") || modelName.StartsWith("o1") || modelName.StartsWith("o3"))
+        if (modelName.StartsWith("o4-mini") || modelName.StartsWith("o3"))
             return
             [
                 Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT,
                 Capability.TEXT_OUTPUT,
 
-                Capability.ALWAYS_REASONING, Capability.FUNCTION_CALLING
+                Capability.ALWAYS_REASONING, Capability.FUNCTION_CALLING,
+                Capability.WEB_SEARCH,
+                Capability.RESPONSES_API,
            ];
 
-        if(modelName.StartsWith("gpt-3.5"))
+        if (modelName.StartsWith("o1"))
             return
-            [
-                Capability.TEXT_INPUT,
-                Capability.TEXT_OUTPUT,
-            ];
+            [
+                Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT,
+                Capability.TEXT_OUTPUT,
+
+                Capability.ALWAYS_REASONING, Capability.FUNCTION_CALLING,
+                Capability.RESPONSES_API,
+            ];
 
         if(modelName.StartsWith("gpt-4-turbo"))
             return
@@ -187,7 +308,8 @@ public sealed class ProviderOpenAI(ILogger logger) : BaseProvider("https://api.o
                 Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT,
                 Capability.TEXT_OUTPUT,
 
-                Capability.FUNCTION_CALLING
+                Capability.FUNCTION_CALLING,
+                Capability.RESPONSES_API,
             ];
 
         if(modelName is "gpt-4" || modelName.StartsWith("gpt-4-"))
@@ -195,14 +317,37 @@ public sealed class ProviderOpenAI(ILogger logger) : BaseProvider("https://api.o
             [
                 Capability.TEXT_INPUT,
                 Capability.TEXT_OUTPUT,
+                Capability.RESPONSES_API,
             ];
 
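+        // Note: keep the more specific "gpt-5-nano" check before the general
+        // "gpt-5-" prefix check below; otherwise, the prefix check would also match
+        // the nano models and grant them capabilities they lack (e.g., web search):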
if(modelName.StartsWith("gpt-5-nano")) + return + [ + Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT, + Capability.TEXT_OUTPUT, + + Capability.FUNCTION_CALLING, Capability.ALWAYS_REASONING, + Capability.RESPONSES_API, + ]; + + if(modelName is "gpt-5" || modelName.StartsWith("gpt-5-")) + return + [ + Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT, + Capability.TEXT_OUTPUT, + + Capability.FUNCTION_CALLING, Capability.ALWAYS_REASONING, + Capability.WEB_SEARCH, + Capability.RESPONSES_API, + ]; + return [ Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT, Capability.TEXT_OUTPUT, Capability.FUNCTION_CALLING, + Capability.RESPONSES_API, ]; } diff --git a/app/MindWork AI Studio/Provider/OpenAI/ResponseStreamLine.cs b/app/MindWork AI Studio/Provider/OpenAI/ResponseStreamLine.cs deleted file mode 100644 index 96f6fc46..00000000 --- a/app/MindWork AI Studio/Provider/OpenAI/ResponseStreamLine.cs +++ /dev/null @@ -1,32 +0,0 @@ -namespace AIStudio.Provider.OpenAI; - -/// -/// Data model for a line in the response stream, for streaming completions. -/// -/// The id of the response. -/// The object describing the response. -/// The timestamp of the response. -/// The model used for the response. -/// The system fingerprint; together with the seed, this allows you to reproduce the response. -/// The choices made by the AI. -public readonly record struct ResponseStreamLine(string Id, string Object, uint Created, string Model, string SystemFingerprint, IList Choices) : IResponseStreamLine -{ - /// - public bool ContainsContent() => this != default && this.Choices.Count > 0; - - /// - public ContentStreamChunk GetContent() => new(this.Choices[0].Delta.Content, []); -} - -/// -/// Data model for a choice made by the AI. -/// -/// The index of the choice. -/// The delta text of the choice. -public readonly record struct Choice(int Index, Delta Delta); - -/// -/// The delta text of a choice. -/// -/// The content of the delta text. -public readonly record struct Delta(string Content); \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/OpenAI/ResponsesAPIRequest.cs b/app/MindWork AI Studio/Provider/OpenAI/ResponsesAPIRequest.cs new file mode 100644 index 00000000..b5525b8f --- /dev/null +++ b/app/MindWork AI Studio/Provider/OpenAI/ResponsesAPIRequest.cs @@ -0,0 +1,21 @@ +namespace AIStudio.Provider.OpenAI; + +/// +/// The request body for the Responses API. +/// +/// Which model to use. +/// The chat messages. +/// Whether to stream the response. +/// Whether to store the response on the server (usually OpenAI's infrastructure). +/// The tools to use for the request. +public record ResponsesAPIRequest( + string Model, + IList Input, + bool Stream, + bool Store, + IList Tools) +{ + public ResponsesAPIRequest() : this(string.Empty, [], true, false, []) + { + } +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/OpenAI/ResponsesAnnotatingURL.cs b/app/MindWork AI Studio/Provider/OpenAI/ResponsesAnnotatingURL.cs new file mode 100644 index 00000000..6f334015 --- /dev/null +++ b/app/MindWork AI Studio/Provider/OpenAI/ResponsesAnnotatingURL.cs @@ -0,0 +1,16 @@ +namespace AIStudio.Provider.OpenAI; + +/// +/// Data structure for URL citation annotations in the OpenAI Responses API. +/// +/// The type of annotation, typically "url_citation". +/// The end index of the annotated text in the response. +/// The start index of the annotated text in the response. +/// The title of the cited URL. +/// The URL being cited. 
+public sealed record ResponsesAnnotatingUrlCitationData(
+    string Type,
+    int EndIndex,
+    int StartIndex,
+    string Title,
+    string URL) : Annotation(Type);
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Provider/OpenAI/ResponsesAnnotationStreamLine.cs b/app/MindWork AI Studio/Provider/OpenAI/ResponsesAnnotationStreamLine.cs
new file mode 100644
index 00000000..a58fa17b
--- /dev/null
+++ b/app/MindWork AI Studio/Provider/OpenAI/ResponsesAnnotationStreamLine.cs
@@ -0,0 +1,45 @@
+namespace AIStudio.Provider.OpenAI;
+
+/// <summary>
+/// Data structure for a line in the response stream of the Responses API, containing an annotation.
+/// </summary>
+/// <param name="Type">The type of the annotation.</param>
+/// <param name="AnnotationIndex">The continuous index of the annotation in the response.</param>
+/// <param name="Annotation">The annotation details.</param>
+public sealed record ResponsesAnnotationStreamLine(string Type, int AnnotationIndex, Annotation Annotation) : IAnnotationStreamLine
+{
+    #region Implementation of IAnnotationStreamLine
+
+    /// <inheritdoc />
+    public bool ContainsSources()
+    {
+        return this.Annotation is not AnnotatingUnknown;
+    }
+
+    /// <inheritdoc />
+    public IList<Source> GetSources()
+    {
+        //
+        // Check for the unexpected annotation type of the chat completion API.
+        //
+        // This seems weird at first. But there are two possibilities why this could happen:
+        // - Any of the open source providers such as ollama, LM Studio, etc. could
+        //   implement and use the chat completion API data structures for annotations in their
+        //   Responses API endpoint.
+        //
+        // - Our custom JSON converter checks for all possible annotation data types. So,
+        //   when the streamed data is valid for any annotation type, it will be deserialized
+        //   into that type, even though we are calling the Responses API endpoint.
+        //
+        if (this.Annotation is ChatCompletionAnnotatingURL urlAnnotation)
+            return [new Source(urlAnnotation.UrlCitation.Title, urlAnnotation.UrlCitation.URL)];
+
+        // Check for the expected annotation type of the Responses API:
+        if (this.Annotation is ResponsesAnnotatingUrlCitationData urlCitationData)
+            return [new Source(urlCitationData.Title, urlCitationData.URL)];
+
+        return [];
+    }
+
+    #endregion
+}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Provider/OpenAI/ResponsesDeltaStreamLine.cs b/app/MindWork AI Studio/Provider/OpenAI/ResponsesDeltaStreamLine.cs
new file mode 100644
index 00000000..5bad9c1b
--- /dev/null
+++ b/app/MindWork AI Studio/Provider/OpenAI/ResponsesDeltaStreamLine.cs
@@ -0,0 +1,39 @@
+namespace AIStudio.Provider.OpenAI;
+
+/// <summary>
+/// Data model for a delta line in the Responses API stream.
+/// </summary>
+/// <param name="Type">The type of the response.</param>
+/// <param name="Delta">The delta content of the response.</param>
+public record ResponsesDeltaStreamLine(
+    string Type,
+    string Delta) : IResponseStreamLine
+{
+    #region Implementation of IResponseStreamLine
+
+    /// <inheritdoc />
+    public bool ContainsContent() => !string.IsNullOrWhiteSpace(this.Delta);
+
+    /// <inheritdoc />
+    public ContentStreamChunk GetContent() => new(this.Delta, this.GetSources());
+
+    //
+    // Please note that there are multiple options where LLM providers might stream sources:
+    //
+    // - As part of the delta content while streaming. That would be part of this class.
+    // - By using a dedicated stream event and data structure. That would be another class implementing IResponseStreamLine.
+    //
+    // Right now, OpenAI uses the latter approach, so we don't have any sources here. And
+    // because no other provider does it yet, we don't have any implementation here either.
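+    //
+    // For illustration, a typical delta event from the Responses API stream might
+    // look roughly like this (an assumed shape, based on OpenAI's public documentation):
+    //
+    //     { "type": "response.output_text.delta", "delta": "Hello" }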
+    //
+    // One example where sources are part of the delta content is the Perplexity provider.
+    //
+
+    /// <inheritdoc />
+    public bool ContainsSources() => false;
+
+    /// <inheritdoc />
+    public IList<Source> GetSources() => [];
+
+    #endregion
+}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Provider/OpenAI/Tool.cs b/app/MindWork AI Studio/Provider/OpenAI/Tool.cs
new file mode 100644
index 00000000..782e6b60
--- /dev/null
+++ b/app/MindWork AI Studio/Provider/OpenAI/Tool.cs
@@ -0,0 +1,12 @@
+namespace AIStudio.Provider.OpenAI;
+
+/// <summary>
+/// Represents a tool used by the AI model.
+/// </summary>
+/// <remarks>
+/// Right now, only our OpenAI provider is using tools. Thus, this class is located in the
+/// OpenAI namespace. In the future, when other providers also support tools, this class can
+/// be moved into the provider namespace.
+/// </remarks>
+/// <param name="Type">The type of the tool.</param>
+public record Tool(string Type);
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Provider/OpenAI/Tools.cs b/app/MindWork AI Studio/Provider/OpenAI/Tools.cs
new file mode 100644
index 00000000..50d2b836
--- /dev/null
+++ b/app/MindWork AI Studio/Provider/OpenAI/Tools.cs
@@ -0,0 +1,14 @@
+namespace AIStudio.Provider.OpenAI;
+
+/// <summary>
+/// Known tools for LLM providers.
+/// </summary>
+/// <remarks>
+/// Right now, only our OpenAI provider is using tools. Thus, this class is located in the
+/// OpenAI namespace. In the future, when other providers also support tools, this class can
+/// be moved into the provider namespace.
+/// </remarks>
+public static class Tools
+{
+    public static readonly Tool WEB_SEARCH = new("web_search");
+}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Provider/Perplexity/Choice.cs b/app/MindWork AI Studio/Provider/Perplexity/Choice.cs
new file mode 100644
index 00000000..2da6bd99
--- /dev/null
+++ b/app/MindWork AI Studio/Provider/Perplexity/Choice.cs
@@ -0,0 +1,8 @@
+namespace AIStudio.Provider.Perplexity;
+
+/// <summary>
+/// Data model for a choice made by the AI.
+/// </summary>
+/// <param name="Index">The index of the choice.</param>
+/// <param name="Delta">The delta text of the choice.</param>
+public readonly record struct Choice(int Index, Delta Delta);
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Provider/Perplexity/Delta.cs b/app/MindWork AI Studio/Provider/Perplexity/Delta.cs
new file mode 100644
index 00000000..a4f1da61
--- /dev/null
+++ b/app/MindWork AI Studio/Provider/Perplexity/Delta.cs
@@ -0,0 +1,7 @@
+namespace AIStudio.Provider.Perplexity;
+
+/// <summary>
+/// The delta text of a choice.
+/// </summary>
+/// <param name="Content">The content of the delta text.</param>
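+/// <remarks>
+/// For illustration, this is the innermost piece of a streamed chat completion
+/// chunk, roughly (an assumed, abbreviated shape):
+/// <code>
+/// { "choices": [ { "index": 0, "delta": { "content": "Hello" } } ] }
+/// </code>
+/// </remarks>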
+public readonly record struct Delta(string Content);
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Provider/Perplexity/ProviderPerplexity.cs b/app/MindWork AI Studio/Provider/Perplexity/ProviderPerplexity.cs
index 8193f237..acc7b113 100644
--- a/app/MindWork AI Studio/Provider/Perplexity/ProviderPerplexity.cs
+++ b/app/MindWork AI Studio/Provider/Perplexity/ProviderPerplexity.cs
@@ -44,7 +44,7 @@ public sealed class ProviderPerplexity(ILogger logger) : BaseProvider("https://a
         };
 
         // Prepare the Perplexity HTTP chat request:
-        var perplexityChatRequest = JsonSerializer.Serialize(new ChatRequest
+        var perplexityChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest
         {
             Model = chatModel.Id,
 
@@ -85,7 +85,7 @@ public sealed class ProviderPerplexity(ILogger logger) : BaseProvider("https://a
             return request;
         }
 
-        await foreach (var content in this.StreamChatCompletionInternal("Perplexity", RequestBuilder, token))
+        await foreach (var content in this.StreamChatCompletionInternal("Perplexity", RequestBuilder, token))
             yield return content;
     }
 
@@ -130,6 +130,8 @@ public sealed class ProviderPerplexity(ILogger logger) : BaseProvider("https://a
                 Capability.IMAGE_OUTPUT,
 
                 Capability.ALWAYS_REASONING,
+                Capability.WEB_SEARCH,
+                Capability.CHAT_COMPLETION_API,
             ];
 
         return
@@ -139,6 +141,9 @@ public sealed class ProviderPerplexity(ILogger logger) : BaseProvider("https://a
                 Capability.TEXT_OUTPUT,
                 Capability.IMAGE_OUTPUT,
+
+                Capability.WEB_SEARCH,
+                Capability.CHAT_COMPLETION_API,
             ];
     }
diff --git a/app/MindWork AI Studio/Provider/Perplexity/ResponseStreamLine.cs b/app/MindWork AI Studio/Provider/Perplexity/ResponseStreamLine.cs
index e8956a06..5ef74083 100644
--- a/app/MindWork AI Studio/Provider/Perplexity/ResponseStreamLine.cs
+++ b/app/MindWork AI Studio/Provider/Perplexity/ResponseStreamLine.cs
@@ -22,24 +22,4 @@ public readonly record struct ResponseStreamLine(string Id, string Object, uint
 
     /// <inheritdoc />
     public IList<Source> GetSources() => this.SearchResults.Cast<Source>().ToList();
-}
-
-/// <summary>
-/// Data model for a choice made by the AI.
-/// </summary>
-/// <param name="Index">The index of the choice.</param>
-/// <param name="Delta">The delta text of the choice.</param>
-public readonly record struct Choice(int Index, Delta Delta);
-
-/// <summary>
-/// The delta text of a choice.
-/// </summary>
-/// <param name="Content">The content of the delta text.</param>
-public readonly record struct Delta(string Content);
-
-/// <summary>
-/// Data model for a search result.
-/// </summary>
-/// <param name="Title">The title of the search result.</param>
-/// <param name="URL">The URL of the search result.</param>
-public sealed record SearchResult(string Title, string URL) : Source(Title, URL);
+}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Provider/Perplexity/SearchResult.cs b/app/MindWork AI Studio/Provider/Perplexity/SearchResult.cs
new file mode 100644
index 00000000..cfd870c1
--- /dev/null
+++ b/app/MindWork AI Studio/Provider/Perplexity/SearchResult.cs
@@ -0,0 +1,8 @@
+namespace AIStudio.Provider.Perplexity;
+
+/// <summary>
+/// Data model for a search result.
+/// </summary>
+/// <param name="Title">The title of the search result.</param>
+/// <param name="URL">The URL of the search result.</param>
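+/// <remarks>
+/// For illustration, Perplexity streams these objects as part of its chat completion
+/// chunks, roughly like this (an assumed, abbreviated shape):
+/// <code>
+/// { "search_results": [ { "title": "Some page", "url": "https://example.com" } ] }
+/// </code>
+/// </remarks>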
+public sealed record SearchResult(string Title, string URL) : Source(Title, URL);
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs b/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs
index db6766ac..2483ce90 100644
--- a/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs
+++ b/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs
@@ -75,7 +75,7 @@ public sealed class ProviderSelfHosted(ILogger logger, Host host, string hostnam
             return request;
         }
 
-        await foreach (var content in this.StreamChatCompletionInternal("self-hosted provider", RequestBuilder, token))
+        await foreach (var content in this.StreamChatCompletionInternal("self-hosted provider", RequestBuilder, token))
             yield return content;
     }
diff --git a/app/MindWork AI Studio/Provider/X/ProviderX.cs b/app/MindWork AI Studio/Provider/X/ProviderX.cs
index 9fc5ec90..b79a4ec6 100644
--- a/app/MindWork AI Studio/Provider/X/ProviderX.cs
+++ b/app/MindWork AI Studio/Provider/X/ProviderX.cs
@@ -35,7 +35,7 @@ public sealed class ProviderX(ILogger logger) : BaseProvider("https://api.x.ai/v
         };
 
         // Prepare the xAI HTTP chat request:
-        var xChatRequest = JsonSerializer.Serialize(new ChatRequest
+        var xChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest
         {
             Model = chatModel.Id,
 
@@ -60,8 +60,6 @@ public sealed class ProviderX(ILogger logger) : BaseProvider("https://api.x.ai/v
                     _ => string.Empty,
                 }
             }).ToList()],
-
-            Seed = chatThread.Seed,
 
             // Right now, we only support streaming completions:
             Stream = true,
@@ -80,7 +78,7 @@ public sealed class ProviderX(ILogger logger) : BaseProvider("https://api.x.ai/v
             return request;
         }
 
-        await foreach (var content in this.StreamChatCompletionInternal("xAI", RequestBuilder, token))
+        await foreach (var content in this.StreamChatCompletionInternal("xAI", RequestBuilder, token))
             yield return content;
     }
diff --git a/app/MindWork AI Studio/wwwroot/changelog/v0.9.51.md b/app/MindWork AI Studio/wwwroot/changelog/v0.9.51.md
index 04304eac..45c02b69 100644
--- a/app/MindWork AI Studio/wwwroot/changelog/v0.9.51.md
+++ b/app/MindWork AI Studio/wwwroot/changelog/v0.9.51.md
@@ -4,11 +4,14 @@
 - Added the ability to control the update installation behavior by configuration plugins.
 - Added the option for LLM providers to stream citations or sources.
 - Added support for citations to the chat interface. This feature is invisible unless an LLM model is streaming citations or sources.
+- Added support for the Responses API as specified in the OpenAI documentation. It is currently used only for OpenAI, but we can reuse it for other providers as soon as they offer it. This means that all text-based LLMs from OpenAI can now be used in MindWork AI Studio, including, for example, the Deep Research models for comprehensive research tasks.
+- Added support for web searches. This is currently supported by some OpenAI models (e.g., GPT-5, newer Omni models, and the Deep Research models) and by Perplexity. The sources used are displayed visually in the chat interface.
 - Improved memory usage in several areas of the app.
 - Improved plugin management for configuration plugins so that hot reload detects when a provider or chat template has been removed.
 - Improved the dialog for naming chats and workspaces to ensure valid inputs are entered.
 - Improved the dialog invocation by making parameter provision more robust.
 - Improved the text summarizer assistant by allowing users to specify important aspects & optimized the generated prompt.
+- Improved the OpenAI provider by supporting more models and capabilities.
 - Changed the configuration plugin setting name for how often to check for updates from `UpdateBehavior` to `UpdateInterval`.
 - Fixed a bug in various assistants where some text fields were not reset when resetting.
 - Fixed the input field header in the dialog for naming chats and workspaces.