Added the Responses API

Thorsten Sommer 2025-09-03 09:49:15 +02:00
parent b7f69ed8db
commit 8f89f2c30b
Signed by: tsommer
GPG Key ID: 371BBA77A02C0108
37 changed files with 831 additions and 110 deletions

View File

@@ -77,7 +77,7 @@ public sealed class ProviderAlibabaCloud(ILogger logger) : BaseProvider("https:/
return request;
}
await foreach (var content in this.StreamChatCompletionInternal<ChatCompletionResponseStreamLine>("AlibabaCloud", RequestBuilder, token))
await foreach (var content in this.StreamChatCompletionInternal<ChatCompletionDeltaStreamLine, NoChatCompletionAnnotationStreamLine>("AlibabaCloud", RequestBuilder, token))
yield return content;
}

View File

@@ -72,7 +72,7 @@ public sealed class ProviderAnthropic(ILogger logger) : BaseProvider("https://ap
return request;
}
await foreach (var content in this.StreamChatCompletionInternal<ResponseStreamLine>("Anthropic", RequestBuilder, token))
await foreach (var content in this.StreamChatCompletionInternal<ResponseStreamLine, NoChatCompletionAnnotationStreamLine>("Anthropic", RequestBuilder, token))
yield return content;
}

View File

@@ -14,6 +14,21 @@ public readonly record struct ResponseStreamLine(string Type, int Index, Delta D
/// <inheritdoc />
public ContentStreamChunk GetContent() => new(this.Delta.Text, []);
#region Implementation of IAnnotationStreamLine
//
// Please note: Anthropic's API does not currently support sources in its
// OpenAI-compatible response stream.
//
/// <inheritdoc />
public bool ContainsSources() => false;
/// <inheritdoc />
public IList<ISource> GetSources() => [];
#endregion
}
/// <summary>

View File

@@ -3,6 +3,7 @@ using System.Runtime.CompilerServices;
using System.Text.Json;
using AIStudio.Chat;
using AIStudio.Provider.OpenAI;
using AIStudio.Settings;
using AIStudio.Tools.PluginSystem;
using AIStudio.Tools.Services;
@@ -39,6 +40,7 @@ public abstract class BaseProvider : IProvider, ISecretId
protected static readonly JsonSerializerOptions JSON_SERIALIZER_OPTIONS = new()
{
PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower,
Converters = { new AnnotationConverter() }
};
/// <summary>
@@ -196,8 +198,20 @@ public abstract class BaseProvider : IProvider, ISecretId
return new HttpRateLimitedStreamResult(true, false, string.Empty, response);
}
protected async IAsyncEnumerable<ContentStreamChunk> StreamChatCompletionInternal<T>(string providerName, Func<Task<HttpRequestMessage>> requestBuilder, [EnumeratorCancellation] CancellationToken token = default) where T : struct, IResponseStreamLine
/// <summary>
/// Streams the chat completion from the provider using the Chat Completion API.
/// </summary>
/// <param name="providerName">The name of the provider.</param>
/// <param name="requestBuilder">A function that builds the request.</param>
/// <param name="token">The cancellation token to use.</param>
/// <typeparam name="TDelta">The type of the delta lines inside the stream.</typeparam>
/// <typeparam name="TAnnotation">The type of the annotation lines inside the stream.</typeparam>
/// <returns>The stream of content chunks.</returns>
protected async IAsyncEnumerable<ContentStreamChunk> StreamChatCompletionInternal<TDelta, TAnnotation>(string providerName, Func<Task<HttpRequestMessage>> requestBuilder, [EnumeratorCancellation] CancellationToken token = default) where TDelta : IResponseStreamLine where TAnnotation : IAnnotationStreamLine
{
// Check if annotations are supported:
var annotationSupported = typeof(TAnnotation) != typeof(NoResponsesAnnotationStreamLine) && typeof(TAnnotation) != typeof(NoChatCompletionAnnotationStreamLine);
StreamReader? streamReader = null;
try
{
@@ -224,7 +238,9 @@ public abstract class BaseProvider : IProvider, ISecretId
if (streamReader is null)
yield break;
//
// Read the stream, line by line:
//
while (true)
{
try
@@ -247,7 +263,9 @@ public abstract class BaseProvider : IProvider, ISecretId
yield break;
}
//
// Read the next line:
//
string? line;
try
{
@@ -273,28 +291,233 @@ public abstract class BaseProvider : IProvider, ISecretId
if (line.StartsWith("data: [DONE]", StringComparison.InvariantCulture))
yield break;
T providerResponse;
//
// Process annotation lines:
//
if (annotationSupported && line.Contains("""
"annotations":[
""", StringComparison.InvariantCulture))
{
TAnnotation? providerResponse;
try
{
// We know that the line starts with "data: ". Hence, we can
// skip the first 6 characters to get the JSON data after that.
var jsonData = line[6..];
// Deserialize the JSON data:
providerResponse = JsonSerializer.Deserialize<TAnnotation>(jsonData, JSON_SERIALIZER_OPTIONS);
if (providerResponse is null)
continue;
}
catch
{
// Skip invalid JSON data:
continue;
}
// Skip empty responses:
if (!providerResponse.ContainsSources())
continue;
// Yield the response:
yield return new(string.Empty, providerResponse.GetSources());
}
//
// Process delta lines:
//
else
{
TDelta? providerResponse;
try
{
// We know that the line starts with "data: ". Hence, we can
// skip the first 6 characters to get the JSON data after that.
var jsonData = line[6..];
// Deserialize the JSON data:
providerResponse = JsonSerializer.Deserialize<TDelta>(jsonData, JSON_SERIALIZER_OPTIONS);
if (providerResponse is null)
continue;
}
catch
{
// Skip invalid JSON data:
continue;
}
// Skip empty responses:
if (!providerResponse.ContainsContent())
continue;
// Yield the response:
yield return providerResponse.GetContent();
}
}
streamReader.Dispose();
}
/// <summary>
/// Streams the chat completion from the provider using the Responses API.
/// </summary>
/// <param name="providerName">The name of the provider.</param>
/// <param name="requestBuilder">A function that builds the request.</param>
/// <param name="token">The cancellation token to use.</param>
/// <typeparam name="TDelta">The type of the delta lines inside the stream.</typeparam>
/// <typeparam name="TAnnotation">The type of the annotation lines inside the stream.</typeparam>
/// <returns>The stream of content chunks.</returns>
protected async IAsyncEnumerable<ContentStreamChunk> StreamResponsesInternal<TDelta, TAnnotation>(string providerName, Func<Task<HttpRequestMessage>> requestBuilder, [EnumeratorCancellation] CancellationToken token = default) where TDelta : IResponseStreamLine where TAnnotation : IAnnotationStreamLine
{
// Check if annotations are supported:
var annotationSupported = typeof(TAnnotation) != typeof(NoResponsesAnnotationStreamLine) && typeof(TAnnotation) != typeof(NoChatCompletionAnnotationStreamLine);
StreamReader? streamReader = null;
try
{
// Send the request using exponential backoff:
var responseData = await this.SendRequest(requestBuilder, token);
if(responseData.IsFailedAfterAllRetries)
{
this.logger.LogError($"The {providerName} responses call failed: {responseData.ErrorMessage}");
yield break;
}
// Open the response stream:
var providerStream = await responseData.Response!.Content.ReadAsStreamAsync(token);
// Add a stream reader to read the stream, line by line:
streamReader = new StreamReader(providerStream);
}
catch(Exception e)
{
await MessageBus.INSTANCE.SendError(new(Icons.Material.Filled.Stream, string.Format(TB("Tried to communicate with the LLM provider '{0}'. There were some problems with the request. The provider message is: '{1}'"), this.InstanceName, e.Message)));
this.logger.LogError($"Failed to stream responses from {providerName} '{this.InstanceName}': {e.Message}");
}
if (streamReader is null)
yield break;
//
// Read the stream, line by line:
//
while (true)
{
try
{
// We know that the line starts with "data: ". Hence, we can
// skip the first 6 characters to get the JSON data after that.
var jsonData = line[6..];
// Deserialize the JSON data:
providerResponse = JsonSerializer.Deserialize<T>(jsonData, JSON_SERIALIZER_OPTIONS);
if(streamReader.EndOfStream)
break;
}
catch
catch (Exception e)
{
// Skip invalid JSON data:
continue;
await MessageBus.INSTANCE.SendError(new(Icons.Material.Filled.Stream, string.Format(TB("Tried to stream the LLM provider '{0}' answer. There were some problems with the stream. The message is: '{1}'"), this.InstanceName, e.Message)));
this.logger.LogWarning($"Failed to read the end-of-stream state from {providerName} '{this.InstanceName}': {e.Message}");
break;
}
// Check if the token is canceled:
if (token.IsCancellationRequested)
{
this.logger.LogWarning($"The user canceled the responses for {providerName} '{this.InstanceName}'.");
streamReader.Close();
yield break;
}
// Skip empty responses:
if (!providerResponse.ContainsContent())
continue;
//
// Read the next line:
//
string? line;
try
{
line = await streamReader.ReadLineAsync(token);
}
catch (Exception e)
{
await MessageBus.INSTANCE.SendError(new(Icons.Material.Filled.Stream, string.Format(TB("Tried to stream the LLM provider '{0}' answer. Was not able to read the stream. The message is: '{1}'"), this.InstanceName, e.Message)));
this.logger.LogError($"Failed to read the stream from {providerName} '{this.InstanceName}': {e.Message}");
break;
}
// Yield the response:
yield return providerResponse.GetContent();
// Skip empty lines:
if (string.IsNullOrWhiteSpace(line))
continue;
// Check if the line is the end of the stream:
if (line.StartsWith("event: response.completed", StringComparison.InvariantCulture))
yield break;
//
// Find delta lines:
//
if (line.StartsWith("""
data: {"type":"response.output_text.delta"
""", StringComparison.InvariantCulture))
{
TDelta? providerResponse;
try
{
// We know that the line starts with "data: ". Hence, we can
// skip the first 6 characters to get the JSON data after that.
var jsonData = line[6..];
// Deserialize the JSON data:
providerResponse = JsonSerializer.Deserialize<TDelta>(jsonData, JSON_SERIALIZER_OPTIONS);
if (providerResponse is null)
continue;
}
catch
{
// Skip invalid JSON data:
continue;
}
// Skip empty responses:
if (!providerResponse.ContainsContent())
continue;
// Yield the response:
yield return providerResponse.GetContent();
}
//
// Find annotation added lines:
//
else if (annotationSupported && line.StartsWith(
"""
data: {"type":"response.output_text.annotation.added"
""", StringComparison.InvariantCulture))
{
TAnnotation? providerResponse;
try
{
// We know that the line starts with "data: ". Hence, we can
// skip the first 6 characters to get the JSON data after that.
var jsonData = line[6..];
// Deserialize the JSON data:
providerResponse = JsonSerializer.Deserialize<TAnnotation>(jsonData, JSON_SERIALIZER_OPTIONS);
if (providerResponse is null)
continue;
}
catch
{
// Skip invalid JSON data:
continue;
}
// Skip empty responses:
if (!providerResponse.ContainsSources())
continue;
// Yield the response:
yield return new(string.Empty, providerResponse.GetSources());
}
}
streamReader.Dispose();
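For orientation, StreamResponsesInternal keys on the shape of the server-sent events shown above. A heavily abbreviated, illustrative stream might look as follows; the payloads are not part of this commit, and real events carry additional fields that the deserializer simply ignores:
event: response.output_text.delta
data: {"type":"response.output_text.delta","delta":"Hello"}
event: response.output_text.annotation.added
data: {"type":"response.output_text.annotation.added","annotation_index":0,"annotation":{"type":"url_citation","start_index":0,"end_index":5,"title":"Example","url":"https://example.com/"}}
event: response.completed
Delta lines are matched by their "response.output_text.delta" prefix, annotation lines by "response.output_text.annotation.added", and the "event: response.completed" line terminates the stream.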

View File

@@ -76,7 +76,7 @@ public sealed class ProviderDeepSeek(ILogger logger) : BaseProvider("https://api
return request;
}
await foreach (var content in this.StreamChatCompletionInternal<ChatCompletionResponseStreamLine>("DeepSeek", RequestBuilder, token))
await foreach (var content in this.StreamChatCompletionInternal<ChatCompletionDeltaStreamLine, NoChatCompletionAnnotationStreamLine>("DeepSeek", RequestBuilder, token))
yield return content;
}

View File

@@ -4,6 +4,7 @@ using System.Text;
using System.Text.Json;
using AIStudio.Chat;
using AIStudio.Provider.OpenAI;
using AIStudio.Settings;
namespace AIStudio.Provider.Fireworks;
@@ -77,7 +78,7 @@ public class ProviderFireworks(ILogger logger) : BaseProvider("https://api.firew
return request;
}
await foreach (var content in this.StreamChatCompletionInternal<ResponseStreamLine>("Fireworks", RequestBuilder, token))
await foreach (var content in this.StreamChatCompletionInternal<ResponseStreamLine, ChatCompletionAnnotationStreamLine>("Fireworks", RequestBuilder, token))
yield return content;
}

View File

@@ -15,6 +15,20 @@ public readonly record struct ResponseStreamLine(string Id, string Object, uint
/// <inheritdoc />
public ContentStreamChunk GetContent() => new(this.Choices[0].Delta.Content, []);
#region Implementation of IAnnotationStreamLine
//
// Currently, Fireworks does not provide source citations in its response stream.
//
/// <inheritdoc />
public bool ContainsSources() => false;
/// <inheritdoc />
public IList<ISource> GetSources() => [];
#endregion
}
/// <summary>

View File

@@ -76,7 +76,7 @@ public sealed class ProviderGWDG(ILogger logger) : BaseProvider("https://chat-ai
return request;
}
await foreach (var content in this.StreamChatCompletionInternal<ChatCompletionResponseStreamLine>("GWDG", RequestBuilder, token))
await foreach (var content in this.StreamChatCompletionInternal<ChatCompletionDeltaStreamLine, ChatCompletionAnnotationStreamLine>("GWDG", RequestBuilder, token))
yield return content;
}

View File

@@ -78,7 +78,7 @@ public class ProviderGoogle(ILogger logger) : BaseProvider("https://generativela
return request;
}
await foreach (var content in this.StreamChatCompletionInternal<ChatCompletionResponseStreamLine>("Google", RequestBuilder, token))
await foreach (var content in this.StreamChatCompletionInternal<ChatCompletionDeltaStreamLine, NoChatCompletionAnnotationStreamLine>("Google", RequestBuilder, token))
yield return content;
}

View File

@@ -78,7 +78,7 @@ public class ProviderGroq(ILogger logger) : BaseProvider("https://api.groq.com/o
return request;
}
await foreach (var content in this.StreamChatCompletionInternal<ChatCompletionResponseStreamLine>("Groq", RequestBuilder, token))
await foreach (var content in this.StreamChatCompletionInternal<ChatCompletionDeltaStreamLine, ChatCompletionAnnotationStreamLine>("Groq", RequestBuilder, token))
yield return content;
}

View File

@@ -76,7 +76,7 @@ public sealed class ProviderHelmholtz(ILogger logger) : BaseProvider("https://ap
return request;
}
await foreach (var content in this.StreamChatCompletionInternal<ChatCompletionResponseStreamLine>("Helmholtz", RequestBuilder, token))
await foreach (var content in this.StreamChatCompletionInternal<ChatCompletionDeltaStreamLine, ChatCompletionAnnotationStreamLine>("Helmholtz", RequestBuilder, token))
yield return content;
}

View File

@@ -81,7 +81,7 @@ public sealed class ProviderHuggingFace : BaseProvider
return request;
}
await foreach (var content in this.StreamChatCompletionInternal<ChatCompletionResponseStreamLine>("HuggingFace", RequestBuilder, token))
await foreach (var content in this.StreamChatCompletionInternal<ChatCompletionDeltaStreamLine, ChatCompletionAnnotationStreamLine>("HuggingFace", RequestBuilder, token))
yield return content;
}

View File

@@ -0,0 +1,19 @@
namespace AIStudio.Provider;
/// <summary>
/// A contract for a line in a response stream that can provide annotations such as sources.
/// </summary>
public interface IAnnotationStreamLine
{
/// <summary>
/// Checks if the response line contains any sources.
/// </summary>
/// <returns>True when the response line contains sources, false otherwise.</returns>
public bool ContainsSources();
/// <summary>
/// Gets the sources of the response line.
/// </summary>
/// <returns>The sources of the response line.</returns>
public IList<ISource> GetSources();
}

View File

@@ -1,6 +1,9 @@
namespace AIStudio.Provider;
public interface IResponseStreamLine
/// <summary>
/// A contract for a streamed response line that may contain content and annotations.
/// </summary>
public interface IResponseStreamLine : IAnnotationStreamLine
{
/// <summary>
/// Checks if the response line contains any content.
@@ -13,16 +16,4 @@ public interface IResponseStreamLine
/// </summary>
/// <returns>The content of the response line.</returns>
public ContentStreamChunk GetContent();
/// <summary>
/// Checks if the response line contains any sources.
/// </summary>
/// <returns>True when the response line contains sources, false otherwise.</returns>
public bool ContainsSources() => false;
/// <summary>
/// Gets the sources of the response line.
/// </summary>
/// <returns>The sources of the response line.</returns>
public IList<ISource> GetSources() => [];
}
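To illustrate the combined contract, here is a minimal hypothetical implementation for a provider that streams plain text deltas and never emits sources; the record name and its JSON shape are invented for this sketch:
/// <summary>
/// Hypothetical delta line of the shape {"text":"..."} — for illustration only.
/// </summary>
public readonly record struct ExampleTextDeltaLine(string Text) : IResponseStreamLine
{
    /// <inheritdoc />
    public bool ContainsContent() => !string.IsNullOrWhiteSpace(this.Text);

    /// <inheritdoc />
    public ContentStreamChunk GetContent() => new(this.Text, []);

    // This hypothetical provider never streams sources:
    /// <inheritdoc />
    public bool ContainsSources() => false;

    /// <inheritdoc />
    public IList<ISource> GetSources() => [];
}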

View File

@@ -77,7 +77,7 @@ public sealed class ProviderMistral(ILogger logger) : BaseProvider("https://api.
return request;
}
await foreach (var content in this.StreamChatCompletionInternal<ChatCompletionResponseStreamLine>("Mistral", RequestBuilder, token))
await foreach (var content in this.StreamChatCompletionInternal<ChatCompletionDeltaStreamLine, NoChatCompletionAnnotationStreamLine>("Mistral", RequestBuilder, token))
yield return content;
}

View File

@@ -0,0 +1,15 @@
namespace AIStudio.Provider;
/// <summary>
/// A marker record indicating that no annotation lines are expected in a chat completion stream.
/// </summary>
public sealed record NoChatCompletionAnnotationStreamLine : IAnnotationStreamLine
{
#region Implementation of IAnnotationStreamLine
public bool ContainsSources() => false;
public IList<ISource> GetSources() => [];
#endregion
}

View File

@@ -0,0 +1,15 @@
namespace AIStudio.Provider;
/// <summary>
/// A marker record indicating that no annotation lines are expected in a Responses API stream.
/// </summary>
public sealed record NoResponsesAnnotationStreamLine : IAnnotationStreamLine
{
#region Implementation of IAnnotationStreamLine
public bool ContainsSources() => false;
public IList<ISource> GetSources() => [];
#endregion
}

View File

@@ -0,0 +1,7 @@
namespace AIStudio.Provider.OpenAI;
/// <summary>
/// Represents an unknown annotation type.
/// </summary>
/// <param name="Type">The type of the unknown annotation.</param>
public sealed record AnnotatingUnknown(string Type) : Annotation(Type);

View File

@@ -0,0 +1,10 @@
namespace AIStudio.Provider.OpenAI;
/// <summary>
/// Base class for different types of annotations.
/// </summary>
/// <remarks>
/// We use this base class to represent the various annotation types across all LLM providers.
/// </remarks>
/// <param name="Type">The type of the annotation.</param>
public abstract record Annotation(string Type);

View File

@@ -0,0 +1,62 @@
using System.Text.Json;
using System.Text.Json.Serialization;
namespace AIStudio.Provider.OpenAI;
/// <summary>
/// Custom JSON converter for the annotation class to handle polymorphic deserialization.
/// </summary>
/// <remarks>
/// We use this converter to deserialize annotations from both the Chat Completion API and the Responses API.
/// </remarks>
public sealed class AnnotationConverter : JsonConverter<Annotation>
{
public override Annotation? Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
{
using var doc = JsonDocument.ParseValue(ref reader);
var root = doc.RootElement;
if (!root.TryGetProperty("type", out var typeElement))
return null;
var type = typeElement.GetString();
var rawText = root.GetRawText();
Annotation? annotation;
switch (type)
{
case "url_citation":
// Let's check the responses API data type first:
var responsesAnnotation = JsonSerializer.Deserialize<ResponsesAnnotatingUrlCitationData>(rawText, options);
// If it fails, let's try the chat completion API data type:
if(responsesAnnotation is null || string.IsNullOrWhiteSpace(responsesAnnotation.Title) || string.IsNullOrWhiteSpace(responsesAnnotation.URL))
{
// Try chat completion API data type:
var chatCompletionAnnotation = JsonSerializer.Deserialize<ChatCompletionAnnotatingURL>(rawText, options);
// If both fail, we return the unknown type:
if(chatCompletionAnnotation is null)
annotation = new AnnotatingUnknown(type);
else
annotation = chatCompletionAnnotation;
}
else
annotation = responsesAnnotation;
break;
default:
annotation = new AnnotatingUnknown(type ?? "unknown");
break;
}
return annotation;
}
public override void Write(Utf8JsonWriter writer, Annotation value, JsonSerializerOptions options)
{
JsonSerializer.Serialize(writer, value, value.GetType(), options);
}
}
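A short sketch of the converter in action, assuming the snake_case serializer options from BaseProvider and a using directive for System.Text.Json; both payloads are abbreviated:
var options = new JsonSerializerOptions
{
    PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower,
    Converters = { new AnnotationConverter() }
};

// Flat url_citation fields: resolved as the Responses API type.
var responses = JsonSerializer.Deserialize<Annotation>(
    """{"type":"url_citation","start_index":0,"end_index":5,"title":"Example","url":"https://example.com/"}""",
    options); // -> ResponsesAnnotatingUrlCitationData

// Nested url_citation object: the first attempt yields no title/URL,
// so the converter falls back to the chat completion type.
var chat = JsonSerializer.Deserialize<Annotation>(
    """{"type":"url_citation","url_citation":{"start_index":0,"end_index":5,"title":"Example","url":"https://example.com/"}}""",
    options); // -> ChatCompletionAnnotatingURL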

View File

@@ -0,0 +1,16 @@
namespace AIStudio.Provider.OpenAI;
/// <summary>
/// Data structure for URL annotation in chat completions.
/// </summary>
/// <remarks>
/// Although this class is not directly intended for the Responses API, it is
/// used there as a fallback. One day, one of the open-source LLM
/// drivers may use this data structure for its Responses API.
/// </remarks>
/// <param name="Type">The type of annotation, typically "url_citation".</param>
/// <param name="UrlCitation">The URL citation details.</param>
public sealed record ChatCompletionAnnotatingURL(
string Type,
ChatCompletionUrlCitationData UrlCitation
) : Annotation(Type);

View File

@@ -0,0 +1,8 @@
namespace AIStudio.Provider.OpenAI;
/// <summary>
/// Data structure representing a choice in a chat completion annotation response.
/// </summary>
/// <param name="Index">The index of the choice.</param>
/// <param name="Delta">The delta information for the choice.</param>
public record ChatCompletionAnnotationChoice(int Index, ChatCompletionAnnotationDelta Delta);

View File

@@ -0,0 +1,7 @@
namespace AIStudio.Provider.OpenAI;
/// <summary>
/// Data structure representing annotation deltas in chat completions.
/// </summary>
/// <param name="Annotations">The list of annotations, which can be null.</param>
public record ChatCompletionAnnotationDelta(IList<Annotation>? Annotations);

View File

@@ -0,0 +1,57 @@
namespace AIStudio.Provider.OpenAI;
/// <summary>
/// Represents a line of a chat completion annotation stream.
/// </summary>
/// <param name="Id">The unique identifier of the chat completion.</param>
/// <param name="Object">The type of object returned, typically "chat.completion".</param>
/// <param name="Created">The creation timestamp of the chat completion in Unix epoch format.</param>
/// <param name="Model">The model used for the chat completion.</param>
/// <param name="SystemFingerprint">The system fingerprint associated with the chat completion.</param>
/// <param name="Choices">The list of choices returned in the chat completion.</param>
public record ChatCompletionAnnotationStreamLine(string Id, string Object, uint Created, string Model, string SystemFingerprint, IList<ChatCompletionAnnotationChoice> Choices) : IAnnotationStreamLine
{
#region Implementation of IAnnotationStreamLine
/// <inheritdoc />
public bool ContainsSources() => this.Choices.Any(choice => choice.Delta.Annotations is not null && choice.Delta.Annotations.Any(annotation => annotation is not AnnotatingUnknown));
/// <inheritdoc />
public IList<ISource> GetSources()
{
var sources = new List<ISource>();
foreach (var choice in this.Choices)
{
if (choice.Delta.Annotations is null)
continue;
// Iterate through all annotations:
foreach (var annotation in choice.Delta.Annotations)
{
// Check if the annotation is of the expected type and extract the source information:
if (annotation is ChatCompletionAnnotatingURL urlAnnotation)
sources.Add(new Source(urlAnnotation.UrlCitation.Title, urlAnnotation.UrlCitation.URL));
//
// Check for the unexpected annotation type of the Responses API.
//
// This seems weird at first, but there are two reasons why this could happen:
// - Any of the open-source providers such as Ollama, LM Studio, etc. could
// implement and use the Responses API data structures for annotations in their
// chat completion endpoint.
//
// - Our custom JSON converter checks for the Responses API data type first. If it
// fails, it checks for the chat completion API data type. So, when the Responses
// API data type is valid, it will be deserialized into that type, even though
// we are calling the chat completion endpoint.
//
if (annotation is ResponsesAnnotatingUrlCitationData citationData)
sources.Add(new Source(citationData.Title, citationData.URL));
}
}
return sources;
}
#endregion
}
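For reference, an abbreviated, illustrative chat completion annotation line of the shape this record models; the ids and the model name are placeholders, and extra fields in real payloads are ignored during deserialization:
data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1725350000,"model":"gpt-4o","system_fingerprint":"fp_123","choices":[{"index":0,"delta":{"annotations":[{"type":"url_citation","url_citation":{"start_index":0,"end_index":5,"title":"Example","url":"https://example.com/"}}]}}]}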

View File

@@ -0,0 +1,45 @@
namespace AIStudio.Provider.OpenAI;
/// <summary>
/// Data model for a delta line in the chat completion response stream.
/// </summary>
/// <param name="Id">The id of the response.</param>
/// <param name="Object">The object describing the response.</param>
/// <param name="Created">The timestamp of the response.</param>
/// <param name="Model">The model used for the response.</param>
/// <param name="SystemFingerprint">The system fingerprint; together with the seed, this allows you to reproduce the response.</param>
/// <param name="Choices">The choices made by the AI.</param>
public record ChatCompletionDeltaStreamLine(string Id, string Object, uint Created, string Model, string SystemFingerprint, IList<ChatCompletionChoice> Choices) : IResponseStreamLine
{
public ChatCompletionDeltaStreamLine() : this(string.Empty, string.Empty, 0, string.Empty, string.Empty, [])
{
}
/// <inheritdoc />
public bool ContainsContent() => this.Choices.Count > 0;
/// <inheritdoc />
public ContentStreamChunk GetContent() => new(this.Choices[0].Delta.Content, []);
#region Implementation of IAnnotationStreamLine
//
// Please note that there are multiple ways in which LLM providers might stream sources:
//
// - As part of the delta content while streaming. That would be part of this class.
// - By using a dedicated stream event and data structure. That would be another class implementing IAnnotationStreamLine.
//
// Right now, OpenAI uses the latter approach, so we don't have any sources here. And
// because no other provider does it yet, we don't have any implementation here either.
//
// One example where sources are part of the delta content is the Perplexity provider.
//
/// <inheritdoc />
public bool ContainsSources() => false;
/// <inheritdoc />
public IList<ISource> GetSources() => [];
#endregion
}

View File

@@ -1,33 +0,0 @@
namespace AIStudio.Provider.OpenAI;
/// <summary>
/// Data model for a line in the response stream, for streaming chat completions.
/// </summary>
/// <param name="Id">The id of the response.</param>
/// <param name="Object">The object describing the response.</param>
/// <param name="Created">The timestamp of the response.</param>
/// <param name="Model">The model used for the response.</param>
/// <param name="SystemFingerprint">The system fingerprint; together with the seed, this allows you to reproduce the response.</param>
/// <param name="Choices">The choices made by the AI.</param>
public record ChatCompletionResponseStreamLine(string Id, string Object, uint Created, string Model, string SystemFingerprint, IList<ChatCompletionChoice> Choices) : IResponseStreamLine
{
public ChatCompletionResponseStreamLine() : this(string.Empty, string.Empty, 0, string.Empty, string.Empty, [])
{
}
/// <inheritdoc />
public bool ContainsContent() => this.Choices.Count > 0;
/// <inheritdoc />
public ContentStreamChunk GetContent() => new(this.Choices[0].Delta.Content, []);
#region Implementation of IAnnotationStreamLine
/// <inheritdoc />
public bool ContainsSources() => false;
/// <inheritdoc />
public IList<ISource> GetSources() => [];
#endregion
}

View File

@@ -0,0 +1,14 @@
namespace AIStudio.Provider.OpenAI;
/// <summary>
/// Represents citation data for a URL in a chat completion response.
/// </summary>
/// <param name="EndIndex">The end index of the citation in the response text.</param>
/// <param name="StartIndex">The start index of the citation in the response text.</param>
/// <param name="Title">The title of the cited source.</param>
/// <param name="URL">The URL of the cited source.</param>
public sealed record ChatCompletionUrlCitationData(
int EndIndex,
int StartIndex,
string Title,
string URL);

View File

@@ -55,51 +55,113 @@ public sealed class ProviderOpenAI(ILogger logger) : BaseProvider("https://api.o
_ => systemPromptRole,
};
this.logger.LogInformation($"Using the system prompt role '{systemPromptRole}' for model '{chatModel.Id}'.");
// Read the model capabilities:
var modelCapabilities = this.GetModelCapabilities(chatModel);
// Check if we are using the Responses API or the Chat Completion API:
var usingResponsesAPI = modelCapabilities.Contains(Capability.RESPONSES_API);
// Prepare the request path based on the API we are using:
var requestPath = usingResponsesAPI ? "responses" : "chat/completions";
this.logger.LogInformation("Using the system prompt role '{SystemPromptRole}' and the '{RequestPath}' API for model '{ChatModelId}'.", systemPromptRole, requestPath, chatModel.Id);
// Prepare the system prompt:
var systemPrompt = new Message
{
Role = systemPromptRole,
Content = chatThread.PrepareSystemPrompt(settingsManager, chatThread, this.logger),
};
// Prepare the OpenAI HTTP chat request:
var openAIChatRequest = JsonSerializer.Serialize(new ChatCompletionAPIRequest
//
// Prepare the tools we want to use:
//
IList<Tool> tools = modelCapabilities.Contains(Capability.WEB_SEARCH) switch
{
Model = chatModel.Id,
// Build the messages:
// - First of all the system prompt
// - Then non-empty user and AI messages
Messages = [systemPrompt, ..chatThread.Blocks.Where(n => n.ContentType is ContentType.TEXT && !string.IsNullOrWhiteSpace((n.Content as ContentText)?.Text)).Select(n => new Message
true => [ Tools.WEB_SEARCH ],
_ => []
};
//
// Create the request: either for the Responses API or the Chat Completion API
//
var openAIChatRequest = usingResponsesAPI switch
{
// Chat Completion API request:
false => JsonSerializer.Serialize(new ChatCompletionAPIRequest
{
Role = n.Role switch
{
ChatRole.USER => "user",
ChatRole.AI => "assistant",
ChatRole.AGENT => "assistant",
ChatRole.SYSTEM => systemPromptRole,
_ => "user",
},
Content = n.Content switch
{
ContentText text => text.Text,
_ => string.Empty,
}
}).ToList()],
Model = chatModel.Id,
// Right now, we only support streaming completions:
Stream = true,
}, JSON_SERIALIZER_OPTIONS);
// Build the messages:
// - First of all the system prompt
// - Then non-empty user and AI messages
Messages = [systemPrompt, ..chatThread.Blocks.Where(n => n.ContentType is ContentType.TEXT && !string.IsNullOrWhiteSpace((n.Content as ContentText)?.Text)).Select(n => new Message
{
Role = n.Role switch
{
ChatRole.USER => "user",
ChatRole.AI => "assistant",
ChatRole.AGENT => "assistant",
ChatRole.SYSTEM => systemPromptRole,
_ => "user",
},
Content = n.Content switch
{
ContentText text => text.Text,
_ => string.Empty,
}
}).ToList()],
// Right now, we only support streaming completions:
Stream = true,
}, JSON_SERIALIZER_OPTIONS),
// Responses API request:
true => JsonSerializer.Serialize(new ResponsesAPIRequest
{
Model = chatModel.Id,
// Build the messages:
// - First of all the system prompt
// - Then non-empty user and AI messages
Input = [systemPrompt, ..chatThread.Blocks.Where(n => n.ContentType is ContentType.TEXT && !string.IsNullOrWhiteSpace((n.Content as ContentText)?.Text)).Select(n => new Message
{
Role = n.Role switch
{
ChatRole.USER => "user",
ChatRole.AI => "assistant",
ChatRole.AGENT => "assistant",
ChatRole.SYSTEM => systemPromptRole,
_ => "user",
},
Content = n.Content switch
{
ContentText text => text.Text,
_ => string.Empty,
}
}).ToList()],
// Right now, we only support streaming completions:
Stream = true,
// We do not want to store any data on OpenAI's servers:
Store = false,
// Tools we want to use:
Tools = tools,
}, JSON_SERIALIZER_OPTIONS),
};
async Task<HttpRequestMessage> RequestBuilder()
{
// Build the HTTP post request:
var request = new HttpRequestMessage(HttpMethod.Post, "chat/completions");
var request = new HttpRequestMessage(HttpMethod.Post, requestPath);
// Set the authorization header:
request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", await requestedSecret.Secret.Decrypt(ENCRYPTION));
@@ -108,18 +170,24 @@ public sealed class ProviderOpenAI(ILogger logger) : BaseProvider("https://api.o
request.Content = new StringContent(openAIChatRequest, Encoding.UTF8, "application/json");
return request;
}
if (usingResponsesAPI)
await foreach (var content in this.StreamResponsesInternal<ResponsesDeltaStreamLine, ResponsesAnnotationStreamLine>("OpenAI", RequestBuilder, token))
yield return content;
await foreach (var content in this.StreamChatCompletionInternal<ResponseStreamLine>("OpenAI", RequestBuilder, token))
yield return content;
await foreach (var content in this.StreamChatCompletionInternal<ChatCompletionResponseStreamLine>("OpenAI", RequestBuilder, token))
else
await foreach (var content in this.StreamChatCompletionInternal<ChatCompletionDeltaStreamLine, ChatCompletionAnnotationStreamLine>("OpenAI", RequestBuilder, token))
yield return content;
}
#pragma warning disable CS1998 // Async method lacks 'await' operators and will run synchronously
/// <inheritdoc />
public override async IAsyncEnumerable<ImageURL> StreamImageCompletion(Model imageModel, string promptPositive, string promptNegative = FilterOperator.String.Empty, ImageURL referenceImageURL = default, [EnumeratorCancellation] CancellationToken token = default)
{
yield break;
}
#pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously
/// <inheritdoc />
@@ -213,7 +281,7 @@ public sealed class ProviderOpenAI(ILogger logger) : BaseProvider("https://api.o
Capability.RESPONSES_API,
];
if (modelName.StartsWith("o4-mini") || modelName.StartsWith("o1") || modelName.StartsWith("o3"))
if (modelName.StartsWith("o4-mini") || modelName.StartsWith("o3"))
return
[
Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT,
@@ -224,6 +292,16 @@ public sealed class ProviderOpenAI(ILogger logger) : BaseProvider("https://api.o
Capability.RESPONSES_API,
];
if (modelName.StartsWith("o1"))
return
[
Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT,
Capability.TEXT_OUTPUT,
Capability.ALWAYS_REASONING, Capability.FUNCTION_CALLING,
Capability.RESPONSES_API,
];
if(modelName.StartsWith("gpt-4-turbo"))
return
[
@@ -242,6 +320,16 @@ public sealed class ProviderOpenAI(ILogger logger) : BaseProvider("https://api.o
Capability.RESPONSES_API,
];
if(modelName.StartsWith("gpt-5-nano"))
return
[
Capability.TEXT_INPUT, Capability.MULTIPLE_IMAGE_INPUT,
Capability.TEXT_OUTPUT,
Capability.FUNCTION_CALLING, Capability.ALWAYS_REASONING,
Capability.RESPONSES_API,
];
if(modelName is "gpt-5" || modelName.StartsWith("gpt-5-"))
return
[

View File

@@ -0,0 +1,21 @@
namespace AIStudio.Provider.OpenAI;
/// <summary>
/// The request body for the Responses API.
/// </summary>
/// <param name="Model">Which model to use.</param>
/// <param name="Input">The chat messages.</param>
/// <param name="Stream">Whether to stream the response.</param>
/// <param name="Store">Whether to store the response on the server (usually OpenAI's infrastructure).</param>
/// <param name="Tools">The tools to use for the request.</param>
public record ResponsesAPIRequest(
string Model,
IList<Message> Input,
bool Stream,
bool Store,
IList<Tool> Tools)
{
public ResponsesAPIRequest() : this(string.Empty, [], true, false, [])
{
}
}
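With BaseProvider's snake_case serializer options, such a request serializes roughly as follows; the model id and message content are placeholders:
{"model":"gpt-5-mini","input":[{"role":"system","content":"..."},{"role":"user","content":"Hello!"}],"stream":true,"store":false,"tools":[{"type":"web_search"}]}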

View File

@@ -0,0 +1,16 @@
namespace AIStudio.Provider.OpenAI;
/// <summary>
/// Data structure for URL citation annotations in the OpenAI Responses API.
/// </summary>
/// <param name="Type">The type of annotation, typically "url_citation".</param>
/// <param name="EndIndex">The end index of the annotated text in the response.</param>
/// <param name="StartIndex">The start index of the annotated text in the response.</param>
/// <param name="Title">The title of the cited URL.</param>
/// <param name="URL">The URL being cited.</param>
public sealed record ResponsesAnnotatingUrlCitationData(
string Type,
int EndIndex,
int StartIndex,
string Title,
string URL) : Annotation(Type);

View File

@@ -0,0 +1,45 @@
namespace AIStudio.Provider.OpenAI;
/// <summary>
/// Data structure for a line in the response stream of the Responses API, containing an annotation.
/// </summary>
/// <param name="Type">The type of the annotation.</param>
/// <param name="AnnotationIndex">The running index of the annotation within the response.</param>
/// <param name="Annotation">The annotation details.</param>
public sealed record ResponsesAnnotationStreamLine(string Type, int AnnotationIndex, Annotation Annotation) : IAnnotationStreamLine
{
#region Implementation of IAnnotationStreamLine
/// <inheritdoc />
public bool ContainsSources()
{
return this.Annotation is not AnnotatingUnknown;
}
/// <inheritdoc />
public IList<ISource> GetSources()
{
//
// Check for the unexpected annotation type of the chat completion API.
//
// This seems weird at first, but there are two reasons why this could happen:
// - Any of the open-source providers such as Ollama, LM Studio, etc. could
// implement and use the chat completion API data structures for annotations in their
// Responses API endpoint.
//
// - Our custom JSON converter checks for all possible annotation data types. So,
// when the streamed data is valid for any annotation type, it will be deserialized
// into that type, even though we are calling the Responses API endpoint.
//
if (this.Annotation is ChatCompletionAnnotatingURL urlAnnotation)
return [new Source(urlAnnotation.UrlCitation.Title, urlAnnotation.UrlCitation.URL)];
// Check for the expected annotation type of the Responses API:
if (this.Annotation is ResponsesAnnotatingUrlCitationData urlCitationData)
return [new Source(urlCitationData.Title, urlCitationData.URL)];
return [];
}
#endregion
}

View File

@@ -0,0 +1,39 @@
namespace AIStudio.Provider.OpenAI;
/// <summary>
/// Data model for a delta line in the Responses API response stream.
/// </summary>
/// <param name="Type">The type of the response.</param>
/// <param name="Delta">The delta content of the response.</param>
public record ResponsesDeltaStreamLine(
string Type,
string Delta) : IResponseStreamLine
{
#region Implementation of IResponseStreamLine
/// <inheritdoc />
public bool ContainsContent() => !string.IsNullOrWhiteSpace(this.Delta);
/// <inheritdoc />
public ContentStreamChunk GetContent() => new(this.Delta, this.GetSources());
//
// Please note that there are multiple ways in which LLM providers might stream sources:
//
// - As part of the delta content while streaming. That would be part of this class.
// - By using a dedicated stream event and data structure. That would be another class implementing IAnnotationStreamLine.
//
// Right now, OpenAI uses the latter approach, so we don't have any sources here. And
// because no other provider does it yet, we don't have any implementation here either.
//
// One example where sources are part of the delta content is the Perplexity provider.
//
/// <inheritdoc />
public bool ContainsSources() => false;
/// <inheritdoc />
public IList<ISource> GetSources() => [];
#endregion
}

View File

@@ -0,0 +1,12 @@
namespace AIStudio.Provider.OpenAI;
/// <summary>
/// Represents a tool used by the AI model.
/// </summary>
/// <remarks>
/// Right now, only our OpenAI provider is using tools. Thus, this class is located in the
/// OpenAI namespace. In the future, when other providers also support tools, this class can
/// be moved into the provider namespace.
/// </remarks>
/// <param name="Type">The type of the tool.</param>
public record Tool(string Type);

View File

@@ -0,0 +1,14 @@
namespace AIStudio.Provider.OpenAI;
/// <summary>
/// Known tools for LLM providers.
/// </summary>
/// <remarks>
/// Right now, only our OpenAI provider is using tools. Thus, this class is located in the
/// OpenAI namespace. In the future, when other providers also support tools, this class can
/// be moved into the provider namespace.
/// </remarks>
public static class Tools
{
public static readonly Tool WEB_SEARCH = new("web_search");
}
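As a usage sketch (not part of this commit), a provider subclass could build a web-search-enabled Responses API request like this; systemPrompt and userMessage stand in for messages prepared elsewhere:
// Serialize a minimal Responses API request with web search enabled:
var payload = JsonSerializer.Serialize(new ResponsesAPIRequest
{
    Model = chatModel.Id,
    Input = [systemPrompt, userMessage],
    // Right now, we only support streaming completions:
    Stream = true,
    // We do not want to store any data on the provider's servers:
    Store = false,
    Tools = [Tools.WEB_SEARCH],
}, JSON_SERIALIZER_OPTIONS);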

View File

@@ -85,7 +85,7 @@ public sealed class ProviderPerplexity(ILogger logger) : BaseProvider("https://a
return request;
}
await foreach (var content in this.StreamChatCompletionInternal<ResponseStreamLine>("Perplexity", RequestBuilder, token))
await foreach (var content in this.StreamChatCompletionInternal<ResponseStreamLine, NoChatCompletionAnnotationStreamLine>("Perplexity", RequestBuilder, token))
yield return content;
}

View File

@@ -75,7 +75,7 @@ public sealed class ProviderSelfHosted(ILogger logger, Host host, string hostnam
return request;
}
await foreach (var content in this.StreamChatCompletionInternal<ChatCompletionResponseStreamLine>("self-hosted provider", RequestBuilder, token))
await foreach (var content in this.StreamChatCompletionInternal<ChatCompletionDeltaStreamLine, ChatCompletionAnnotationStreamLine>("self-hosted provider", RequestBuilder, token))
yield return content;
}

View File

@@ -78,7 +78,7 @@ public sealed class ProviderX(ILogger logger) : BaseProvider("https://api.x.ai/v
return request;
}
await foreach (var content in this.StreamChatCompletionInternal<ChatCompletionResponseStreamLine>("xAI", RequestBuilder, token))
await foreach (var content in this.StreamChatCompletionInternal<ChatCompletionDeltaStreamLine, NoChatCompletionAnnotationStreamLine>("xAI", RequestBuilder, token))
yield return content;
}