diff --git a/app/MindWork AI Studio/Provider/Anthropic/ProviderAnthropic.cs b/app/MindWork AI Studio/Provider/Anthropic/ProviderAnthropic.cs index dc9767dc..7c85ff16 100644 --- a/app/MindWork AI Studio/Provider/Anthropic/ProviderAnthropic.cs +++ b/app/MindWork AI Studio/Provider/Anthropic/ProviderAnthropic.cs @@ -58,26 +58,33 @@ public sealed class ProviderAnthropic(ILogger logger) : BaseProvider("https://ap // Right now, we only support streaming completions: Stream = true, }, JSON_SERIALIZER_OPTIONS); - - // Build the HTTP post request: - var request = new HttpRequestMessage(HttpMethod.Post, "messages"); - - // Set the authorization header: - request.Headers.Add("x-api-key", await requestedSecret.Secret.Decrypt(ENCRYPTION)); - - // Set the Anthropic version: - request.Headers.Add("anthropic-version", "2023-06-01"); - - // Set the content: - request.Content = new StringContent(chatRequest, Encoding.UTF8, "application/json"); - - // Send the request with the ResponseHeadersRead option. - // This allows us to read the stream as soon as the headers are received. - // This is important because we want to stream the responses. 
- var response = await this.httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, token); + + async Task<HttpRequestMessage> RequestBuilder() + { + // Build the HTTP post request: + var request = new HttpRequestMessage(HttpMethod.Post, "messages"); + + // Set the authorization header: + request.Headers.Add("x-api-key", await requestedSecret.Secret.Decrypt(ENCRYPTION)); + + // Set the Anthropic version: + request.Headers.Add("anthropic-version", "2023-06-01"); + + // Set the content: + request.Content = new StringContent(chatRequest, Encoding.UTF8, "application/json"); + return request; + } + + // Send the request using exponential backoff: + using var responseData = await this.SendRequest(RequestBuilder, token); + if(responseData.IsFailedAfterAllRetries) + { + this.logger.LogError($"Anthropic chat completion failed: {responseData.ErrorMessage}"); + yield break; + } // Open the response stream: - var stream = await response.Content.ReadAsStreamAsync(token); + var stream = await responseData.Response!.Content.ReadAsStreamAsync(token); // Add a stream reader to read the stream, line by line: var streamReader = new StreamReader(stream); diff --git a/app/MindWork AI Studio/Provider/BaseProvider.cs b/app/MindWork AI Studio/Provider/BaseProvider.cs index e52d0c20..32ee6853 100644 --- a/app/MindWork AI Studio/Provider/BaseProvider.cs +++ b/app/MindWork AI Studio/Provider/BaseProvider.cs @@ -74,4 +74,47 @@ public abstract class BaseProvider : IProvider, ISecretId public string SecretName => this.InstanceName; #endregion + + /// <summary> + /// Sends a request and handles rate limiting by exponential backoff. + /// </summary> + /// <param name="requestBuilder">A function that builds the request.</param> + /// <param name="token">The cancellation token.</param> + /// <returns>The status object of the request.</returns> 
+ protected async Task<HttpRateLimitedStreamResult> SendRequest(Func<Task<HttpRequestMessage>> requestBuilder, CancellationToken token = default) + { + const int MAX_RETRIES = 6; + const double RETRY_DELAY_SECONDS = 4; + + var retry = 0; + var response = default(HttpResponseMessage); + var errorMessage = string.Empty; + while (retry++ < MAX_RETRIES) + { + using var request = await requestBuilder(); + + // Send the request with the ResponseHeadersRead option. + // This allows us to read the stream as soon as the headers are received. + // This is important because we want to stream the responses. + var nextResponse = await this.httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, token); + if (nextResponse.IsSuccessStatusCode) + { + response = nextResponse; + break; + } + + errorMessage = nextResponse.ReasonPhrase; + var timeSeconds = Math.Pow(RETRY_DELAY_SECONDS, retry + 1); + if(timeSeconds > 90) + timeSeconds = 90; + + this.logger.LogDebug($"Failed request with status code {nextResponse.StatusCode} (message = '{errorMessage}'). Retrying in {timeSeconds:0.00} seconds."); + await Task.Delay(TimeSpan.FromSeconds(timeSeconds), token); + } + + if(response is null) + return new HttpRateLimitedStreamResult(false, true, errorMessage ?? 
$"Failed after {MAX_RETRIES} retries; no provider message available", response); + + return new HttpRateLimitedStreamResult(true, false, string.Empty, response); + } } \ No newline at end of file diff --git a/app/MindWork AI Studio/Provider/Fireworks/ProviderFireworks.cs b/app/MindWork AI Studio/Provider/Fireworks/ProviderFireworks.cs index 709aad15..3ab0c500 100644 --- a/app/MindWork AI Studio/Provider/Fireworks/ProviderFireworks.cs +++ b/app/MindWork AI Studio/Provider/Fireworks/ProviderFireworks.cs @@ -68,22 +68,29 @@ public class ProviderFireworks(ILogger logger) : BaseProvider("https://api.firew Stream = true, }, JSON_SERIALIZER_OPTIONS); - // Build the HTTP post request: - var request = new HttpRequestMessage(HttpMethod.Post, "chat/completions"); - - // Set the authorization header: - request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", await requestedSecret.Secret.Decrypt(ENCRYPTION)); - - // Set the content: - request.Content = new StringContent(fireworksChatRequest, Encoding.UTF8, "application/json"); - - // Send the request with the ResponseHeadersRead option. - // This allows us to read the stream as soon as the headers are received. - // This is important because we want to stream the responses. 
- var response = await this.httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, token); + async Task<HttpRequestMessage> RequestBuilder() + { + // Build the HTTP post request: + var request = new HttpRequestMessage(HttpMethod.Post, "chat/completions"); + + // Set the authorization header: + request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", await requestedSecret.Secret.Decrypt(ENCRYPTION)); + + // Set the content: + request.Content = new StringContent(fireworksChatRequest, Encoding.UTF8, "application/json"); + return request; + } + + // Send the request using exponential backoff: + using var responseData = await this.SendRequest(RequestBuilder, token); + if(responseData.IsFailedAfterAllRetries) + { + this.logger.LogError($"Fireworks chat completion failed: {responseData.ErrorMessage}"); + yield break; + } // Open the response stream: - var fireworksStream = await response.Content.ReadAsStreamAsync(token); + var fireworksStream = await responseData.Response!.Content.ReadAsStreamAsync(token); // Add a stream reader to read the stream, line by line: var streamReader = new StreamReader(fireworksStream); diff --git a/app/MindWork AI Studio/Provider/Google/ProviderGoogle.cs b/app/MindWork AI Studio/Provider/Google/ProviderGoogle.cs index 6ca6d923..e3ec1733 100644 --- a/app/MindWork AI Studio/Provider/Google/ProviderGoogle.cs +++ b/app/MindWork AI Studio/Provider/Google/ProviderGoogle.cs @@ -69,22 +69,29 @@ public class ProviderGoogle(ILogger logger) : BaseProvider("https://generativela Stream = true, }, JSON_SERIALIZER_OPTIONS); - // Build the HTTP post request: - var request = new HttpRequestMessage(HttpMethod.Post, "chat/completions"); - - // Set the authorization header: - request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", await requestedSecret.Secret.Decrypt(ENCRYPTION)); - - // Set the content: - request.Content = new StringContent(geminiChatRequest, Encoding.UTF8, "application/json"); - - // Send the request with the 
ResponseHeadersRead option. - // This allows us to read the stream as soon as the headers are received. - // This is important because we want to stream the responses. - var response = await this.httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, token); + async Task<HttpRequestMessage> RequestBuilder() + { + // Build the HTTP post request: + var request = new HttpRequestMessage(HttpMethod.Post, "chat/completions"); + + // Set the authorization header: + request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", await requestedSecret.Secret.Decrypt(ENCRYPTION)); + + // Set the content: + request.Content = new StringContent(geminiChatRequest, Encoding.UTF8, "application/json"); + return request; + } + + // Send the request using exponential backoff: + using var responseData = await this.SendRequest(RequestBuilder, token); + if(responseData.IsFailedAfterAllRetries) + { + this.logger.LogError($"Google chat completion failed: {responseData.ErrorMessage}"); + yield break; + } // Open the response stream: - var geminiStream = await response.Content.ReadAsStreamAsync(token); + var geminiStream = await responseData.Response!.Content.ReadAsStreamAsync(token); // Add a stream reader to read the stream, line by line: var streamReader = new StreamReader(geminiStream); diff --git a/app/MindWork AI Studio/Provider/Groq/ProviderGroq.cs b/app/MindWork AI Studio/Provider/Groq/ProviderGroq.cs index 477f9a0f..5d0fed83 100644 --- a/app/MindWork AI Studio/Provider/Groq/ProviderGroq.cs +++ b/app/MindWork AI Studio/Provider/Groq/ProviderGroq.cs @@ -71,22 +71,29 @@ public class ProviderGroq(ILogger logger) : BaseProvider("https://api.groq.com/o Stream = true, }, JSON_SERIALIZER_OPTIONS); - // Build the HTTP post request: - var request = new HttpRequestMessage(HttpMethod.Post, "chat/completions"); - - // Set the authorization header: - request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", await requestedSecret.Secret.Decrypt(ENCRYPTION)); - - // Set the 
content: - request.Content = new StringContent(groqChatRequest, Encoding.UTF8, "application/json"); - - // Send the request with the ResponseHeadersRead option. - // This allows us to read the stream as soon as the headers are received. - // This is important because we want to stream the responses. - var response = await this.httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, token); + async Task<HttpRequestMessage> RequestBuilder() + { + // Build the HTTP post request: + var request = new HttpRequestMessage(HttpMethod.Post, "chat/completions"); + + // Set the authorization header: + request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", await requestedSecret.Secret.Decrypt(ENCRYPTION)); + + // Set the content: + request.Content = new StringContent(groqChatRequest, Encoding.UTF8, "application/json"); + return request; + } + + // Send the request using exponential backoff: + using var responseData = await this.SendRequest(RequestBuilder, token); + if(responseData.IsFailedAfterAllRetries) + { + this.logger.LogError($"Groq chat completion failed: {responseData.ErrorMessage}"); + yield break; + } // Open the response stream: - var groqStream = await response.Content.ReadAsStreamAsync(token); + var groqStream = await responseData.Response!.Content.ReadAsStreamAsync(token); // Add a stream reader to read the stream, line by line: var streamReader = new StreamReader(groqStream); diff --git a/app/MindWork AI Studio/Provider/Mistral/ProviderMistral.cs b/app/MindWork AI Studio/Provider/Mistral/ProviderMistral.cs index 633fa94b..b51778ab 100644 --- a/app/MindWork AI Studio/Provider/Mistral/ProviderMistral.cs +++ b/app/MindWork AI Studio/Provider/Mistral/ProviderMistral.cs @@ -70,22 +70,29 @@ public sealed class ProviderMistral(ILogger logger) : BaseProvider("https://api. 
SafePrompt = false, }, JSON_SERIALIZER_OPTIONS); - // Build the HTTP post request: - var request = new HttpRequestMessage(HttpMethod.Post, "chat/completions"); - - // Set the authorization header: - request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", await requestedSecret.Secret.Decrypt(ENCRYPTION)); - - // Set the content: - request.Content = new StringContent(mistralChatRequest, Encoding.UTF8, "application/json"); - - // Send the request with the ResponseHeadersRead option. - // This allows us to read the stream as soon as the headers are received. - // This is important because we want to stream the responses. - var response = await this.httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, token); + async Task<HttpRequestMessage> RequestBuilder() + { + // Build the HTTP post request: + var request = new HttpRequestMessage(HttpMethod.Post, "chat/completions"); + + // Set the authorization header: + request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", await requestedSecret.Secret.Decrypt(ENCRYPTION)); + + // Set the content: + request.Content = new StringContent(mistralChatRequest, Encoding.UTF8, "application/json"); + return request; + } + + // Send the request using exponential backoff: + using var responseData = await this.SendRequest(RequestBuilder, token); + if(responseData.IsFailedAfterAllRetries) + { + this.logger.LogError($"Mistral chat completion failed: {responseData.ErrorMessage}"); + yield break; + } // Open the response stream: - var mistralStream = await response.Content.ReadAsStreamAsync(token); + var mistralStream = await responseData.Response!.Content.ReadAsStreamAsync(token); // Add a stream reader to read the stream, line by line: var streamReader = new StreamReader(mistralStream); diff --git a/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs b/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs index 2f1c25ad..767d4fe1 100644 --- a/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs +++ 
b/app/MindWork AI Studio/Provider/OpenAI/ProviderOpenAI.cs @@ -74,22 +74,29 @@ public sealed class ProviderOpenAI(ILogger logger) : BaseProvider("https://api.o FrequencyPenalty = 0f, }, JSON_SERIALIZER_OPTIONS); - // Build the HTTP post request: - var request = new HttpRequestMessage(HttpMethod.Post, "chat/completions"); - - // Set the authorization header: - request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", await requestedSecret.Secret.Decrypt(ENCRYPTION)); - - // Set the content: - request.Content = new StringContent(openAIChatRequest, Encoding.UTF8, "application/json"); - - // Send the request with the ResponseHeadersRead option. - // This allows us to read the stream as soon as the headers are received. - // This is important because we want to stream the responses. - var response = await this.httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, token); + async Task<HttpRequestMessage> RequestBuilder() + { + // Build the HTTP post request: + var request = new HttpRequestMessage(HttpMethod.Post, "chat/completions"); + + // Set the authorization header: + request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", await requestedSecret.Secret.Decrypt(ENCRYPTION)); + + // Set the content: + request.Content = new StringContent(openAIChatRequest, Encoding.UTF8, "application/json"); + return request; + } + // Send the request using exponential backoff: + using var responseData = await this.SendRequest(RequestBuilder, token); + if(responseData.IsFailedAfterAllRetries) + { + this.logger.LogError($"OpenAI chat completion failed: {responseData.ErrorMessage}"); + yield break; + } + // Open the response stream: - var openAIStream = await response.Content.ReadAsStreamAsync(token); + var openAIStream = await responseData.Response!.Content.ReadAsStreamAsync(token); // Add a stream reader to read the stream, line by line: var streamReader = new StreamReader(openAIStream); diff --git a/app/MindWork AI 
Studio/Provider/SelfHosted/ProviderSelfHosted.cs b/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs index 46958e96..ec81247b 100644 --- a/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs +++ b/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs @@ -69,23 +69,30 @@ public sealed class ProviderSelfHosted(ILogger logger, Host host, string hostnam StreamReader? streamReader = default; try { - // Build the HTTP post request: - var request = new HttpRequestMessage(HttpMethod.Post, host.ChatURL()); + async Task<HttpRequestMessage> RequestBuilder() + { + // Build the HTTP post request: + var request = new HttpRequestMessage(HttpMethod.Post, host.ChatURL()); - // Set the authorization header: - if (requestedSecret.Success) - request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", await requestedSecret.Secret.Decrypt(ENCRYPTION)); + // Set the authorization header: + if (requestedSecret.Success) + request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", await requestedSecret.Secret.Decrypt(ENCRYPTION)); - // Set the content: - request.Content = new StringContent(providerChatRequest, Encoding.UTF8, "application/json"); - - // Send the request with the ResponseHeadersRead option. - // This allows us to read the stream as soon as the headers are received. - // This is important because we want to stream the responses. 
- var response = await this.httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, token); + // Set the content: + request.Content = new StringContent(providerChatRequest, Encoding.UTF8, "application/json"); + return request; + } + + // Send the request using exponential backoff: + using var responseData = await this.SendRequest(RequestBuilder, token); + if(responseData.IsFailedAfterAllRetries) + { + this.logger.LogError($"Self-hosted provider's chat completion failed: {responseData.ErrorMessage}"); + yield break; + } // Open the response stream: - var providerStream = await response.Content.ReadAsStreamAsync(token); + var providerStream = await responseData.Response!.Content.ReadAsStreamAsync(token); // Add a stream reader to read the stream, line by line: streamReader = new StreamReader(providerStream); diff --git a/app/MindWork AI Studio/Tools/HttpRateLimitedStreamResult.cs b/app/MindWork AI Studio/Tools/HttpRateLimitedStreamResult.cs new file mode 100644 index 00000000..1e02c86a --- /dev/null +++ b/app/MindWork AI Studio/Tools/HttpRateLimitedStreamResult.cs @@ -0,0 +1,23 @@ +namespace AIStudio.Tools; + +/// <summary> +/// The result of a rate-limited HTTP stream. +/// </summary> +/// <param name="IsFailedAfterAllRetries">True, when the stream failed after all retries.</param> +/// <param name="ErrorMessage">The error message which we might show to the user.</param> +/// <param name="Response">The response from the server.</param> +public readonly record struct HttpRateLimitedStreamResult( + bool IsSuccessful, + bool IsFailedAfterAllRetries, + string ErrorMessage, + HttpResponseMessage? 
Response) : IDisposable +{ + #region IDisposable + + public void Dispose() + { + this.Response?.Dispose(); + } + + #endregion +} \ No newline at end of file diff --git a/app/MindWork AI Studio/wwwroot/changelog/v0.9.23.md b/app/MindWork AI Studio/wwwroot/changelog/v0.9.23.md index 965a4558..286a40e0 100644 --- a/app/MindWork AI Studio/wwwroot/changelog/v0.9.23.md +++ b/app/MindWork AI Studio/wwwroot/changelog/v0.9.23.md @@ -1,3 +1,4 @@ # v0.9.23, build 198 (2024-12-xx xx:xx UTC) - Added an ERI server coding assistant as a preview feature behind the RAG feature flag. This helps you implement an ERI server to gain access to, e.g., your enterprise data from within AI Studio. +- Improved provider requests by handling rate limits by retrying requests. - Fixed layout issues when selecting `other` items (e.g., programming languages) \ No newline at end of file