mirror of
https://github.com/MindWorkAI/AI-Studio.git
synced 2025-04-28 21:39:46 +00:00
Implemented rate limit handling for providers
This commit is contained in:
parent
e932c21709
commit
45ca0cdb23
@ -58,26 +58,33 @@ public sealed class ProviderAnthropic(ILogger logger) : BaseProvider("https://ap
|
|||||||
// Right now, we only support streaming completions:
|
// Right now, we only support streaming completions:
|
||||||
Stream = true,
|
Stream = true,
|
||||||
}, JSON_SERIALIZER_OPTIONS);
|
}, JSON_SERIALIZER_OPTIONS);
|
||||||
|
|
||||||
// Build the HTTP post request:
|
async Task<HttpRequestMessage> RequestBuilder()
|
||||||
var request = new HttpRequestMessage(HttpMethod.Post, "messages");
|
{
|
||||||
|
// Build the HTTP post request:
|
||||||
// Set the authorization header:
|
var request = new HttpRequestMessage(HttpMethod.Post, "messages");
|
||||||
request.Headers.Add("x-api-key", await requestedSecret.Secret.Decrypt(ENCRYPTION));
|
|
||||||
|
// Set the authorization header:
|
||||||
// Set the Anthropic version:
|
request.Headers.Add("x-api-key", await requestedSecret.Secret.Decrypt(ENCRYPTION));
|
||||||
request.Headers.Add("anthropic-version", "2023-06-01");
|
|
||||||
|
// Set the Anthropic version:
|
||||||
// Set the content:
|
request.Headers.Add("anthropic-version", "2023-06-01");
|
||||||
request.Content = new StringContent(chatRequest, Encoding.UTF8, "application/json");
|
|
||||||
|
// Set the content:
|
||||||
// Send the request with the ResponseHeadersRead option.
|
request.Content = new StringContent(chatRequest, Encoding.UTF8, "application/json");
|
||||||
// This allows us to read the stream as soon as the headers are received.
|
return request;
|
||||||
// This is important because we want to stream the responses.
|
}
|
||||||
var response = await this.httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, token);
|
|
||||||
|
// Send the request using exponential backoff:
|
||||||
|
using var responseData = await this.SendRequest(RequestBuilder, token);
|
||||||
|
if(responseData.IsFailedAfterAllRetries)
|
||||||
|
{
|
||||||
|
this.logger.LogError($"Anthropic chat completion failed: {responseData.ErrorMessage}");
|
||||||
|
yield break;
|
||||||
|
}
|
||||||
|
|
||||||
// Open the response stream:
|
// Open the response stream:
|
||||||
var stream = await response.Content.ReadAsStreamAsync(token);
|
var stream = await responseData.Response!.Content.ReadAsStreamAsync(token);
|
||||||
|
|
||||||
// Add a stream reader to read the stream, line by line:
|
// Add a stream reader to read the stream, line by line:
|
||||||
var streamReader = new StreamReader(stream);
|
var streamReader = new StreamReader(stream);
|
||||||
|
@ -74,4 +74,47 @@ public abstract class BaseProvider : IProvider, ISecretId
|
|||||||
public string SecretName => this.InstanceName;
|
public string SecretName => this.InstanceName;
|
||||||
|
|
||||||
#endregion
|
#endregion
|
||||||
|
|
||||||
|
/// <summary>
/// Sends a request and retries unsuccessful attempts with exponential backoff,
/// e.g., to handle rate limiting by the provider.
/// </summary>
/// <remarks>
/// The request is rebuilt through <paramref name="requestBuilder"/> for every attempt,
/// because an <see cref="HttpRequestMessage"/> instance cannot be sent more than once.
/// NOTE(review): every non-success status code is retried, not only HTTP 429. Consider
/// failing fast for non-transient errors such as 400 or 401.
/// </remarks>
/// <param name="requestBuilder">A function that builds a fresh request for each attempt.</param>
/// <param name="token">The cancellation token. It cancels both the request and the backoff delay.</param>
/// <returns>
/// The status object of the request. On success, it carries the response, and the caller
/// is responsible for disposing the result. On failure, it carries no response.
/// </returns>
protected async Task<HttpRateLimitedStreamResult> SendRequest(Func<Task<HttpRequestMessage>> requestBuilder, CancellationToken token = default)
{
    const int MAX_RETRIES = 6;
    const double RETRY_DELAY_SECONDS = 4;
    const double MAX_DELAY_SECONDS = 90;

    string? errorMessage = null;
    for (var attempt = 1; attempt <= MAX_RETRIES; attempt++)
    {
        using var request = await requestBuilder();

        // Send the request with the ResponseHeadersRead option.
        // This allows us to read the stream as soon as the headers are received.
        // This is important because we want to stream the responses.
        var response = await this.httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, token);

        // Decide on the response itself, not on the attempt counter: a success
        // on the very last attempt is still a success.
        if (response.IsSuccessStatusCode)
            return new HttpRateLimitedStreamResult(true, false, string.Empty, response);

        errorMessage = response.ReasonPhrase;
        var statusCode = response.StatusCode;

        // With ResponseHeadersRead, an undisposed response keeps its connection
        // occupied. We do not read the body of a failed attempt, so release it now.
        response.Dispose();

        // There is no point in waiting when no further attempt follows:
        if (attempt == MAX_RETRIES)
        {
            this.logger.LogDebug($"Failed request with status code {statusCode} (message = '{errorMessage}'). No retries left.");
            break;
        }

        // Exponential backoff (base^(attempt + 1) seconds), capped at the maximum delay:
        var timeSeconds = Math.Min(Math.Pow(RETRY_DELAY_SECONDS, attempt + 1), MAX_DELAY_SECONDS);

        this.logger.LogDebug($"Failed request with status code {statusCode} (message = '{errorMessage}'). Retrying in {timeSeconds:0.00} seconds.");
        await Task.Delay(TimeSpan.FromSeconds(timeSeconds), token);
    }

    // All attempts failed. Fall back to a generic message when the provider gave us none:
    if (string.IsNullOrWhiteSpace(errorMessage))
        errorMessage = $"Failed after {MAX_RETRIES} retries; no provider message available";

    return new HttpRateLimitedStreamResult(false, true, errorMessage, null);
}
|
||||||
}
|
}
|
@ -68,22 +68,29 @@ public class ProviderFireworks(ILogger logger) : BaseProvider("https://api.firew
|
|||||||
Stream = true,
|
Stream = true,
|
||||||
}, JSON_SERIALIZER_OPTIONS);
|
}, JSON_SERIALIZER_OPTIONS);
|
||||||
|
|
||||||
// Build the HTTP post request:
|
async Task<HttpRequestMessage> RequestBuilder()
|
||||||
var request = new HttpRequestMessage(HttpMethod.Post, "chat/completions");
|
{
|
||||||
|
// Build the HTTP post request:
|
||||||
// Set the authorization header:
|
var request = new HttpRequestMessage(HttpMethod.Post, "chat/completions");
|
||||||
request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", await requestedSecret.Secret.Decrypt(ENCRYPTION));
|
|
||||||
|
// Set the authorization header:
|
||||||
// Set the content:
|
request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", await requestedSecret.Secret.Decrypt(ENCRYPTION));
|
||||||
request.Content = new StringContent(fireworksChatRequest, Encoding.UTF8, "application/json");
|
|
||||||
|
// Set the content:
|
||||||
// Send the request with the ResponseHeadersRead option.
|
request.Content = new StringContent(fireworksChatRequest, Encoding.UTF8, "application/json");
|
||||||
// This allows us to read the stream as soon as the headers are received.
|
return request;
|
||||||
// This is important because we want to stream the responses.
|
}
|
||||||
var response = await this.httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, token);
|
|
||||||
|
// Send the request using exponential backoff:
|
||||||
|
using var responseData = await this.SendRequest(RequestBuilder, token);
|
||||||
|
if(responseData.IsFailedAfterAllRetries)
|
||||||
|
{
|
||||||
|
this.logger.LogError($"Fireworks chat completion failed: {responseData.ErrorMessage}");
|
||||||
|
yield break;
|
||||||
|
}
|
||||||
|
|
||||||
// Open the response stream:
|
// Open the response stream:
|
||||||
var fireworksStream = await response.Content.ReadAsStreamAsync(token);
|
var fireworksStream = await responseData.Response!.Content.ReadAsStreamAsync(token);
|
||||||
|
|
||||||
// Add a stream reader to read the stream, line by line:
|
// Add a stream reader to read the stream, line by line:
|
||||||
var streamReader = new StreamReader(fireworksStream);
|
var streamReader = new StreamReader(fireworksStream);
|
||||||
|
@ -69,22 +69,29 @@ public class ProviderGoogle(ILogger logger) : BaseProvider("https://generativela
|
|||||||
Stream = true,
|
Stream = true,
|
||||||
}, JSON_SERIALIZER_OPTIONS);
|
}, JSON_SERIALIZER_OPTIONS);
|
||||||
|
|
||||||
// Build the HTTP post request:
|
async Task<HttpRequestMessage> RequestBuilder()
|
||||||
var request = new HttpRequestMessage(HttpMethod.Post, "chat/completions");
|
{
|
||||||
|
// Build the HTTP post request:
|
||||||
// Set the authorization header:
|
var request = new HttpRequestMessage(HttpMethod.Post, "chat/completions");
|
||||||
request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", await requestedSecret.Secret.Decrypt(ENCRYPTION));
|
|
||||||
|
// Set the authorization header:
|
||||||
// Set the content:
|
request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", await requestedSecret.Secret.Decrypt(ENCRYPTION));
|
||||||
request.Content = new StringContent(geminiChatRequest, Encoding.UTF8, "application/json");
|
|
||||||
|
// Set the content:
|
||||||
// Send the request with the ResponseHeadersRead option.
|
request.Content = new StringContent(geminiChatRequest, Encoding.UTF8, "application/json");
|
||||||
// This allows us to read the stream as soon as the headers are received.
|
return request;
|
||||||
// This is important because we want to stream the responses.
|
}
|
||||||
var response = await this.httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, token);
|
|
||||||
|
// Send the request using exponential backoff:
|
||||||
|
using var responseData = await this.SendRequest(RequestBuilder, token);
|
||||||
|
if(responseData.IsFailedAfterAllRetries)
|
||||||
|
{
|
||||||
|
this.logger.LogError($"Google chat completion failed: {responseData.ErrorMessage}");
|
||||||
|
yield break;
|
||||||
|
}
|
||||||
|
|
||||||
// Open the response stream:
|
// Open the response stream:
|
||||||
var geminiStream = await response.Content.ReadAsStreamAsync(token);
|
var geminiStream = await responseData.Response!.Content.ReadAsStreamAsync(token);
|
||||||
|
|
||||||
// Add a stream reader to read the stream, line by line:
|
// Add a stream reader to read the stream, line by line:
|
||||||
var streamReader = new StreamReader(geminiStream);
|
var streamReader = new StreamReader(geminiStream);
|
||||||
|
@ -71,22 +71,29 @@ public class ProviderGroq(ILogger logger) : BaseProvider("https://api.groq.com/o
|
|||||||
Stream = true,
|
Stream = true,
|
||||||
}, JSON_SERIALIZER_OPTIONS);
|
}, JSON_SERIALIZER_OPTIONS);
|
||||||
|
|
||||||
// Build the HTTP post request:
|
async Task<HttpRequestMessage> RequestBuilder()
|
||||||
var request = new HttpRequestMessage(HttpMethod.Post, "chat/completions");
|
{
|
||||||
|
// Build the HTTP post request:
|
||||||
// Set the authorization header:
|
var request = new HttpRequestMessage(HttpMethod.Post, "chat/completions");
|
||||||
request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", await requestedSecret.Secret.Decrypt(ENCRYPTION));
|
|
||||||
|
// Set the authorization header:
|
||||||
// Set the content:
|
request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", await requestedSecret.Secret.Decrypt(ENCRYPTION));
|
||||||
request.Content = new StringContent(groqChatRequest, Encoding.UTF8, "application/json");
|
|
||||||
|
// Set the content:
|
||||||
// Send the request with the ResponseHeadersRead option.
|
request.Content = new StringContent(groqChatRequest, Encoding.UTF8, "application/json");
|
||||||
// This allows us to read the stream as soon as the headers are received.
|
return request;
|
||||||
// This is important because we want to stream the responses.
|
}
|
||||||
var response = await this.httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, token);
|
|
||||||
|
// Send the request using exponential backoff:
|
||||||
|
using var responseData = await this.SendRequest(RequestBuilder, token);
|
||||||
|
if(responseData.IsFailedAfterAllRetries)
|
||||||
|
{
|
||||||
|
this.logger.LogError($"Groq chat completion failed: {responseData.ErrorMessage}");
|
||||||
|
yield break;
|
||||||
|
}
|
||||||
|
|
||||||
// Open the response stream:
|
// Open the response stream:
|
||||||
var groqStream = await response.Content.ReadAsStreamAsync(token);
|
var groqStream = await responseData.Response!.Content.ReadAsStreamAsync(token);
|
||||||
|
|
||||||
// Add a stream reader to read the stream, line by line:
|
// Add a stream reader to read the stream, line by line:
|
||||||
var streamReader = new StreamReader(groqStream);
|
var streamReader = new StreamReader(groqStream);
|
||||||
|
@ -70,22 +70,29 @@ public sealed class ProviderMistral(ILogger logger) : BaseProvider("https://api.
|
|||||||
SafePrompt = false,
|
SafePrompt = false,
|
||||||
}, JSON_SERIALIZER_OPTIONS);
|
}, JSON_SERIALIZER_OPTIONS);
|
||||||
|
|
||||||
// Build the HTTP post request:
|
async Task<HttpRequestMessage> RequestBuilder()
|
||||||
var request = new HttpRequestMessage(HttpMethod.Post, "chat/completions");
|
{
|
||||||
|
// Build the HTTP post request:
|
||||||
// Set the authorization header:
|
var request = new HttpRequestMessage(HttpMethod.Post, "chat/completions");
|
||||||
request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", await requestedSecret.Secret.Decrypt(ENCRYPTION));
|
|
||||||
|
// Set the authorization header:
|
||||||
// Set the content:
|
request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", await requestedSecret.Secret.Decrypt(ENCRYPTION));
|
||||||
request.Content = new StringContent(mistralChatRequest, Encoding.UTF8, "application/json");
|
|
||||||
|
// Set the content:
|
||||||
// Send the request with the ResponseHeadersRead option.
|
request.Content = new StringContent(mistralChatRequest, Encoding.UTF8, "application/json");
|
||||||
// This allows us to read the stream as soon as the headers are received.
|
return request;
|
||||||
// This is important because we want to stream the responses.
|
}
|
||||||
var response = await this.httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, token);
|
|
||||||
|
// Send the request using exponential backoff:
|
||||||
|
using var responseData = await this.SendRequest(RequestBuilder, token);
|
||||||
|
if(responseData.IsFailedAfterAllRetries)
|
||||||
|
{
|
||||||
|
this.logger.LogError($"Mistral chat completion failed: {responseData.ErrorMessage}");
|
||||||
|
yield break;
|
||||||
|
}
|
||||||
|
|
||||||
// Open the response stream:
|
// Open the response stream:
|
||||||
var mistralStream = await response.Content.ReadAsStreamAsync(token);
|
var mistralStream = await responseData.Response!.Content.ReadAsStreamAsync(token);
|
||||||
|
|
||||||
// Add a stream reader to read the stream, line by line:
|
// Add a stream reader to read the stream, line by line:
|
||||||
var streamReader = new StreamReader(mistralStream);
|
var streamReader = new StreamReader(mistralStream);
|
||||||
|
@ -74,22 +74,29 @@ public sealed class ProviderOpenAI(ILogger logger) : BaseProvider("https://api.o
|
|||||||
FrequencyPenalty = 0f,
|
FrequencyPenalty = 0f,
|
||||||
}, JSON_SERIALIZER_OPTIONS);
|
}, JSON_SERIALIZER_OPTIONS);
|
||||||
|
|
||||||
// Build the HTTP post request:
|
async Task<HttpRequestMessage> RequestBuilder()
|
||||||
var request = new HttpRequestMessage(HttpMethod.Post, "chat/completions");
|
{
|
||||||
|
// Build the HTTP post request:
|
||||||
// Set the authorization header:
|
var request = new HttpRequestMessage(HttpMethod.Post, "chat/completions");
|
||||||
request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", await requestedSecret.Secret.Decrypt(ENCRYPTION));
|
|
||||||
|
// Set the authorization header:
|
||||||
// Set the content:
|
request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", await requestedSecret.Secret.Decrypt(ENCRYPTION));
|
||||||
request.Content = new StringContent(openAIChatRequest, Encoding.UTF8, "application/json");
|
|
||||||
|
// Set the content:
|
||||||
// Send the request with the ResponseHeadersRead option.
|
request.Content = new StringContent(openAIChatRequest, Encoding.UTF8, "application/json");
|
||||||
// This allows us to read the stream as soon as the headers are received.
|
return request;
|
||||||
// This is important because we want to stream the responses.
|
}
|
||||||
var response = await this.httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, token);
|
|
||||||
|
|
||||||
|
// Send the request using exponential backoff:
|
||||||
|
using var responseData = await this.SendRequest(RequestBuilder, token);
|
||||||
|
if(responseData.IsFailedAfterAllRetries)
|
||||||
|
{
|
||||||
|
this.logger.LogError($"OpenAI chat completion failed: {responseData.ErrorMessage}");
|
||||||
|
yield break;
|
||||||
|
}
|
||||||
|
|
||||||
// Open the response stream:
|
// Open the response stream:
|
||||||
var openAIStream = await response.Content.ReadAsStreamAsync(token);
|
var openAIStream = await responseData.Response!.Content.ReadAsStreamAsync(token);
|
||||||
|
|
||||||
// Add a stream reader to read the stream, line by line:
|
// Add a stream reader to read the stream, line by line:
|
||||||
var streamReader = new StreamReader(openAIStream);
|
var streamReader = new StreamReader(openAIStream);
|
||||||
|
@ -69,23 +69,30 @@ public sealed class ProviderSelfHosted(ILogger logger, Host host, string hostnam
|
|||||||
StreamReader? streamReader = default;
|
StreamReader? streamReader = default;
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
// Build the HTTP post request:
|
async Task<HttpRequestMessage> RequestBuilder()
|
||||||
var request = new HttpRequestMessage(HttpMethod.Post, host.ChatURL());
|
{
|
||||||
|
// Build the HTTP post request:
|
||||||
|
var request = new HttpRequestMessage(HttpMethod.Post, host.ChatURL());
|
||||||
|
|
||||||
// Set the authorization header:
|
// Set the authorization header:
|
||||||
if (requestedSecret.Success)
|
if (requestedSecret.Success)
|
||||||
request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", await requestedSecret.Secret.Decrypt(ENCRYPTION));
|
request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", await requestedSecret.Secret.Decrypt(ENCRYPTION));
|
||||||
|
|
||||||
// Set the content:
|
// Set the content:
|
||||||
request.Content = new StringContent(providerChatRequest, Encoding.UTF8, "application/json");
|
request.Content = new StringContent(providerChatRequest, Encoding.UTF8, "application/json");
|
||||||
|
return request;
|
||||||
// Send the request with the ResponseHeadersRead option.
|
}
|
||||||
// This allows us to read the stream as soon as the headers are received.
|
|
||||||
// This is important because we want to stream the responses.
|
// Send the request using exponential backoff:
|
||||||
var response = await this.httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, token);
|
using var responseData = await this.SendRequest(RequestBuilder, token);
|
||||||
|
if(responseData.IsFailedAfterAllRetries)
|
||||||
|
{
|
||||||
|
this.logger.LogError($"Self-hosted provider's chat completion failed: {responseData.ErrorMessage}");
|
||||||
|
yield break;
|
||||||
|
}
|
||||||
|
|
||||||
// Open the response stream:
|
// Open the response stream:
|
||||||
var providerStream = await response.Content.ReadAsStreamAsync(token);
|
var providerStream = await responseData.Response!.Content.ReadAsStreamAsync(token);
|
||||||
|
|
||||||
// Add a stream reader to read the stream, line by line:
|
// Add a stream reader to read the stream, line by line:
|
||||||
streamReader = new StreamReader(providerStream);
|
streamReader = new StreamReader(providerStream);
|
||||||
|
23
app/MindWork AI Studio/Tools/HttpRateLimitedStreamResult.cs
Normal file
23
app/MindWork AI Studio/Tools/HttpRateLimitedStreamResult.cs
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
namespace AIStudio.Tools;
|
||||||
|
|
||||||
|
/// <summary>
/// The outcome of an HTTP request that was sent with rate-limit handling.
/// Disposing the result disposes the contained response.
/// </summary>
/// <param name="IsSuccessful">True, when a request was answered with a success status code.</param>
/// <param name="IsFailedAfterAllRetries">True, when the request still failed after all retries.</param>
/// <param name="ErrorMessage">The error message which we might show to the user.</param>
/// <param name="Response">The response from the server; may be null when there is none.</param>
public readonly record struct HttpRateLimitedStreamResult(
    bool IsSuccessful,
    bool IsFailedAfterAllRetries,
    string ErrorMessage,
    HttpResponseMessage? Response) : IDisposable
{
    #region IDisposable

    /// <summary>
    /// Disposes the server response, if there is one.
    /// </summary>
    public void Dispose() => this.Response?.Dispose();

    #endregion
}
|
@ -1,3 +1,4 @@
|
|||||||
# v0.9.23, build 198 (2024-12-xx xx:xx UTC)
|
# v0.9.23, build 198 (2024-12-xx xx:xx UTC)
|
||||||
- Added an ERI server coding assistant as a preview feature behind the RAG feature flag. This helps you implement an ERI server to gain access to, e.g., your enterprise data from within AI Studio.
|
- Added an ERI server coding assistant as a preview feature behind the RAG feature flag. This helps you implement an ERI server to gain access to, e.g., your enterprise data from within AI Studio.
|
||||||
|
- Improved provider requests: rate limits are now handled by retrying requests with exponential backoff.
|
||||||
- Fixed layout issues when selecting `other` items (e.g., programming languages)
|
- Fixed layout issues when selecting `other` items (e.g., programming languages)
|
Loading…
Reference in New Issue
Block a user