diff --git a/app/MindWork AI Studio/Provider/BaseProvider.cs b/app/MindWork AI Studio/Provider/BaseProvider.cs index 08821c3b..531030b9 100644 --- a/app/MindWork AI Studio/Provider/BaseProvider.cs +++ b/app/MindWork AI Studio/Provider/BaseProvider.cs @@ -675,13 +675,14 @@ public abstract class BaseProvider : IProvider, ISecretId Content = responseMessage.Content, ToolCalls = responseMessage.ToolCalls, }); - + + var maxToolCalls = 30; foreach (var toolCall in responseMessage.ToolCalls) { toolCallCount++; - if (toolCallCount > 10) + if (toolCallCount > maxToolCalls) { - var limitMessage = "Tool calling stopped because the maximum of 10 tool calls was reached."; + var limitMessage = $"Tool calling stopped because the maximum of {maxToolCalls} tool calls was reached."; currentAssistantContent.ToolInvocations.Add(new ToolInvocationTrace { Order = toolCallCount, diff --git a/app/MindWork AI Studio/Tools/HTMLParser.cs b/app/MindWork AI Studio/Tools/HTMLParser.cs index fb5334ea..c2463750 100644 --- a/app/MindWork AI Studio/Tools/HTMLParser.cs +++ b/app/MindWork AI Studio/Tools/HTMLParser.cs @@ -1,6 +1,7 @@ using System.Net; using System.Net.Http.Headers; using System.Net.Sockets; +using System.Text; using HtmlAgilityPack; using ReverseMarkdown; @@ -10,6 +11,7 @@ public sealed class HTMLParser { private const string USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) MindWorkAIStudio/1.0"; private const int MAX_REDIRECTS = 10; + private const int DEFAULT_MAX_RESPONSE_BYTES = 5 * 1024 * 1024; private static readonly Config MARKDOWN_PARSER_CONFIG = new() { @@ -42,7 +44,7 @@ public sealed class HTMLParser return innerHtml; } - public async Task LoadWebPageAsync(Uri url, CancellationToken token = default, int timeoutSeconds = 30, Func>>? resolveUrlAddressesAsync = null) + public async Task LoadWebPageAsync(Uri url, CancellationToken token = default, int timeoutSeconds = 30, Func>>? 
/// <summary>
/// Reads the body of <paramref name="content"/> into a string while refusing to buffer
/// more than <paramref name="maxResponseBytes"/> bytes.
/// </summary>
/// <param name="content">The HTTP content whose body is read.</param>
/// <param name="maxResponseBytes">The maximum number of body bytes that may be read.</param>
/// <param name="token">Cancels opening and reading the body stream.</param>
/// <returns>The decoded body text, without any leading byte-order mark.</returns>
/// <exception cref="HttpRequestException">
/// The declared Content-Length, or the number of bytes actually received, exceeds
/// <paramref name="maxResponseBytes"/>.
/// </exception>
private static async Task<string> ReadContentAsStringWithLimitAsync(HttpContent content, int maxResponseBytes, CancellationToken token)
{
    // Fast path: reject early when the declared length is already over the limit.
    if (content.Headers.ContentLength is long contentLength && contentLength > maxResponseBytes)
        throw new HttpRequestException($"The response body is too large. Maximum allowed size is {maxResponseBytes} bytes.");

    await using var stream = await content.ReadAsStreamAsync(token);
    await using var buffer = new MemoryStream();
    var chunk = new byte[8192];
    while (true)
    {
        var read = await stream.ReadAsync(chunk, token);
        if (read == 0)
            break;

        // The Content-Length header may be absent or wrong, so the limit is enforced
        // against the bytes that were actually received as well.
        if (buffer.Length + read > maxResponseBytes)
            throw new HttpRequestException($"The response body is too large. Maximum allowed size is {maxResponseBytes} bytes.");

        buffer.Write(chunk, 0, read);
    }

    // Decode through a StreamReader instead of Encoding.GetString: the reader honors and
    // strips a byte-order mark (GetString would leave U+FEFF at the start of the text)
    // and reads straight from the buffer without an extra ToArray() copy.
    var fallbackEncoding = TryGetContentEncoding(content) ?? Encoding.UTF8;
    buffer.Position = 0;
    using var reader = new StreamReader(buffer, fallbackEncoding, detectEncodingFromByteOrderMarks: true, leaveOpen: true);

    // The data is already in memory, so a synchronous read performs no I/O.
    return reader.ReadToEnd();
}

/// <summary>
/// Resolves the text encoding named by the charset parameter of the Content-Type header.
/// </summary>
/// <param name="content">The HTTP content whose headers are inspected.</param>
/// <returns>
/// The matching <see cref="Encoding"/>, or <c>null</c> when no charset is declared or the
/// declared name is not a known encoding.
/// </returns>
private static Encoding? TryGetContentEncoding(HttpContent content)
{
    // The charset value may be quoted (charset="utf-8"); strip the quotes and any
    // whitespace around or inside them.
    var charset = content.Headers.ContentType?.CharSet?.Trim().Trim('"').Trim();
    if (string.IsNullOrWhiteSpace(charset))
        return null;

    try
    {
        return Encoding.GetEncoding(charset);
    }
    catch (ArgumentException)
    {
        // Unknown or unsupported charset name: let the caller choose a fallback.
        return null;
    }
}
MAX_PAGE = 20; + private const int MAX_TIMEOUT_SECONDS = 60; + private const int MAX_RESPONSE_BYTES = 1024 * 1024; private const int MAX_TRACE_LENGTH = 4000; public string ImplementationKey => "web_search"; @@ -127,8 +131,10 @@ public sealed class SearXNGWebSearchTool : IToolImplementation throw new InvalidOperationException(TB("Default categories and default engines cannot both be set for the web search tool.")); var defaultLimit = ReadOptionalPositiveIntSetting(context.SettingsValues, "maxResults") ?? DEFAULT_MAX_RESULTS; - var effectiveLimit = requestedLimit ?? defaultLimit; - var timeoutSeconds = ReadOptionalPositiveIntSetting(context.SettingsValues, "timeoutSeconds") ?? DEFAULT_TIMEOUT_SECONDS; + var effectiveLimit = Math.Min(requestedLimit ?? defaultLimit, MAX_RESULTS); + var timeoutSeconds = Math.Min(ReadOptionalPositiveIntSetting(context.SettingsValues, "timeoutSeconds") ?? DEFAULT_TIMEOUT_SECONDS, MAX_TIMEOUT_SECONDS); + if (page is > MAX_PAGE) + throw new ArgumentException($"Argument 'page' must be less than or equal to {MAX_PAGE}."); var queryParameters = new List> { @@ -163,7 +169,7 @@ public sealed class SearXNGWebSearchTool : IToolImplementation timeoutCts.CancelAfter(TimeSpan.FromSeconds(timeoutSeconds)); using var response = await SendAsync(httpClient, request, timeoutCts.Token, timeoutSeconds, token); - var responseBody = await response.Content.ReadAsStringAsync(token); + var responseBody = await ReadContentAsStringWithLimitAsync(response.Content, MAX_RESPONSE_BYTES, token); if (!response.IsSuccessStatusCode) { var responseDetails = string.IsNullOrWhiteSpace(responseBody) ? string.Empty : $" Response body: {responseBody[..Math.Min(responseBody.Length, 400)]}"; @@ -409,6 +415,29 @@ public sealed class SearXNGWebSearchTool : IToolImplementation return int.TryParse(value, out var parsedValue) && parsedValue > 0 ? 
/// <summary>
/// Reads the SearXNG response body into a string while refusing to buffer more than
/// <paramref name="maxResponseBytes"/> bytes.
/// </summary>
/// <param name="content">The HTTP content whose body is read.</param>
/// <param name="maxResponseBytes">The maximum number of body bytes that may be read.</param>
/// <param name="token">Cancels opening and reading the body stream.</param>
/// <returns>The body decoded as UTF-8 text, without any leading byte-order mark.</returns>
/// <exception cref="InvalidOperationException">
/// The declared Content-Length, or the number of bytes actually received, exceeds
/// <paramref name="maxResponseBytes"/>.
/// </exception>
private static async Task<string> ReadContentAsStringWithLimitAsync(HttpContent content, int maxResponseBytes, CancellationToken token)
{
    // Fast path: reject early when the declared length is already over the limit.
    if (content.Headers.ContentLength is long contentLength && contentLength > maxResponseBytes)
        throw new InvalidOperationException($"The SearXNG response body is too large. Maximum allowed size is {maxResponseBytes} bytes.");

    await using var stream = await content.ReadAsStreamAsync(token);
    await using var buffer = new MemoryStream();
    var chunk = new byte[8192];
    while (true)
    {
        var read = await stream.ReadAsync(chunk, token);
        if (read == 0)
            break;

        // The Content-Length header may be absent or wrong, so the limit is enforced
        // against the bytes that were actually received as well.
        if (buffer.Length + read > maxResponseBytes)
            throw new InvalidOperationException($"The SearXNG response body is too large. Maximum allowed size is {maxResponseBytes} bytes.");

        buffer.Write(chunk, 0, read);
    }

    // Decode through a StreamReader instead of Encoding.GetString: the reader strips a
    // byte-order mark, which would otherwise stay in the string as U+FEFF in front of
    // the payload, and it avoids the extra ToArray() copy.
    buffer.Position = 0;
    using var reader = new StreamReader(buffer, System.Text.Encoding.UTF8, detectEncodingFromByteOrderMarks: true, leaveOpen: true);

    // The data is already in memory, so a synchronous read performs no I/O.
    return reader.ReadToEnd();
}