From cf6256c215ddb8e01faff003f190bf12c8d08efa Mon Sep 17 00:00:00 2001 From: Nils Kruthoff Date: Mon, 18 May 2026 17:00:05 +0200 Subject: [PATCH] Resolve target host addresses before connecting, then bind HTTP connection to those validated IPs. Prevents request from re-resolving the host after validation --- app/MindWork AI Studio/Tools/HTMLParser.cs | 70 +++++++++++++++++-- .../ReadWebPageTool.cs | 28 ++++++-- 2 files changed, 87 insertions(+), 11 deletions(-) diff --git a/app/MindWork AI Studio/Tools/HTMLParser.cs b/app/MindWork AI Studio/Tools/HTMLParser.cs index 3e86e830..fb5334ea 100644 --- a/app/MindWork AI Studio/Tools/HTMLParser.cs +++ b/app/MindWork AI Studio/Tools/HTMLParser.cs @@ -1,9 +1,7 @@ using System.Net; -using System.Net.Http; using System.Net.Http.Headers; - +using System.Net.Sockets; using HtmlAgilityPack; - using ReverseMarkdown; namespace AIStudio.Tools; @@ -44,13 +42,20 @@ public sealed class HTMLParser return innerHtml; } - public async Task LoadWebPageAsync(Uri url, CancellationToken token = default, int timeoutSeconds = 30, Func? validateUrlAsync = null) + public async Task LoadWebPageAsync(Uri url, CancellationToken token = default, int timeoutSeconds = 30, Func>>? resolveUrlAddressesAsync = null) { - using var handler = new HttpClientHandler + using var handler = new SocketsHttpHandler { AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate | DecompressionMethods.Brotli, AllowAutoRedirect = false, }; + if (resolveUrlAddressesAsync is not null) + { + // The callback binds the request to a vetted target IP; a proxy would change the endpoint being connected to. + handler.UseProxy = false; + handler.ConnectCallback = async (context, connectionToken) => await ConnectToResolvedAddressAsync(context, resolveUrlAddressesAsync, connectionToken); + } + using var httpClient = new HttpClient(handler) { Timeout = Timeout.InfiniteTimeSpan, @@ -61,8 +66,7 @@ public sealed class HTMLParser var currentUrl = url; for (var redirectCount = 0; redirectCount <= MAX_REDIRECTS; redirectCount++) { - if (validateUrlAsync is not null) - await validateUrlAsync(currentUrl, timeoutCts.Token); + ValidateHttpOrHttpsUrl(currentUrl); using var request = CreateRequest(currentUrl); using var response = await httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, timeoutCts.Token); @@ -101,6 +105,58 @@ public sealed class HTMLParser throw new HttpRequestException($"The server returned more than {MAX_REDIRECTS} redirects for '{url}'."); } + private static void ValidateHttpOrHttpsUrl(Uri url) + { + if (url.Scheme.Equals(Uri.UriSchemeHttp, StringComparison.OrdinalIgnoreCase) || + url.Scheme.Equals(Uri.UriSchemeHttps, StringComparison.OrdinalIgnoreCase)) + return; + + throw new HttpRequestException($"Unsupported URL scheme '{url.Scheme}' for '{url}'."); + } + + private static async ValueTask ConnectToResolvedAddressAsync( + SocketsHttpConnectionContext context, + Func>> resolveUrlAddressesAsync, + CancellationToken token) + { + var requestUri = context.InitialRequestMessage.RequestUri ?? + throw new HttpRequestException("The HTTP request did not contain a target URL."); + + var addresses = await resolveUrlAddressesAsync(requestUri, token); + if (addresses.Count == 0) + throw new HttpRequestException($"The host '{requestUri.Host}' did not resolve to an IP address."); + + List connectionErrors = []; + foreach (var address in addresses.Distinct()) + { + var socket = new Socket(address.AddressFamily, SocketType.Stream, ProtocolType.Tcp) + { + NoDelay = true, + }; + + try + { + await socket.ConnectAsync(new IPEndPoint(address, context.DnsEndPoint.Port), token); + return new NetworkStream(socket, ownsSocket: true); + } + catch (SocketException exception) + { + connectionErrors.Add(exception); + socket.Dispose(); + } + catch + { + socket.Dispose(); + throw; + } + } + + Exception innerException = connectionErrors.Count == 1 + ? connectionErrors[0] + : new AggregateException(connectionErrors); + throw new HttpRequestException($"Could not connect to a validated address for '{requestUri.Host}'.", innerException); + } + private static HttpRequestMessage CreateRequest(Uri url) { var request = new HttpRequestMessage(HttpMethod.Get, url); diff --git a/app/MindWork AI Studio/Tools/ToolCallingSystem/ToolCallingImplementations/ReadWebPageTool.cs b/app/MindWork AI Studio/Tools/ToolCallingSystem/ToolCallingImplementations/ReadWebPageTool.cs index 0920789c..4098d9ac 100644 --- a/app/MindWork AI Studio/Tools/ToolCallingSystem/ToolCallingImplementations/ReadWebPageTool.cs +++ b/app/MindWork AI Studio/Tools/ToolCallingSystem/ToolCallingImplementations/ReadWebPageTool.cs @@ -111,7 +111,7 @@ public sealed class ReadWebPageTool(HTMLParser htmlParser, ILogger await this.ValidateUrlAccessAsync(candidateUrl, allowedPrivateHosts, context.ProviderConfidence, validationToken)); + async (candidateUrl, validationToken) => await this.ResolveValidatedUrlAddressesAsync(candidateUrl, allowedPrivateHosts, context.ProviderConfidence, validationToken)); } catch (OperationCanceledException) when (!token.IsCancellationRequested) { @@ -119,6 +119,9 @@ public sealed class ReadWebPageTool(HTMLParser htmlParser, ILogger> ResolveValidatedUrlAddressesAsync( Uri url, IReadOnlyList allowedPrivateHosts, ConfidenceLevel providerConfidence, @@ -203,13 +223,13 @@ public sealed class ReadWebPageTool(HTMLParser htmlParser, ILogger= ConfidenceLevel.HIGH) - return; + return addresses; await this.ReportPrivateHostProviderBlockAsync(url, providerConfidence); throw new ToolExecutionBlockedException("This private or VPN web page requires a High-confidence provider.");