diff --git a/app/MindWork AI Studio/Tools/Services/RustService.Retrieval.cs b/app/MindWork AI Studio/Tools/Services/RustService.Retrieval.cs
index 4a201564..1b693d9c 100644
--- a/app/MindWork AI Studio/Tools/Services/RustService.Retrieval.cs
+++ b/app/MindWork AI Studio/Tools/Services/RustService.Retrieval.cs
@@ -1,3 +1,7 @@
+using System.Text;
+using System.Text.Json;
+using AIStudio.Settings.DataModel;
+
 namespace AIStudio.Tools.Services;
 
 public sealed partial class RustService
@@ -13,4 +17,77 @@ public sealed partial class RustService
 
         return await response.Content.ReadAsStringAsync();
     }
+
+    /// <summary>
+    /// Asks the <c>/retrieval/fs/extract</c> endpoint to extract the file at
+    /// <paramref name="path"/> and concatenates the formatted content of the
+    /// server-sent events (SSE) it streams back.
+    /// </summary>
+    /// <param name="path">Path of the file to extract; it is URL-encoded into the query string.</param>
+    /// <param name="maxEvents">Reading stops once this many events have been parsed and processed.</param>
+    /// <returns>
+    /// The concatenated event content, or an empty string when the endpoint
+    /// answers with a non-success status code.
+    /// </returns>
+    public async Task<string> ReadArbitraryFileData(string path, int maxEvents)
+    {
+        var requestUri = $"/retrieval/fs/extract?path={Uri.EscapeDataString(path)}";
+        this.logger?.LogInformation("The encoded path is: '{Path}'", requestUri);
+
+        // Dispose request and response so the connection is released once the stream is consumed.
+        using var request = new HttpRequestMessage(HttpMethod.Get, requestUri);
+        using var response = await this.http.SendAsync(request, HttpCompletionOption.ResponseHeadersRead);
+
+        this.logger?.LogInformation("Response received: {StatusCode}", response.StatusCode);
+
+        if (!response.IsSuccessStatusCode)
+        {
+            this.logger?.LogError("Fehler beim Empfangen des SSE-Streams: {ResponseStatusCode}", response.StatusCode);
+            return string.Empty;
+        }
+
+        await using var stream = await response.Content.ReadAsStreamAsync();
+        using var reader = new StreamReader(stream);
+
+        var resultBuilder = new StringBuilder();
+        var eventCount = 0;
+
+        this.logger?.LogInformation("Starting to read SSE events");
+
+        // ReadLineAsync yields null at the end of the stream; StreamReader.EndOfStream
+        // is avoided here because it may block synchronously while waiting for data.
+        while (eventCount < maxEvents && await reader.ReadLineAsync() is { } line)
+        {
+            // SSE separates events with blank lines, and only "data:" lines carry a
+            // payload. The ordinal comparison keeps the prefix check culture-independent.
+            if (!line.StartsWith("data:", StringComparison.Ordinal))
+            {
+                continue;
+            }
+
+            var jsonContent = line[5..];
+
+            try
+            {
+                var sseEvent = JsonSerializer.Deserialize<SseEvent>(jsonContent);
+                if (sseEvent != null)
+                {
+                    var content = await SseHandler.ProcessEventAsync(sseEvent);
+                    resultBuilder.Append(content);
+                    eventCount++;
+                    this.logger?.LogDebug("Processed event {Count}:\t{Content}", eventCount, line);
+                }
+            }
+            catch (JsonException ex)
+            {
+                // A malformed payload is logged and skipped; it does not count as an event.
+                this.logger?.LogWarning("Failed to parse JSON data: {Error}\nLine: {Line}", ex.Message, line);
+            }
+        }
+
+        var result = resultBuilder.ToString();
+        this.logger?.LogInformation("Finished reading. Total events: {Count}, Result length: {Length} chars", eventCount, result.Length);
+
+        return result;
+    }
 }
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Tools/SseHandler.cs b/app/MindWork AI Studio/Tools/SseHandler.cs
new file mode 100644
index 00000000..d2771843
--- /dev/null
+++ b/app/MindWork AI Studio/Tools/SseHandler.cs
@@ -0,0 +1,167 @@
+using System.Text;
+using AIStudio.Settings.DataModel;
+using _Imports = MudExtensions._Imports;
+
+namespace AIStudio.Tools;
+
+/// <summary>
+/// Formats server-sent events (<see cref="SseEvent"/>) as plain text.
+/// </summary>
+public static class SseHandler
+{
+    /// <summary>
+    /// Formats a single event as text, using the type of its metadata to choose the prefix.
+    /// </summary>
+    /// <param name="sseEvent">The event to format.</param>
+    /// <returns>
+    /// The formatted text terminated by a line break, or an empty string when
+    /// <paramref name="sseEvent"/> is null.
+    /// </returns>
+    public static async Task<string> ProcessEventAsync(SseEvent sseEvent)
+    {
+        if (sseEvent is null)
+        {
+            return string.Empty;
+        }
+
+        var result = new StringBuilder();
+
+        if (sseEvent.Content != null && sseEvent.Metadata != null)
+        {
+            // Content and metadata are both present: the metadata type decides the prefix.
+            switch (sseEvent.Metadata)
+            {
+                case TextMetadata textMetadata:
+                    var lineNumber = textMetadata.Text?.LineNumber ?? 0;
+                    result.AppendLine($"{lineNumber}:\t{sseEvent.Content}");
+                    break;
+
+                case SpreadsheetMetadata spreadsheetMetadata:
+                    var sheetName = spreadsheetMetadata.Spreadsheet.SheetName;
+                    var rowNumber = spreadsheetMetadata.Spreadsheet.RowNumber;
+
+                    // The sheet name is written once, ahead of the row numbered 1.
+                    if (rowNumber == 1)
+                    {
+                        result.AppendLine($"{sheetName}");
+                    }
+
+                    result.AppendLine($"{rowNumber}:\t{sseEvent.Content}");
+                    break;
+
+                // Documents and unrecognized metadata types contribute only their content.
+                case DocumentMetadata _:
+                default:
+                    result.AppendLine(sseEvent.Content);
+                    break;
+            }
+        }
+        else if (!string.IsNullOrEmpty(sseEvent.Content))
+        {
+            // Content without metadata is passed through unchanged.
+            result.AppendLine(sseEvent.Content);
+        }
+        else
+        {
+            // No content at all: emit an empty line.
+            result.AppendLine();
+        }
+
+        // Placeholder that keeps the method asynchronous until real asynchronous work is added.
+        await Task.CompletedTask;
+
+        return result.ToString();
+    }
+
+    /// <summary>
+    /// Appends a bracketed location header for <paramref name="metadata"/> to
+    /// <paramref name="resultBuilder"/> and collects the image segments of presentations.
+    /// </summary>
+    /// <remarks>Nothing in this class calls this method at the moment.</remarks>
+    private static async Task HandleMetadataAsync(Settings.DataModel.Metadata metadata, StringBuilder resultBuilder, Dictionary<string, List<string>> images, ILogger logger)
+    {
+        switch (metadata)
+        {
+            case TextMetadata textMetadata:
+                // Text files: line number.
+                resultBuilder.AppendLine();
+                if (textMetadata.Text != null)
+                {
+                    var lineNumber = textMetadata.Text.LineNumber;
+                    resultBuilder.AppendLine($"[Zeile {lineNumber}]");
+                }
+                break;
+
+            case PdfMetadata pdfMetadata:
+                // PDF files: page number.
+                var pageNumber = pdfMetadata.Pdf.PageNumber;
+                resultBuilder.AppendLine();
+                resultBuilder.AppendLine($"[Seite {pageNumber}]");
+                break;
+
+            case SpreadsheetMetadata spreadsheetMetadata:
+                // Spreadsheets: sheet name and row number.
+                var sheetName = spreadsheetMetadata.Spreadsheet.SheetName;
+                var rowNumber = spreadsheetMetadata.Spreadsheet.RowNumber;
+                resultBuilder.AppendLine();
+                resultBuilder.AppendLine($"[Tabelle: {sheetName}, Zeile: {rowNumber}]");
+                break;
+
+            case PresentationMetadata presentationMetadata:
+                // Presentations: slide number and, if present, an image segment.
+                var slideNumber = presentationMetadata.Presentation.SlideNumber;
+                resultBuilder.AppendLine();
+                resultBuilder.AppendLine($"[Folie {slideNumber}]");
+
+                if (presentationMetadata.Presentation.Image != null)
+                {
+                    ProcessImageSegment(presentationMetadata.Presentation.Image, images, resultBuilder, logger);
+                }
+                break;
+
+            case ImageMetadata _:
+                // Images: heading only.
+                resultBuilder.AppendLine();
+                resultBuilder.AppendLine("[Bildbeschreibung]");
+                break;
+
+            default:
+                logger?.LogWarning("Unbekannter Metadaten-Typ: {Type}", metadata.GetType().Name);
+                break;
+        }
+
+        await Task.CompletedTask;
+    }
+
+    /// <summary>
+    /// Stores one image segment under its image id and notes in
+    /// <paramref name="resultBuilder"/> when the final segment has arrived.
+    /// </summary>
+    private static void ProcessImageSegment(ImageData imageData, Dictionary<string, List<string>> images, StringBuilder resultBuilder, ILogger logger)
+    {
+        // Segments without an id or without content cannot be stored.
+        if (string.IsNullOrEmpty(imageData.Id) || string.IsNullOrEmpty(imageData.Content))
+        {
+            return;
+        }
+
+        // A single TryGetValue lookup replaces ContainsKey followed by the indexer.
+        if (!images.TryGetValue(imageData.Id, out var segments))
+        {
+            segments = new List<string>();
+            images[imageData.Id] = segments;
+        }
+
+        segments.Add(imageData.Content);
+
+        logger?.LogDebug("Added image segment {Segment} for image {Id}", imageData.Segment, imageData.Id);
+
+        if (imageData.IsEnd)
+        {
+            logger?.LogDebug("Completed image {Id} with {SegmentCount} segments", imageData.Id, segments.Count);
+            resultBuilder.AppendLine("[Präsentationsbild eingebettet]");
+
+            // The collected segments could be joined and stored here.
+        }
+    }
+}
\ No newline at end of file