diff --git a/app/MindWork AI Studio/Assistants/I18N/allTexts.lua b/app/MindWork AI Studio/Assistants/I18N/allTexts.lua index 98ed0089..ea006f6c 100644 --- a/app/MindWork AI Studio/Assistants/I18N/allTexts.lua +++ b/app/MindWork AI Studio/Assistants/I18N/allTexts.lua @@ -5476,6 +5476,9 @@ UI_TEXT_CONTENT["AISTUDIO::TOOLS::RAG::RAGPROCESSES::AISRCSELWITHRETCTXVAL::T304 -- AI source selection with AI retrieval context validation UI_TEXT_CONTENT["AISTUDIO::TOOLS::RAG::RAGPROCESSES::AISRCSELWITHRETCTXVAL::T3775725978"] = "AI source selection with AI retrieval context validation" +-- Executable Files +UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPEFILTER::T2217313358"] = "Executable Files" + -- PDF Files UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPEFILTER::T3108466742"] = "PDF Files" diff --git a/app/MindWork AI Studio/Tools/Services/RustService.Retrieval.cs b/app/MindWork AI Studio/Tools/Services/RustService.Retrieval.cs index 1b693d9c..76556089 100644 --- a/app/MindWork AI Studio/Tools/Services/RustService.Retrieval.cs +++ b/app/MindWork AI Studio/Tools/Services/RustService.Retrieval.cs @@ -21,67 +21,42 @@ public sealed partial class RustService public async Task ReadArbitraryFileData(string path, int maxEvents) { var requestUri = $"/retrieval/fs/extract?path={Uri.EscapeDataString(path)}"; - this.logger?.LogInformation("The encoded path is: '{Path}'", requestUri); var request = new HttpRequestMessage(HttpMethod.Get, requestUri); var response = await this.http.SendAsync(request, HttpCompletionOption.ResponseHeadersRead); - this.logger?.LogInformation("Response received: {StatusCode}", response.StatusCode); - if (!response.IsSuccessStatusCode) - { - this.logger?.LogError("Fehler beim Empfangen des SSE-Streams: {ResponseStatusCode}", response.StatusCode); - return string.Empty; - } + if (!response.IsSuccessStatusCode) { return string.Empty; } await using var stream = await response.Content.ReadAsStreamAsync(); using var reader = new StreamReader(stream); var resultBuilder = new StringBuilder(); var eventCount = 0; - var images = new Dictionary>(); - - this.logger?.LogInformation("Starting to read SSE events"); while (!reader.EndOfStream && eventCount < maxEvents) { var line = await reader.ReadLineAsync(); - if (string.IsNullOrEmpty(line)) - { - continue; // SSE Format trennt Events durch leere Zeilen - } + + if (string.IsNullOrEmpty(line)) continue; + if (!line.StartsWith("data:")) continue; + + var jsonContent = line[5..]; - if (line.StartsWith("data:")) + try { - var jsonContent = line[5..]; - - try + var sseEvent = JsonSerializer.Deserialize(jsonContent); + if (sseEvent != null) { - var sseEvent = JsonSerializer.Deserialize(jsonContent); - if (sseEvent != null) - { - var content = await SseHandler.ProcessEventAsync(sseEvent); - resultBuilder.Append(content); - eventCount++; - this.logger?.LogDebug("Processed event {Count}:\t{Content}", eventCount, line); - } - } - catch (JsonException ex) - { - this.logger?.LogWarning("Failed to parse JSON data: {Error}\nLine: {Line}", ex.Message, line); + var content = await SseHandler.ProcessEventAsync(sseEvent); + resultBuilder.Append(content); + eventCount++; } } + catch (JsonException ex) { return string.Empty; } + } - var result = resultBuilder.ToString(); - this.logger?.LogInformation("Finished reading. Total events: {Count}, Result length: {Length} chars", - eventCount, result.Length); - - if (images.Count > 0) - { - this.logger?.LogInformation("Extracted {Count} images", images.Count); - // Hier könntest du die Bilder weiterverarbeiten - } return result; } diff --git a/app/MindWork AI Studio/Tools/SseHandler.cs b/app/MindWork AI Studio/Tools/SseHandler.cs index d2771843..34777b44 100644 --- a/app/MindWork AI Studio/Tools/SseHandler.cs +++ b/app/MindWork AI Studio/Tools/SseHandler.cs @@ -1,71 +1,54 @@ using System.Text; using AIStudio.Settings.DataModel; -using _Imports = MudExtensions._Imports; namespace AIStudio.Tools; public static class SseHandler { - // public static async Task ProcessEventAsync(SseEvent sseEvent, StringBuilder resultBuilder, Dictionary> images, ILogger logger) - // { - // if (sseEvent.Metadata != null) - // { - // await HandleMetadataAsync(sseEvent.Metadata, resultBuilder, images, logger); - // } - // - // if (!string.IsNullOrEmpty(sseEvent.Content)) - // { - // resultBuilder.Append(sseEvent.Content); - // } - // } - - public static async Task ProcessEventAsync(SseEvent sseEvent) + public static async Task ProcessEventAsync(SseEvent? sseEvent) { var result = new StringBuilder(); - if (sseEvent == null) - { - // Falls `sseEvent` null ist, gib einen leeren String zurück oder handle es entsprechend. - return result.ToString(); - } + if (sseEvent == null) { return result.ToString(); } - // Kombiniere Content und Metadata - if (sseEvent.Content != null && sseEvent.Metadata != null) + if (sseEvent is { Content: not null, Metadata: not null }) { - // Je nach Typ der Metadata, verarbeite entsprechend switch (sseEvent.Metadata) { case TextMetadata textMetadata: var lineNumber = textMetadata.Text?.LineNumber ?? 0; - result.AppendLine($"{lineNumber}:\t{sseEvent.Content}"); + result.AppendLine($"{sseEvent.Content}"); break; - case SpreadsheetMetadata spreadsheetMetadata: - var sheetName = spreadsheetMetadata.Spreadsheet.SheetName; - var rowNumber = spreadsheetMetadata.Spreadsheet.RowNumber; - if (rowNumber == 1) { result.AppendLine($"{sheetName}");} + case PdfMetadata pdfMetadata: + var pageNumber = pdfMetadata.Pdf?.PageNumber ?? 0; + result.AppendLine($"[Page {pageNumber}]:\n{sseEvent.Content}"); + break; - result.AppendLine($"{rowNumber}:\t{sseEvent.Content}"); + case SpreadsheetMetadata spreadsheetMetadata: + var sheetName = spreadsheetMetadata.Spreadsheet?.SheetName; + var rowNumber = spreadsheetMetadata.Spreadsheet?.RowNumber; + + if (rowNumber == 1) { result.AppendLine($"\n{sheetName}:"); } + + result.AppendLine($"{sseEvent.Content}"); break; case DocumentMetadata documentMetadata: result.AppendLine($"{sseEvent.Content}"); break; - - // Weitere Metadaten-Typen können hier hinzugefügt werden - // case EmbeddingMetadata embeddingMetadata: - // // Verarbeitung für EmbeddingMetadata - // break; + + case ImageMetadata imageMetadata: + result.AppendLine($"{sseEvent.Content}"); + break; default: - // Wenn der Metadaten-Typ nicht erkannt wird, füge nur den Content hinzu result.AppendLine(sseEvent.Content); break; } } else if (!string.IsNullOrEmpty(sseEvent.Content)) { - // Falls nur Content vorhanden ist result.AppendLine(sseEvent.Content); } else if (string.IsNullOrEmpty(sseEvent.Content)) @@ -73,90 +56,28 @@ public static class SseHandler result.AppendLine(); } - // Asynchrone Verarbeitung, falls erforderlich - await Task.CompletedTask; // Placeholder für asynchrone Operationen - + await Task.CompletedTask; return result.ToString(); } - - private static async Task HandleMetadataAsync(Settings.DataModel.Metadata metadata, StringBuilder resultBuilder, Dictionary> images, ILogger logger) + + private static void ProcessImageSegment(PptxImageData pptxImageData, Dictionary> images, StringBuilder resultBuilder, ILogger logger) { - switch (metadata) - { - case TextMetadata textMetadata: - // Für Textdateien: Zeilennummer hinzufügen - resultBuilder.AppendLine(); - if (textMetadata.Text != null) - { - var lineNumber = textMetadata.Text.LineNumber; - resultBuilder.AppendLine($"[Zeile {lineNumber}]"); - } - break; - - case PdfMetadata pdfMetadata: - // Für PDF: Seitennummer und Umbruch - var pageNumber = pdfMetadata.Pdf.PageNumber; - resultBuilder.AppendLine(); - resultBuilder.AppendLine($"[Seite {pageNumber}]"); - break; - - case SpreadsheetMetadata spreadsheetMetadata: - // Für Tabellen: Arbeitsblattname und Zeilennummer - var sheetName = spreadsheetMetadata.Spreadsheet.SheetName; - var rowNumber = spreadsheetMetadata.Spreadsheet.RowNumber; - resultBuilder.AppendLine(); - resultBuilder.AppendLine($"[Tabelle: {sheetName}, Zeile: {rowNumber}]"); - break; - - case PresentationMetadata presentationMetadata: - // Für Präsentationen: Foliennummer und ggf. Bild - var slideNumber = presentationMetadata.Presentation.SlideNumber; - resultBuilder.AppendLine(); - resultBuilder.AppendLine($"[Folie {slideNumber}]"); - - if (presentationMetadata.Presentation.Image != null) - { - ProcessImageSegment(presentationMetadata.Presentation.Image, images, resultBuilder, logger); - } - break; - - case ImageMetadata _: - // Für Bilder - resultBuilder.AppendLine(); - resultBuilder.AppendLine("[Bildbeschreibung]"); - break; - - default: - // Unbekannter Metadaten-Typ - logger?.LogWarning("Unbekannter Metadaten-Typ: {Type}", metadata.GetType().Name); - break; - } - - await Task.CompletedTask; - } - - private static void ProcessImageSegment(ImageData imageData, Dictionary> images, StringBuilder resultBuilder, ILogger logger) - { - if (string.IsNullOrEmpty(imageData.Id) || string.IsNullOrEmpty(imageData.Content)) + if (string.IsNullOrEmpty(pptxImageData.Id) || string.IsNullOrEmpty(pptxImageData.Content)) { return; } - if (!images.ContainsKey(imageData.Id)) + if (!images.ContainsKey(pptxImageData.Id)) { - images[imageData.Id] = new List(); + images[pptxImageData.Id] = new List(); } - images[imageData.Id].Add(imageData.Content); + images[pptxImageData.Id].Add(pptxImageData.Content); - logger?.LogDebug("Added image segment {Segment} for image {Id}", imageData.Segment, imageData.Id); - - if (imageData.IsEnd) + if (pptxImageData.IsEnd) { - logger?.LogDebug("Completed image {Id} with {SegmentCount} segments", imageData.Id, images[imageData.Id].Count); resultBuilder.AppendLine("[Präsentationsbild eingebettet]"); - - // Hier kannst du die Bilddaten weiterverarbeiten, z.B. zusammenfügen und speichern + // TODO } } } \ No newline at end of file diff --git a/runtime/src/file_data.rs b/runtime/src/file_data.rs index 8d7ad9fb..f3b6498a 100644 --- a/runtime/src/file_data.rs +++ b/runtime/src/file_data.rs @@ -30,7 +30,7 @@ pub enum Metadata { Pdf { page_number: usize }, Spreadsheet { sheet_name: String, row_number: usize }, Document {}, - Image, + Image {}, } const TO_MARKDOWN: &str = "markdown"; @@ -289,7 +289,7 @@ async fn chunk_image(file_path: &str) -> Result { let stream = stream! { yield Ok(Chunk { content: base64, - metadata: Metadata::Image, + metadata: Metadata::Image {}, }); };