finalize event handler and retrieval logic

This commit is contained in:
krut_ni 2025-06-24 10:20:31 +02:00
parent 53f8257a01
commit 52866c34dd
4 changed files with 47 additions and 148 deletions

View File

@ -5476,6 +5476,9 @@ UI_TEXT_CONTENT["AISTUDIO::TOOLS::RAG::RAGPROCESSES::AISRCSELWITHRETCTXVAL::T304
-- AI source selection with AI retrieval context validation -- AI source selection with AI retrieval context validation
UI_TEXT_CONTENT["AISTUDIO::TOOLS::RAG::RAGPROCESSES::AISRCSELWITHRETCTXVAL::T3775725978"] = "AI source selection with AI retrieval context validation" UI_TEXT_CONTENT["AISTUDIO::TOOLS::RAG::RAGPROCESSES::AISRCSELWITHRETCTXVAL::T3775725978"] = "AI source selection with AI retrieval context validation"
-- Executable Files
UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPEFILTER::T2217313358"] = "Executable Files"
-- PDF Files -- PDF Files
UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPEFILTER::T3108466742"] = "PDF Files" UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPEFILTER::T3108466742"] = "PDF Files"

View File

@ -21,38 +21,26 @@ public sealed partial class RustService
public async Task<string> ReadArbitraryFileData(string path, int maxEvents) public async Task<string> ReadArbitraryFileData(string path, int maxEvents)
{ {
var requestUri = $"/retrieval/fs/extract?path={Uri.EscapeDataString(path)}"; var requestUri = $"/retrieval/fs/extract?path={Uri.EscapeDataString(path)}";
this.logger?.LogInformation("The encoded path is: '{Path}'", requestUri);
var request = new HttpRequestMessage(HttpMethod.Get, requestUri); var request = new HttpRequestMessage(HttpMethod.Get, requestUri);
var response = await this.http.SendAsync(request, HttpCompletionOption.ResponseHeadersRead); var response = await this.http.SendAsync(request, HttpCompletionOption.ResponseHeadersRead);
this.logger?.LogInformation("Response received: {StatusCode}", response.StatusCode);
if (!response.IsSuccessStatusCode) if (!response.IsSuccessStatusCode) { return string.Empty; }
{
this.logger?.LogError("Fehler beim Empfangen des SSE-Streams: {ResponseStatusCode}", response.StatusCode);
return string.Empty;
}
await using var stream = await response.Content.ReadAsStreamAsync(); await using var stream = await response.Content.ReadAsStreamAsync();
using var reader = new StreamReader(stream); using var reader = new StreamReader(stream);
var resultBuilder = new StringBuilder(); var resultBuilder = new StringBuilder();
var eventCount = 0; var eventCount = 0;
var images = new Dictionary<string, List<string>>();
this.logger?.LogInformation("Starting to read SSE events");
while (!reader.EndOfStream && eventCount < maxEvents) while (!reader.EndOfStream && eventCount < maxEvents)
{ {
var line = await reader.ReadLineAsync(); var line = await reader.ReadLineAsync();
if (string.IsNullOrEmpty(line))
{
continue; // SSE Format trennt Events durch leere Zeilen
}
if (line.StartsWith("data:")) if (string.IsNullOrEmpty(line)) continue;
{ if (!line.StartsWith("data:")) continue;
var jsonContent = line[5..]; var jsonContent = line[5..];
try try
@ -63,25 +51,12 @@ public sealed partial class RustService
var content = await SseHandler.ProcessEventAsync(sseEvent); var content = await SseHandler.ProcessEventAsync(sseEvent);
resultBuilder.Append(content); resultBuilder.Append(content);
eventCount++; eventCount++;
this.logger?.LogDebug("Processed event {Count}:\t{Content}", eventCount, line);
}
}
catch (JsonException ex)
{
this.logger?.LogWarning("Failed to parse JSON data: {Error}\nLine: {Line}", ex.Message, line);
}
} }
} }
catch (JsonException ex) { return string.Empty; }
}
var result = resultBuilder.ToString(); var result = resultBuilder.ToString();
this.logger?.LogInformation("Finished reading. Total events: {Count}, Result length: {Length} chars",
eventCount, result.Length);
if (images.Count > 0)
{
this.logger?.LogInformation("Extracted {Count} images", images.Count);
// Hier könntest du die Bilder weiterverarbeiten
}
return result; return result;
} }

View File

@ -1,71 +1,54 @@
using System.Text; using System.Text;
using AIStudio.Settings.DataModel; using AIStudio.Settings.DataModel;
using _Imports = MudExtensions._Imports;
namespace AIStudio.Tools; namespace AIStudio.Tools;
public static class SseHandler public static class SseHandler
{ {
// public static async Task ProcessEventAsync(SseEvent sseEvent, StringBuilder resultBuilder, Dictionary<string, List<string>> images, ILogger logger) public static async Task<string> ProcessEventAsync(SseEvent? sseEvent)
// {
// if (sseEvent.Metadata != null)
// {
// await HandleMetadataAsync(sseEvent.Metadata, resultBuilder, images, logger);
// }
//
// if (!string.IsNullOrEmpty(sseEvent.Content))
// {
// resultBuilder.Append(sseEvent.Content);
// }
// }
public static async Task<string> ProcessEventAsync(SseEvent sseEvent)
{ {
var result = new StringBuilder(); var result = new StringBuilder();
if (sseEvent == null) if (sseEvent == null) { return result.ToString(); }
{
// Falls `sseEvent` null ist, gib einen leeren String zurück oder handle es entsprechend.
return result.ToString();
}
// Kombiniere Content und Metadata if (sseEvent is { Content: not null, Metadata: not null })
if (sseEvent.Content != null && sseEvent.Metadata != null)
{ {
// Je nach Typ der Metadata, verarbeite entsprechend
switch (sseEvent.Metadata) switch (sseEvent.Metadata)
{ {
case TextMetadata textMetadata: case TextMetadata textMetadata:
var lineNumber = textMetadata.Text?.LineNumber ?? 0; var lineNumber = textMetadata.Text?.LineNumber ?? 0;
result.AppendLine($"{lineNumber}:\t{sseEvent.Content}"); result.AppendLine($"{sseEvent.Content}");
break;
case PdfMetadata pdfMetadata:
var pageNumber = pdfMetadata.Pdf?.PageNumber ?? 0;
result.AppendLine($"[Page {pageNumber}]:\n{sseEvent.Content}");
break; break;
case SpreadsheetMetadata spreadsheetMetadata: case SpreadsheetMetadata spreadsheetMetadata:
var sheetName = spreadsheetMetadata.Spreadsheet.SheetName; var sheetName = spreadsheetMetadata.Spreadsheet?.SheetName;
var rowNumber = spreadsheetMetadata.Spreadsheet.RowNumber; var rowNumber = spreadsheetMetadata.Spreadsheet?.RowNumber;
if (rowNumber == 1) { result.AppendLine($"{sheetName}");}
result.AppendLine($"{rowNumber}:\t{sseEvent.Content}"); if (rowNumber == 1) { result.AppendLine($"\n{sheetName}:"); }
result.AppendLine($"{sseEvent.Content}");
break; break;
case DocumentMetadata documentMetadata: case DocumentMetadata documentMetadata:
result.AppendLine($"{sseEvent.Content}"); result.AppendLine($"{sseEvent.Content}");
break; break;
// Weitere Metadaten-Typen können hier hinzugefügt werden case ImageMetadata imageMetadata:
// case EmbeddingMetadata embeddingMetadata: result.AppendLine($"{sseEvent.Content}");
// // Verarbeitung für EmbeddingMetadata break;
// break;
default: default:
// Wenn der Metadaten-Typ nicht erkannt wird, füge nur den Content hinzu
result.AppendLine(sseEvent.Content); result.AppendLine(sseEvent.Content);
break; break;
} }
} }
else if (!string.IsNullOrEmpty(sseEvent.Content)) else if (!string.IsNullOrEmpty(sseEvent.Content))
{ {
// Falls nur Content vorhanden ist
result.AppendLine(sseEvent.Content); result.AppendLine(sseEvent.Content);
} }
else if (string.IsNullOrEmpty(sseEvent.Content)) else if (string.IsNullOrEmpty(sseEvent.Content))
@ -73,90 +56,28 @@ public static class SseHandler
result.AppendLine(); result.AppendLine();
} }
// Asynchrone Verarbeitung, falls erforderlich await Task.CompletedTask;
await Task.CompletedTask; // Placeholder für asynchrone Operationen
return result.ToString(); return result.ToString();
} }
private static async Task HandleMetadataAsync(Settings.DataModel.Metadata metadata, StringBuilder resultBuilder, Dictionary<string, List<string>> images, ILogger logger) private static void ProcessImageSegment(PptxImageData pptxImageData, Dictionary<string, List<string>> images, StringBuilder resultBuilder, ILogger logger)
{ {
switch (metadata) if (string.IsNullOrEmpty(pptxImageData.Id) || string.IsNullOrEmpty(pptxImageData.Content))
{
case TextMetadata textMetadata:
// Für Textdateien: Zeilennummer hinzufügen
resultBuilder.AppendLine();
if (textMetadata.Text != null)
{
var lineNumber = textMetadata.Text.LineNumber;
resultBuilder.AppendLine($"[Zeile {lineNumber}]");
}
break;
case PdfMetadata pdfMetadata:
// Für PDF: Seitennummer und Umbruch
var pageNumber = pdfMetadata.Pdf.PageNumber;
resultBuilder.AppendLine();
resultBuilder.AppendLine($"[Seite {pageNumber}]");
break;
case SpreadsheetMetadata spreadsheetMetadata:
// Für Tabellen: Arbeitsblattname und Zeilennummer
var sheetName = spreadsheetMetadata.Spreadsheet.SheetName;
var rowNumber = spreadsheetMetadata.Spreadsheet.RowNumber;
resultBuilder.AppendLine();
resultBuilder.AppendLine($"[Tabelle: {sheetName}, Zeile: {rowNumber}]");
break;
case PresentationMetadata presentationMetadata:
// Für Präsentationen: Foliennummer und ggf. Bild
var slideNumber = presentationMetadata.Presentation.SlideNumber;
resultBuilder.AppendLine();
resultBuilder.AppendLine($"[Folie {slideNumber}]");
if (presentationMetadata.Presentation.Image != null)
{
ProcessImageSegment(presentationMetadata.Presentation.Image, images, resultBuilder, logger);
}
break;
case ImageMetadata _:
// Für Bilder
resultBuilder.AppendLine();
resultBuilder.AppendLine("[Bildbeschreibung]");
break;
default:
// Unbekannter Metadaten-Typ
logger?.LogWarning("Unbekannter Metadaten-Typ: {Type}", metadata.GetType().Name);
break;
}
await Task.CompletedTask;
}
private static void ProcessImageSegment(ImageData imageData, Dictionary<string, List<string>> images, StringBuilder resultBuilder, ILogger logger)
{
if (string.IsNullOrEmpty(imageData.Id) || string.IsNullOrEmpty(imageData.Content))
{ {
return; return;
} }
if (!images.ContainsKey(imageData.Id)) if (!images.ContainsKey(pptxImageData.Id))
{ {
images[imageData.Id] = new List<string>(); images[pptxImageData.Id] = new List<string>();
} }
images[imageData.Id].Add(imageData.Content); images[pptxImageData.Id].Add(pptxImageData.Content);
logger?.LogDebug("Added image segment {Segment} for image {Id}", imageData.Segment, imageData.Id); if (pptxImageData.IsEnd)
if (imageData.IsEnd)
{ {
logger?.LogDebug("Completed image {Id} with {SegmentCount} segments", imageData.Id, images[imageData.Id].Count);
resultBuilder.AppendLine("[Präsentationsbild eingebettet]"); resultBuilder.AppendLine("[Präsentationsbild eingebettet]");
// TODO
// Hier kannst du die Bilddaten weiterverarbeiten, z.B. zusammenfügen und speichern
} }
} }
} }

View File

@ -30,7 +30,7 @@ pub enum Metadata {
Pdf { page_number: usize }, Pdf { page_number: usize },
Spreadsheet { sheet_name: String, row_number: usize }, Spreadsheet { sheet_name: String, row_number: usize },
Document {}, Document {},
Image, Image {},
} }
const TO_MARKDOWN: &str = "markdown"; const TO_MARKDOWN: &str = "markdown";
@ -289,7 +289,7 @@ async fn chunk_image(file_path: &str) -> Result<ChunkStream> {
let stream = stream! { let stream = stream! {
yield Ok(Chunk { yield Ok(Chunk {
content: base64, content: base64,
metadata: Metadata::Image, metadata: Metadata::Image {},
}); });
}; };