mirror of
https://github.com/MindWorkAI/AI-Studio.git
synced 2025-07-28 05:42:57 +00:00
finalize event handler and retrieval logic
This commit is contained in:
parent
53f8257a01
commit
52866c34dd
@ -5476,6 +5476,9 @@ UI_TEXT_CONTENT["AISTUDIO::TOOLS::RAG::RAGPROCESSES::AISRCSELWITHRETCTXVAL::T304
|
|||||||
-- AI source selection with AI retrieval context validation
|
-- AI source selection with AI retrieval context validation
|
||||||
UI_TEXT_CONTENT["AISTUDIO::TOOLS::RAG::RAGPROCESSES::AISRCSELWITHRETCTXVAL::T3775725978"] = "AI source selection with AI retrieval context validation"
|
UI_TEXT_CONTENT["AISTUDIO::TOOLS::RAG::RAGPROCESSES::AISRCSELWITHRETCTXVAL::T3775725978"] = "AI source selection with AI retrieval context validation"
|
||||||
|
|
||||||
|
-- Executable Files
|
||||||
|
UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPEFILTER::T2217313358"] = "Executable Files"
|
||||||
|
|
||||||
-- PDF Files
|
-- PDF Files
|
||||||
UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPEFILTER::T3108466742"] = "PDF Files"
|
UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPEFILTER::T3108466742"] = "PDF Files"
|
||||||
|
|
||||||
|
@ -21,67 +21,42 @@ public sealed partial class RustService
|
|||||||
public async Task<string> ReadArbitraryFileData(string path, int maxEvents)
|
public async Task<string> ReadArbitraryFileData(string path, int maxEvents)
|
||||||
{
|
{
|
||||||
var requestUri = $"/retrieval/fs/extract?path={Uri.EscapeDataString(path)}";
|
var requestUri = $"/retrieval/fs/extract?path={Uri.EscapeDataString(path)}";
|
||||||
this.logger?.LogInformation("The encoded path is: '{Path}'", requestUri);
|
|
||||||
|
|
||||||
var request = new HttpRequestMessage(HttpMethod.Get, requestUri);
|
var request = new HttpRequestMessage(HttpMethod.Get, requestUri);
|
||||||
var response = await this.http.SendAsync(request, HttpCompletionOption.ResponseHeadersRead);
|
var response = await this.http.SendAsync(request, HttpCompletionOption.ResponseHeadersRead);
|
||||||
|
|
||||||
this.logger?.LogInformation("Response received: {StatusCode}", response.StatusCode);
|
|
||||||
|
|
||||||
if (!response.IsSuccessStatusCode)
|
if (!response.IsSuccessStatusCode) { return string.Empty; }
|
||||||
{
|
|
||||||
this.logger?.LogError("Fehler beim Empfangen des SSE-Streams: {ResponseStatusCode}", response.StatusCode);
|
|
||||||
return string.Empty;
|
|
||||||
}
|
|
||||||
|
|
||||||
await using var stream = await response.Content.ReadAsStreamAsync();
|
await using var stream = await response.Content.ReadAsStreamAsync();
|
||||||
using var reader = new StreamReader(stream);
|
using var reader = new StreamReader(stream);
|
||||||
|
|
||||||
var resultBuilder = new StringBuilder();
|
var resultBuilder = new StringBuilder();
|
||||||
var eventCount = 0;
|
var eventCount = 0;
|
||||||
var images = new Dictionary<string, List<string>>();
|
|
||||||
|
|
||||||
this.logger?.LogInformation("Starting to read SSE events");
|
|
||||||
|
|
||||||
while (!reader.EndOfStream && eventCount < maxEvents)
|
while (!reader.EndOfStream && eventCount < maxEvents)
|
||||||
{
|
{
|
||||||
var line = await reader.ReadLineAsync();
|
var line = await reader.ReadLineAsync();
|
||||||
if (string.IsNullOrEmpty(line))
|
|
||||||
{
|
|
||||||
continue; // SSE Format trennt Events durch leere Zeilen
|
|
||||||
}
|
|
||||||
|
|
||||||
if (line.StartsWith("data:"))
|
if (string.IsNullOrEmpty(line)) continue;
|
||||||
{
|
if (!line.StartsWith("data:")) continue;
|
||||||
var jsonContent = line[5..];
|
|
||||||
|
|
||||||
try
|
var jsonContent = line[5..];
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
var sseEvent = JsonSerializer.Deserialize<SseEvent>(jsonContent);
|
||||||
|
if (sseEvent != null)
|
||||||
{
|
{
|
||||||
var sseEvent = JsonSerializer.Deserialize<SseEvent>(jsonContent);
|
var content = await SseHandler.ProcessEventAsync(sseEvent);
|
||||||
if (sseEvent != null)
|
resultBuilder.Append(content);
|
||||||
{
|
eventCount++;
|
||||||
var content = await SseHandler.ProcessEventAsync(sseEvent);
|
|
||||||
resultBuilder.Append(content);
|
|
||||||
eventCount++;
|
|
||||||
this.logger?.LogDebug("Processed event {Count}:\t{Content}", eventCount, line);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
catch (JsonException ex)
|
|
||||||
{
|
|
||||||
this.logger?.LogWarning("Failed to parse JSON data: {Error}\nLine: {Line}", ex.Message, line);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
catch (JsonException ex) { return string.Empty; }
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
var result = resultBuilder.ToString();
|
var result = resultBuilder.ToString();
|
||||||
this.logger?.LogInformation("Finished reading. Total events: {Count}, Result length: {Length} chars",
|
|
||||||
eventCount, result.Length);
|
|
||||||
|
|
||||||
if (images.Count > 0)
|
|
||||||
{
|
|
||||||
this.logger?.LogInformation("Extracted {Count} images", images.Count);
|
|
||||||
// Hier könntest du die Bilder weiterverarbeiten
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
@ -1,71 +1,54 @@
|
|||||||
using System.Text;
|
using System.Text;
|
||||||
using AIStudio.Settings.DataModel;
|
using AIStudio.Settings.DataModel;
|
||||||
using _Imports = MudExtensions._Imports;
|
|
||||||
|
|
||||||
namespace AIStudio.Tools;
|
namespace AIStudio.Tools;
|
||||||
|
|
||||||
public static class SseHandler
|
public static class SseHandler
|
||||||
{
|
{
|
||||||
// public static async Task ProcessEventAsync(SseEvent sseEvent, StringBuilder resultBuilder, Dictionary<string, List<string>> images, ILogger logger)
|
public static async Task<string> ProcessEventAsync(SseEvent? sseEvent)
|
||||||
// {
|
|
||||||
// if (sseEvent.Metadata != null)
|
|
||||||
// {
|
|
||||||
// await HandleMetadataAsync(sseEvent.Metadata, resultBuilder, images, logger);
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// if (!string.IsNullOrEmpty(sseEvent.Content))
|
|
||||||
// {
|
|
||||||
// resultBuilder.Append(sseEvent.Content);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
public static async Task<string> ProcessEventAsync(SseEvent sseEvent)
|
|
||||||
{
|
{
|
||||||
var result = new StringBuilder();
|
var result = new StringBuilder();
|
||||||
|
|
||||||
if (sseEvent == null)
|
if (sseEvent == null) { return result.ToString(); }
|
||||||
{
|
|
||||||
// Falls `sseEvent` null ist, gib einen leeren String zurück oder handle es entsprechend.
|
|
||||||
return result.ToString();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Kombiniere Content und Metadata
|
if (sseEvent is { Content: not null, Metadata: not null })
|
||||||
if (sseEvent.Content != null && sseEvent.Metadata != null)
|
|
||||||
{
|
{
|
||||||
// Je nach Typ der Metadata, verarbeite entsprechend
|
|
||||||
switch (sseEvent.Metadata)
|
switch (sseEvent.Metadata)
|
||||||
{
|
{
|
||||||
case TextMetadata textMetadata:
|
case TextMetadata textMetadata:
|
||||||
var lineNumber = textMetadata.Text?.LineNumber ?? 0;
|
var lineNumber = textMetadata.Text?.LineNumber ?? 0;
|
||||||
result.AppendLine($"{lineNumber}:\t{sseEvent.Content}");
|
result.AppendLine($"{sseEvent.Content}");
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PdfMetadata pdfMetadata:
|
||||||
|
var pageNumber = pdfMetadata.Pdf?.PageNumber ?? 0;
|
||||||
|
result.AppendLine($"[Page {pageNumber}]:\n{sseEvent.Content}");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SpreadsheetMetadata spreadsheetMetadata:
|
case SpreadsheetMetadata spreadsheetMetadata:
|
||||||
var sheetName = spreadsheetMetadata.Spreadsheet.SheetName;
|
var sheetName = spreadsheetMetadata.Spreadsheet?.SheetName;
|
||||||
var rowNumber = spreadsheetMetadata.Spreadsheet.RowNumber;
|
var rowNumber = spreadsheetMetadata.Spreadsheet?.RowNumber;
|
||||||
if (rowNumber == 1) { result.AppendLine($"{sheetName}");}
|
|
||||||
|
|
||||||
result.AppendLine($"{rowNumber}:\t{sseEvent.Content}");
|
if (rowNumber == 1) { result.AppendLine($"\n{sheetName}:"); }
|
||||||
|
|
||||||
|
result.AppendLine($"{sseEvent.Content}");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case DocumentMetadata documentMetadata:
|
case DocumentMetadata documentMetadata:
|
||||||
result.AppendLine($"{sseEvent.Content}");
|
result.AppendLine($"{sseEvent.Content}");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// Weitere Metadaten-Typen können hier hinzugefügt werden
|
case ImageMetadata imageMetadata:
|
||||||
// case EmbeddingMetadata embeddingMetadata:
|
result.AppendLine($"{sseEvent.Content}");
|
||||||
// // Verarbeitung für EmbeddingMetadata
|
break;
|
||||||
// break;
|
|
||||||
|
|
||||||
default:
|
default:
|
||||||
// Wenn der Metadaten-Typ nicht erkannt wird, füge nur den Content hinzu
|
|
||||||
result.AppendLine(sseEvent.Content);
|
result.AppendLine(sseEvent.Content);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (!string.IsNullOrEmpty(sseEvent.Content))
|
else if (!string.IsNullOrEmpty(sseEvent.Content))
|
||||||
{
|
{
|
||||||
// Falls nur Content vorhanden ist
|
|
||||||
result.AppendLine(sseEvent.Content);
|
result.AppendLine(sseEvent.Content);
|
||||||
}
|
}
|
||||||
else if (string.IsNullOrEmpty(sseEvent.Content))
|
else if (string.IsNullOrEmpty(sseEvent.Content))
|
||||||
@ -73,90 +56,28 @@ public static class SseHandler
|
|||||||
result.AppendLine();
|
result.AppendLine();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Asynchrone Verarbeitung, falls erforderlich
|
await Task.CompletedTask;
|
||||||
await Task.CompletedTask; // Placeholder für asynchrone Operationen
|
|
||||||
|
|
||||||
return result.ToString();
|
return result.ToString();
|
||||||
}
|
}
|
||||||
|
|
||||||
private static async Task HandleMetadataAsync(Settings.DataModel.Metadata metadata, StringBuilder resultBuilder, Dictionary<string, List<string>> images, ILogger logger)
|
private static void ProcessImageSegment(PptxImageData pptxImageData, Dictionary<string, List<string>> images, StringBuilder resultBuilder, ILogger logger)
|
||||||
{
|
{
|
||||||
switch (metadata)
|
if (string.IsNullOrEmpty(pptxImageData.Id) || string.IsNullOrEmpty(pptxImageData.Content))
|
||||||
{
|
|
||||||
case TextMetadata textMetadata:
|
|
||||||
// Für Textdateien: Zeilennummer hinzufügen
|
|
||||||
resultBuilder.AppendLine();
|
|
||||||
if (textMetadata.Text != null)
|
|
||||||
{
|
|
||||||
var lineNumber = textMetadata.Text.LineNumber;
|
|
||||||
resultBuilder.AppendLine($"[Zeile {lineNumber}]");
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case PdfMetadata pdfMetadata:
|
|
||||||
// Für PDF: Seitennummer und Umbruch
|
|
||||||
var pageNumber = pdfMetadata.Pdf.PageNumber;
|
|
||||||
resultBuilder.AppendLine();
|
|
||||||
resultBuilder.AppendLine($"[Seite {pageNumber}]");
|
|
||||||
break;
|
|
||||||
|
|
||||||
case SpreadsheetMetadata spreadsheetMetadata:
|
|
||||||
// Für Tabellen: Arbeitsblattname und Zeilennummer
|
|
||||||
var sheetName = spreadsheetMetadata.Spreadsheet.SheetName;
|
|
||||||
var rowNumber = spreadsheetMetadata.Spreadsheet.RowNumber;
|
|
||||||
resultBuilder.AppendLine();
|
|
||||||
resultBuilder.AppendLine($"[Tabelle: {sheetName}, Zeile: {rowNumber}]");
|
|
||||||
break;
|
|
||||||
|
|
||||||
case PresentationMetadata presentationMetadata:
|
|
||||||
// Für Präsentationen: Foliennummer und ggf. Bild
|
|
||||||
var slideNumber = presentationMetadata.Presentation.SlideNumber;
|
|
||||||
resultBuilder.AppendLine();
|
|
||||||
resultBuilder.AppendLine($"[Folie {slideNumber}]");
|
|
||||||
|
|
||||||
if (presentationMetadata.Presentation.Image != null)
|
|
||||||
{
|
|
||||||
ProcessImageSegment(presentationMetadata.Presentation.Image, images, resultBuilder, logger);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case ImageMetadata _:
|
|
||||||
// Für Bilder
|
|
||||||
resultBuilder.AppendLine();
|
|
||||||
resultBuilder.AppendLine("[Bildbeschreibung]");
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
|
||||||
// Unbekannter Metadaten-Typ
|
|
||||||
logger?.LogWarning("Unbekannter Metadaten-Typ: {Type}", metadata.GetType().Name);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
await Task.CompletedTask;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static void ProcessImageSegment(ImageData imageData, Dictionary<string, List<string>> images, StringBuilder resultBuilder, ILogger logger)
|
|
||||||
{
|
|
||||||
if (string.IsNullOrEmpty(imageData.Id) || string.IsNullOrEmpty(imageData.Content))
|
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!images.ContainsKey(imageData.Id))
|
if (!images.ContainsKey(pptxImageData.Id))
|
||||||
{
|
{
|
||||||
images[imageData.Id] = new List<string>();
|
images[pptxImageData.Id] = new List<string>();
|
||||||
}
|
}
|
||||||
|
|
||||||
images[imageData.Id].Add(imageData.Content);
|
images[pptxImageData.Id].Add(pptxImageData.Content);
|
||||||
|
|
||||||
logger?.LogDebug("Added image segment {Segment} for image {Id}", imageData.Segment, imageData.Id);
|
if (pptxImageData.IsEnd)
|
||||||
|
|
||||||
if (imageData.IsEnd)
|
|
||||||
{
|
{
|
||||||
logger?.LogDebug("Completed image {Id} with {SegmentCount} segments", imageData.Id, images[imageData.Id].Count);
|
|
||||||
resultBuilder.AppendLine("[Präsentationsbild eingebettet]");
|
resultBuilder.AppendLine("[Präsentationsbild eingebettet]");
|
||||||
|
// TODO
|
||||||
// Hier kannst du die Bilddaten weiterverarbeiten, z.B. zusammenfügen und speichern
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -30,7 +30,7 @@ pub enum Metadata {
|
|||||||
Pdf { page_number: usize },
|
Pdf { page_number: usize },
|
||||||
Spreadsheet { sheet_name: String, row_number: usize },
|
Spreadsheet { sheet_name: String, row_number: usize },
|
||||||
Document {},
|
Document {},
|
||||||
Image,
|
Image {},
|
||||||
}
|
}
|
||||||
|
|
||||||
const TO_MARKDOWN: &str = "markdown";
|
const TO_MARKDOWN: &str = "markdown";
|
||||||
@ -289,7 +289,7 @@ async fn chunk_image(file_path: &str) -> Result<ChunkStream> {
|
|||||||
let stream = stream! {
|
let stream = stream! {
|
||||||
yield Ok(Chunk {
|
yield Ok(Chunk {
|
||||||
content: base64,
|
content: base64,
|
||||||
metadata: Metadata::Image,
|
metadata: Metadata::Image {},
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user