From 180e55c8ae3a29e0c96c2fac33da4ecf67b6e56a Mon Sep 17 00:00:00 2001 From: Thorsten Sommer Date: Mon, 30 Jun 2025 08:46:45 +0200 Subject: [PATCH] Renamed and refactored SSE metadata and event classes --- .../Settings/DataModel/SseEvent.cs | 88 ------------------- .../Tools/ContentStreamDocumentMetadata.cs | 4 + .../Tools/ContentStreamImageMetadata.cs | 4 + .../ContentStreamMetadataJsonConverter.cs | 32 +++++++ .../Tools/ContentStreamPdfDetails.cs | 11 +++ .../Tools/ContentStreamPdfMetadata.cs | 11 +++ .../Tools/ContentStreamPptxImageData.cs | 18 ++++ .../Tools/ContentStreamPresentationDetails.cs | 14 +++ .../ContentStreamPresentationMetadata.cs | 11 +++ .../Tools/ContentStreamSpreadsheetDetails.cs | 14 +++ .../Tools/ContentStreamSpreadsheetMetadata.cs | 11 +++ .../Tools/ContentStreamSseEvent.cs | 12 +++ ...eHandler.cs => ContentStreamSseHandler.cs} | 35 ++++---- .../Tools/ContentStreamSseMetadata.cs | 6 ++ .../Tools/ContentStreamTextDetails.cs | 10 +++ .../Tools/ContentStreamTextMetadata.cs | 10 +++ .../Tools/MetadataJsonConverter.cs | 32 ------- .../Tools/Services/RustService.Retrieval.cs | 4 +- 18 files changed, 189 insertions(+), 138 deletions(-) delete mode 100644 app/MindWork AI Studio/Settings/DataModel/SseEvent.cs create mode 100644 app/MindWork AI Studio/Tools/ContentStreamDocumentMetadata.cs create mode 100644 app/MindWork AI Studio/Tools/ContentStreamImageMetadata.cs create mode 100644 app/MindWork AI Studio/Tools/ContentStreamMetadataJsonConverter.cs create mode 100644 app/MindWork AI Studio/Tools/ContentStreamPdfDetails.cs create mode 100644 app/MindWork AI Studio/Tools/ContentStreamPdfMetadata.cs create mode 100644 app/MindWork AI Studio/Tools/ContentStreamPptxImageData.cs create mode 100644 app/MindWork AI Studio/Tools/ContentStreamPresentationDetails.cs create mode 100644 app/MindWork AI Studio/Tools/ContentStreamPresentationMetadata.cs create mode 100644 app/MindWork AI Studio/Tools/ContentStreamSpreadsheetDetails.cs create mode 100644 
app/MindWork AI Studio/Tools/ContentStreamSpreadsheetMetadata.cs create mode 100644 app/MindWork AI Studio/Tools/ContentStreamSseEvent.cs rename app/MindWork AI Studio/Tools/{SseHandler.cs => ContentStreamSseHandler.cs} (71%) create mode 100644 app/MindWork AI Studio/Tools/ContentStreamSseMetadata.cs create mode 100644 app/MindWork AI Studio/Tools/ContentStreamTextDetails.cs create mode 100644 app/MindWork AI Studio/Tools/ContentStreamTextMetadata.cs delete mode 100644 app/MindWork AI Studio/Tools/MetadataJsonConverter.cs diff --git a/app/MindWork AI Studio/Settings/DataModel/SseEvent.cs b/app/MindWork AI Studio/Settings/DataModel/SseEvent.cs deleted file mode 100644 index a1aa4295..00000000 --- a/app/MindWork AI Studio/Settings/DataModel/SseEvent.cs +++ /dev/null @@ -1,88 +0,0 @@ -using System.Text.Json.Serialization; - -namespace AIStudio.Settings.DataModel; - -public class SseEvent -{ - [JsonPropertyName("content")] - public string? Content { get; set; } - - [JsonPropertyName("metadata")] - public Metadata? Metadata { get; set; } -} - -[JsonConverter(typeof(MetadataJsonConverter))] -public abstract class Metadata; - -public class TextMetadata : Metadata -{ - [JsonPropertyName("Text")] - public TextDetails? Text { get; set; } -} - -public class TextDetails -{ - [JsonPropertyName("line_number")] - public int? LineNumber { get; set; } -} - -public class PdfMetadata : Metadata -{ - [JsonPropertyName("Pdf")] - public PdfDetails? Pdf { get; set; } -} - -public class PdfDetails -{ - [JsonPropertyName("page_number")] - public int? PageNumber { get; set; } -} - -public class SpreadsheetMetadata : Metadata -{ - [JsonPropertyName("Spreadsheet")] - public SpreadsheetDetails? Spreadsheet { get; set; } -} - -public class SpreadsheetDetails -{ - [JsonPropertyName("sheet_name")] - public string? SheetName { get; set; } - - [JsonPropertyName("row_number")] - public int? 
RowNumber { get; set; } -} - -public class DocumentMetadata : Metadata {} - -public class ImageMetadata: Metadata {} - -public class PresentationMetadata : Metadata -{ - [JsonPropertyName("Presentation")] - public PresentationDetails? Presentation { get; set; } -} - -public class PresentationDetails -{ - [JsonPropertyName("slide_number")] - public int? SlideNumber { get; set; } - - [JsonPropertyName("image")] - public PptxImageData? Image { get; set; } -} - -public class PptxImageData -{ - [JsonPropertyName("id")] - public string? Id { get; set; } - - [JsonPropertyName("content")] - public string? Content { get; set; } - - [JsonPropertyName("segment")] - public int? Segment { get; set; } - - [JsonPropertyName("is_end")] - public bool IsEnd { get; set; } -} \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/ContentStreamDocumentMetadata.cs b/app/MindWork AI Studio/Tools/ContentStreamDocumentMetadata.cs new file mode 100644 index 00000000..4b21faeb --- /dev/null +++ b/app/MindWork AI Studio/Tools/ContentStreamDocumentMetadata.cs @@ -0,0 +1,4 @@ +namespace AIStudio.Tools; + +// ReSharper disable ClassNeverInstantiated.Global +public sealed class ContentStreamDocumentMetadata : ContentStreamSseMetadata; \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/ContentStreamImageMetadata.cs b/app/MindWork AI Studio/Tools/ContentStreamImageMetadata.cs new file mode 100644 index 00000000..faa5bcad --- /dev/null +++ b/app/MindWork AI Studio/Tools/ContentStreamImageMetadata.cs @@ -0,0 +1,4 @@ +namespace AIStudio.Tools; + +// ReSharper disable ClassNeverInstantiated.Global +public class ContentStreamImageMetadata: ContentStreamSseMetadata; \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/ContentStreamMetadataJsonConverter.cs b/app/MindWork AI Studio/Tools/ContentStreamMetadataJsonConverter.cs new file mode 100644 index 00000000..9ffec78c --- /dev/null +++ b/app/MindWork AI Studio/Tools/ContentStreamMetadataJsonConverter.cs 
@@ -0,0 +1,32 @@ +using System.Text.Json; +using System.Text.Json.Serialization; + +namespace AIStudio.Tools; + +public class ContentStreamMetadataJsonConverter : JsonConverter +{ + public override ContentStreamSseMetadata? Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) + { + using var jsonDoc = JsonDocument.ParseValue(ref reader); + var root = jsonDoc.RootElement; + var rawText = root.GetRawText(); + + var propertyName = root.EnumerateObject() + .Select(p => p.Name) + .FirstOrDefault(); + + return propertyName switch + { + "Text" => JsonSerializer.Deserialize(rawText, options), + "Pdf" => JsonSerializer.Deserialize(rawText, options), + "Spreadsheet" => JsonSerializer.Deserialize(rawText, options), + "Presentation" => JsonSerializer.Deserialize(rawText, options), + "Image" => JsonSerializer.Deserialize(rawText, options), + "Document" => JsonSerializer.Deserialize(rawText, options), + + _ => null + }; + } + + public override void Write(Utf8JsonWriter writer, ContentStreamSseMetadata value, JsonSerializerOptions options) => JsonSerializer.Serialize(writer, value, value.GetType(), options); +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/ContentStreamPdfDetails.cs b/app/MindWork AI Studio/Tools/ContentStreamPdfDetails.cs new file mode 100644 index 00000000..8df25e52 --- /dev/null +++ b/app/MindWork AI Studio/Tools/ContentStreamPdfDetails.cs @@ -0,0 +1,11 @@ +using System.Text.Json.Serialization; + +namespace AIStudio.Tools; + +// ReSharper disable UnusedAutoPropertyAccessor.Global +// ReSharper disable ClassNeverInstantiated.Global +public class ContentStreamPdfDetails +{ + [JsonPropertyName("page_number")] + public int? 
PageNumber { get; init; } +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/ContentStreamPdfMetadata.cs b/app/MindWork AI Studio/Tools/ContentStreamPdfMetadata.cs new file mode 100644 index 00000000..43e36f9c --- /dev/null +++ b/app/MindWork AI Studio/Tools/ContentStreamPdfMetadata.cs @@ -0,0 +1,11 @@ +using System.Text.Json.Serialization; + +namespace AIStudio.Tools; + +// ReSharper disable UnusedAutoPropertyAccessor.Global +// ReSharper disable ClassNeverInstantiated.Global +public class ContentStreamPdfMetadata : ContentStreamSseMetadata +{ + [JsonPropertyName("Pdf")] + public ContentStreamPdfDetails? Pdf { get; init; } +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/ContentStreamPptxImageData.cs b/app/MindWork AI Studio/Tools/ContentStreamPptxImageData.cs new file mode 100644 index 00000000..b884eadf --- /dev/null +++ b/app/MindWork AI Studio/Tools/ContentStreamPptxImageData.cs @@ -0,0 +1,18 @@ +using System.Text.Json.Serialization; + +namespace AIStudio.Tools; + +public class ContentStreamPptxImageData +{ + [JsonPropertyName("id")] + public string? Id { get; init; } + + [JsonPropertyName("content")] + public string? Content { get; init; } + + [JsonPropertyName("segment")] + public int? Segment { get; init; } + + [JsonPropertyName("is_end")] + public bool IsEnd { get; init; } +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/ContentStreamPresentationDetails.cs b/app/MindWork AI Studio/Tools/ContentStreamPresentationDetails.cs new file mode 100644 index 00000000..e0b39fb8 --- /dev/null +++ b/app/MindWork AI Studio/Tools/ContentStreamPresentationDetails.cs @@ -0,0 +1,14 @@ +using System.Text.Json.Serialization; + +namespace AIStudio.Tools; + +// ReSharper disable UnusedAutoPropertyAccessor.Global +// ReSharper disable ClassNeverInstantiated.Global +public sealed class ContentStreamPresentationDetails +{ + [JsonPropertyName("slide_number")] + public int? 
SlideNumber { get; init; } + + [JsonPropertyName("image")] + public ContentStreamPptxImageData? Image { get; init; } +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/ContentStreamPresentationMetadata.cs b/app/MindWork AI Studio/Tools/ContentStreamPresentationMetadata.cs new file mode 100644 index 00000000..7e033e3b --- /dev/null +++ b/app/MindWork AI Studio/Tools/ContentStreamPresentationMetadata.cs @@ -0,0 +1,11 @@ +using System.Text.Json.Serialization; + +namespace AIStudio.Tools; + +// ReSharper disable UnusedAutoPropertyAccessor.Global +// ReSharper disable ClassNeverInstantiated.Global +public class ContentStreamPresentationMetadata : ContentStreamSseMetadata +{ + [JsonPropertyName("Presentation")] + public ContentStreamPresentationDetails? Presentation { get; init; } +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/ContentStreamSpreadsheetDetails.cs b/app/MindWork AI Studio/Tools/ContentStreamSpreadsheetDetails.cs new file mode 100644 index 00000000..b23cec1a --- /dev/null +++ b/app/MindWork AI Studio/Tools/ContentStreamSpreadsheetDetails.cs @@ -0,0 +1,14 @@ +using System.Text.Json.Serialization; + +namespace AIStudio.Tools; + +// ReSharper disable UnusedAutoPropertyAccessor.Global +// ReSharper disable ClassNeverInstantiated.Global +public class ContentStreamSpreadsheetDetails +{ + [JsonPropertyName("sheet_name")] + public string? SheetName { get; init; } + + [JsonPropertyName("row_number")] + public int? 
RowNumber { get; init; } +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/ContentStreamSpreadsheetMetadata.cs b/app/MindWork AI Studio/Tools/ContentStreamSpreadsheetMetadata.cs new file mode 100644 index 00000000..dae515f5 --- /dev/null +++ b/app/MindWork AI Studio/Tools/ContentStreamSpreadsheetMetadata.cs @@ -0,0 +1,11 @@ +using System.Text.Json.Serialization; + +namespace AIStudio.Tools; + +// ReSharper disable UnusedAutoPropertyAccessor.Global +// ReSharper disable ClassNeverInstantiated.Global +public class ContentStreamSpreadsheetMetadata : ContentStreamSseMetadata +{ + [JsonPropertyName("Spreadsheet")] + public ContentStreamSpreadsheetDetails? Spreadsheet { get; init; } +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/ContentStreamSseEvent.cs b/app/MindWork AI Studio/Tools/ContentStreamSseEvent.cs new file mode 100644 index 00000000..63f05fc0 --- /dev/null +++ b/app/MindWork AI Studio/Tools/ContentStreamSseEvent.cs @@ -0,0 +1,12 @@ +using System.Text.Json.Serialization; + +namespace AIStudio.Tools; + +public class ContentStreamSseEvent +{ + [JsonPropertyName("content")] + public string? Content { get; init; } + + [JsonPropertyName("metadata")] + public ContentStreamSseMetadata? 
Metadata { get; init; } +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/SseHandler.cs b/app/MindWork AI Studio/Tools/ContentStreamSseHandler.cs similarity index 71% rename from app/MindWork AI Studio/Tools/SseHandler.cs rename to app/MindWork AI Studio/Tools/ContentStreamSseHandler.cs index 0de71867..1b777c3d 100644 --- a/app/MindWork AI Studio/Tools/SseHandler.cs +++ b/app/MindWork AI Studio/Tools/ContentStreamSseHandler.cs @@ -3,26 +3,28 @@ using System.Text; namespace AIStudio.Tools; -public static class SseHandler +public static class ContentStreamSseHandler { - private static readonly ConcurrentDictionary> PPTX_IMAGES = new(); + private static readonly ConcurrentDictionary> PPTX_IMAGES = new(); + + #warning We must use a ConcurrentDictionary as well for multiple parallel embeddings private static int CURRENT_SLIDE_NUMBER; - public static string ProcessEvent(SseEvent? sseEvent, bool extractImages = true) + public static string ProcessEvent(ContentStreamSseEvent? sseEvent, bool extractImages = true) { switch (sseEvent) { case { Content: not null, Metadata: not null }: switch (sseEvent.Metadata) { - case TextMetadata: + case ContentStreamTextMetadata: return $"{sseEvent.Content}"; - case PdfMetadata pdfMetadata: + case ContentStreamPdfMetadata pdfMetadata: var pageNumber = pdfMetadata.Pdf?.PageNumber ?? 
0; return $"# Page {pageNumber}\n{sseEvent.Content}"; - case SpreadsheetMetadata spreadsheetMetadata: + case ContentStreamSpreadsheetMetadata spreadsheetMetadata: var sheetName = spreadsheetMetadata.Spreadsheet?.SheetName; var rowNumber = spreadsheetMetadata.Spreadsheet?.RowNumber; var spreadSheetResult = new StringBuilder(); @@ -32,11 +34,11 @@ public static class SseHandler spreadSheetResult.AppendLine($"{sseEvent.Content}"); return spreadSheetResult.ToString(); - case DocumentMetadata: - case ImageMetadata: + case ContentStreamDocumentMetadata: + case ContentStreamImageMetadata: return $"{sseEvent.Content}"; - case PresentationMetadata presentationMetadata: + case ContentStreamPresentationMetadata presentationMetadata: var slideNumber = presentationMetadata.Presentation?.SlideNumber ?? 0; var image = presentationMetadata.Presentation?.Image ?? null; var presentationResult = new StringBuilder(); @@ -67,17 +69,18 @@ public static class SseHandler } } - private static bool ProcessImageSegment(PptxImageData pptxImageData) + private static bool ProcessImageSegment(ContentStreamPptxImageData contentStreamPptxImageData) { - if (string.IsNullOrWhiteSpace(pptxImageData.Id)) + if (string.IsNullOrWhiteSpace(contentStreamPptxImageData.Id)) return false; - var id = pptxImageData.Id; - var segment = pptxImageData.Segment ?? 0; - var content = pptxImageData.Content ?? string.Empty; - var isEnd = pptxImageData.IsEnd; + #warning Image IDs must be unique across all parallel embeddings. Use a GUID or similar as prefix. + var id = contentStreamPptxImageData.Id; + var segment = contentStreamPptxImageData.Segment ?? 0; + var content = contentStreamPptxImageData.Content ?? 
string.Empty; + var isEnd = contentStreamPptxImageData.IsEnd; - var imageSegment = new PptxImageData + var imageSegment = new ContentStreamPptxImageData { Id = id, Content = content, diff --git a/app/MindWork AI Studio/Tools/ContentStreamSseMetadata.cs b/app/MindWork AI Studio/Tools/ContentStreamSseMetadata.cs new file mode 100644 index 00000000..70da325b --- /dev/null +++ b/app/MindWork AI Studio/Tools/ContentStreamSseMetadata.cs @@ -0,0 +1,6 @@ +using System.Text.Json.Serialization; + +namespace AIStudio.Tools; + +[JsonConverter(typeof(ContentStreamMetadataJsonConverter))] +public abstract class ContentStreamSseMetadata; \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/ContentStreamTextDetails.cs b/app/MindWork AI Studio/Tools/ContentStreamTextDetails.cs new file mode 100644 index 00000000..ec240612 --- /dev/null +++ b/app/MindWork AI Studio/Tools/ContentStreamTextDetails.cs @@ -0,0 +1,10 @@ +using System.Text.Json.Serialization; + +namespace AIStudio.Tools; + +// ReSharper disable ClassNeverInstantiated.Global +public class ContentStreamTextDetails +{ + [JsonPropertyName("line_number")] + public int? LineNumber { get; init; } +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/ContentStreamTextMetadata.cs b/app/MindWork AI Studio/Tools/ContentStreamTextMetadata.cs new file mode 100644 index 00000000..6b7b7738 --- /dev/null +++ b/app/MindWork AI Studio/Tools/ContentStreamTextMetadata.cs @@ -0,0 +1,10 @@ +using System.Text.Json.Serialization; + +namespace AIStudio.Tools; + +// ReSharper disable ClassNeverInstantiated.Global +public class ContentStreamTextMetadata : ContentStreamSseMetadata +{ + [JsonPropertyName("Text")] + public ContentStreamTextDetails? 
Text { get; init; } +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/MetadataJsonConverter.cs b/app/MindWork AI Studio/Tools/MetadataJsonConverter.cs deleted file mode 100644 index 5c89798f..00000000 --- a/app/MindWork AI Studio/Tools/MetadataJsonConverter.cs +++ /dev/null @@ -1,32 +0,0 @@ -using System.Text.Json; -using System.Text.Json.Serialization; - -namespace AIStudio.Tools; - -public class MetadataJsonConverter : JsonConverter -{ - public override SseMetadata? Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) - { - using var jsonDoc = JsonDocument.ParseValue(ref reader); - var root = jsonDoc.RootElement; - var rawText = root.GetRawText(); - - var propertyName = root.EnumerateObject() - .Select(p => p.Name) - .FirstOrDefault(); - - return propertyName switch - { - "Text" => JsonSerializer.Deserialize(rawText, options), - "Pdf" => JsonSerializer.Deserialize(rawText, options), - "Spreadsheet" => JsonSerializer.Deserialize(rawText, options), - "Presentation" => JsonSerializer.Deserialize(rawText, options), - "Image" => JsonSerializer.Deserialize(rawText, options), - "Document" => JsonSerializer.Deserialize(rawText, options), - - _ => null - }; - } - - public override void Write(Utf8JsonWriter writer, SseMetadata value, JsonSerializerOptions options) => JsonSerializer.Serialize(writer, value, value.GetType(), options); -} \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/Services/RustService.Retrieval.cs b/app/MindWork AI Studio/Tools/Services/RustService.Retrieval.cs index 59633c06..73223b58 100644 --- a/app/MindWork AI Studio/Tools/Services/RustService.Retrieval.cs +++ b/app/MindWork AI Studio/Tools/Services/RustService.Retrieval.cs @@ -33,10 +33,10 @@ public sealed partial class RustService try { - var sseEvent = JsonSerializer.Deserialize(jsonContent); + var sseEvent = JsonSerializer.Deserialize(jsonContent); if (sseEvent is not null) { - var content = 
SseHandler.ProcessEvent(sseEvent, false); + var content = ContentStreamSseHandler.ProcessEvent(sseEvent, false); resultBuilder.Append(content); chunkCount++; }