Renamed and refactored SSE metadata and event classes

This commit is contained in:
Thorsten Sommer 2025-06-30 08:46:45 +02:00
parent 1a4babc899
commit 180e55c8ae
Signed by: tsommer
GPG Key ID: 371BBA77A02C0108
18 changed files with 189 additions and 138 deletions

View File

@ -1,88 +0,0 @@
using System.Text.Json.Serialization;
namespace AIStudio.Settings.DataModel;
/// <summary>
/// One SSE event payload: a content string together with optional metadata describing it.
/// </summary>
public class SseEvent
{
    /// <summary>Value of the JSON property <c>content</c>.</summary>
    [JsonPropertyName("content")]
    public string? Content { get; set; }

    /// <summary>Value of the JSON property <c>metadata</c>.</summary>
    [JsonPropertyName("metadata")]
    public Metadata? Metadata { get; set; }
}
/// <summary>
/// Abstract base of all metadata variants. JSON (de)serialization of this type
/// is delegated to <see cref="MetadataJsonConverter"/>.
/// </summary>
[JsonConverter(typeof(MetadataJsonConverter))]
public abstract class Metadata
{
}
/// <summary>
/// Metadata variant whose details live under the JSON property <c>Text</c>.
/// </summary>
public class TextMetadata : Metadata
{
    /// <summary>Details read from the JSON property <c>Text</c>.</summary>
    [JsonPropertyName("Text")]
    public TextDetails? Text { get; set; }
}
/// <summary>
/// Detail record carried by text metadata.
/// </summary>
public class TextDetails
{
    /// <summary>Value of the JSON property <c>line_number</c>; optional.</summary>
    [JsonPropertyName("line_number")]
    public int? LineNumber { get; set; }
}
/// <summary>
/// Metadata variant whose details live under the JSON property <c>Pdf</c>.
/// </summary>
public class PdfMetadata : Metadata
{
    /// <summary>Details read from the JSON property <c>Pdf</c>.</summary>
    [JsonPropertyName("Pdf")]
    public PdfDetails? Pdf { get; set; }
}
/// <summary>
/// Detail record carried by PDF metadata.
/// </summary>
public class PdfDetails
{
    /// <summary>Value of the JSON property <c>page_number</c>; optional.</summary>
    [JsonPropertyName("page_number")]
    public int? PageNumber { get; set; }
}
/// <summary>
/// Metadata variant whose details live under the JSON property <c>Spreadsheet</c>.
/// </summary>
public class SpreadsheetMetadata : Metadata
{
    /// <summary>Details read from the JSON property <c>Spreadsheet</c>.</summary>
    [JsonPropertyName("Spreadsheet")]
    public SpreadsheetDetails? Spreadsheet { get; set; }
}
/// <summary>
/// Detail record carried by spreadsheet metadata.
/// </summary>
public class SpreadsheetDetails
{
    /// <summary>Value of the JSON property <c>sheet_name</c>; optional.</summary>
    [JsonPropertyName("sheet_name")]
    public string? SheetName { get; set; }

    /// <summary>Value of the JSON property <c>row_number</c>; optional.</summary>
    [JsonPropertyName("row_number")]
    public int? RowNumber { get; set; }
}
/// <summary>
/// Metadata variant without any properties of its own; it only marks the kind of content.
/// </summary>
public class DocumentMetadata : Metadata;
/// <summary>
/// Metadata variant without any properties of its own; it only marks the kind of content.
/// </summary>
public class ImageMetadata : Metadata;
/// <summary>
/// Metadata variant whose details live under the JSON property <c>Presentation</c>.
/// </summary>
public class PresentationMetadata : Metadata
{
    /// <summary>Details read from the JSON property <c>Presentation</c>.</summary>
    [JsonPropertyName("Presentation")]
    public PresentationDetails? Presentation { get; set; }
}
/// <summary>
/// Detail record carried by presentation metadata.
/// </summary>
public class PresentationDetails
{
    /// <summary>Value of the JSON property <c>slide_number</c>; optional.</summary>
    [JsonPropertyName("slide_number")]
    public int? SlideNumber { get; set; }

    /// <summary>Image data read from the JSON property <c>image</c>; optional.</summary>
    [JsonPropertyName("image")]
    public PptxImageData? Image { get; set; }
}
/// <summary>
/// Image data attached to presentation details. The <c>segment</c> and <c>is_end</c>
/// properties suggest an image may arrive in several pieces — NOTE(review): confirm against the producer.
/// </summary>
public class PptxImageData
{
    /// <summary>Value of the JSON property <c>id</c>.</summary>
    [JsonPropertyName("id")]
    public string? Id { get; set; }

    /// <summary>Value of the JSON property <c>content</c>.</summary>
    [JsonPropertyName("content")]
    public string? Content { get; set; }

    /// <summary>Value of the JSON property <c>segment</c>; optional.</summary>
    [JsonPropertyName("segment")]
    public int? Segment { get; set; }

    /// <summary>Value of the JSON property <c>is_end</c>; <c>false</c> when the property is absent.</summary>
    [JsonPropertyName("is_end")]
    public bool IsEnd { get; set; }
}

View File

@ -0,0 +1,4 @@
namespace AIStudio.Tools;
// ReSharper disable ClassNeverInstantiated.Global
/// <summary>
/// Metadata variant without any properties of its own; it only marks the kind of content.
/// </summary>
public sealed class ContentStreamDocumentMetadata : ContentStreamSseMetadata
{
}

View File

@ -0,0 +1,4 @@
namespace AIStudio.Tools;
// ReSharper disable ClassNeverInstantiated.Global
/// <summary>
/// Metadata variant without any properties of its own; it only marks the kind of content.
/// Sealed like its sibling <c>ContentStreamDocumentMetadata</c>, since it is an empty leaf type.
/// </summary>
public sealed class ContentStreamImageMetadata : ContentStreamSseMetadata;

View File

@ -0,0 +1,32 @@
using System.Text.Json;
using System.Text.Json.Serialization;
namespace AIStudio.Tools;
/// <summary>
/// JSON converter for <see cref="ContentStreamSseMetadata"/>. The concrete subtype is
/// chosen from the name of the first property of the metadata JSON object.
/// </summary>
public class ContentStreamMetadataJsonConverter : JsonConverter<ContentStreamSseMetadata>
{
    /// <summary>
    /// Reads one JSON value and materializes the matching metadata subtype.
    /// </summary>
    /// <param name="reader">Reader positioned at the metadata value.</param>
    /// <param name="typeToConvert">The requested type; not used for the dispatch.</param>
    /// <param name="options">Serializer options forwarded to the nested deserialization.</param>
    /// <returns>
    /// The deserialized metadata, or <c>null</c> when the value is not a JSON object,
    /// the object is empty, or its first property name is not a known variant.
    /// </returns>
    public override ContentStreamSseMetadata? Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        using var jsonDoc = JsonDocument.ParseValue(ref reader);
        var root = jsonDoc.RootElement;

        // EnumerateObject() throws InvalidOperationException for anything that is not
        // a JSON object (string, array, number, ...). Treat such payloads like an
        // unknown variant instead of failing with a non-JSON exception type.
        if (root.ValueKind is not JsonValueKind.Object)
            return null;

        // The first property name acts as the discriminator.
        string? propertyName = null;
        foreach (var property in root.EnumerateObject())
        {
            propertyName = property.Name;
            break;
        }

        // Deserialize straight from the already parsed element; the whole object
        // (including the discriminator property) is the payload of the subtype.
        return propertyName switch
        {
            "Text" => root.Deserialize<ContentStreamTextMetadata>(options),
            "Pdf" => root.Deserialize<ContentStreamPdfMetadata>(options),
            "Spreadsheet" => root.Deserialize<ContentStreamSpreadsheetMetadata>(options),
            "Presentation" => root.Deserialize<ContentStreamPresentationMetadata>(options),
            "Image" => root.Deserialize<ContentStreamImageMetadata>(options),
            "Document" => root.Deserialize<ContentStreamDocumentMetadata>(options),

            _ => null
        };
    }

    /// <summary>
    /// Writes the metadata using its runtime type, so the properties of the concrete subtype are emitted.
    /// </summary>
    /// <param name="writer">Target JSON writer.</param>
    /// <param name="value">The metadata instance to serialize.</param>
    /// <param name="options">Serializer options forwarded to the nested serialization.</param>
    public override void Write(Utf8JsonWriter writer, ContentStreamSseMetadata value, JsonSerializerOptions options) => JsonSerializer.Serialize(writer, value, value.GetType(), options);
}

View File

@ -0,0 +1,11 @@
using System.Text.Json.Serialization;
namespace AIStudio.Tools;
// ReSharper disable UnusedAutoPropertyAccessor.Global
// ReSharper disable ClassNeverInstantiated.Global
/// <summary>
/// Detail record carried by PDF metadata.
/// </summary>
public class ContentStreamPdfDetails
{
    /// <summary>Value of the JSON property <c>page_number</c>; optional.</summary>
    [JsonPropertyName("page_number")]
    public int? PageNumber { get; init; }
}

View File

@ -0,0 +1,11 @@
using System.Text.Json.Serialization;
namespace AIStudio.Tools;
// ReSharper disable UnusedAutoPropertyAccessor.Global
// ReSharper disable ClassNeverInstantiated.Global
/// <summary>
/// Metadata variant whose details live under the JSON property <c>Pdf</c>.
/// </summary>
public class ContentStreamPdfMetadata : ContentStreamSseMetadata
{
    /// <summary>Details read from the JSON property <c>Pdf</c>.</summary>
    [JsonPropertyName("Pdf")]
    public ContentStreamPdfDetails? Pdf { get; init; }
}

View File

@ -0,0 +1,18 @@
using System.Text.Json.Serialization;
namespace AIStudio.Tools;
/// <summary>
/// Image data attached to presentation details. The <c>segment</c> and <c>is_end</c>
/// properties suggest an image may arrive in several pieces — NOTE(review): confirm against the producer.
/// </summary>
public class ContentStreamPptxImageData
{
    /// <summary>Value of the JSON property <c>id</c>.</summary>
    [JsonPropertyName("id")]
    public string? Id { get; init; }

    /// <summary>Value of the JSON property <c>content</c>.</summary>
    [JsonPropertyName("content")]
    public string? Content { get; init; }

    /// <summary>Value of the JSON property <c>segment</c>; optional.</summary>
    [JsonPropertyName("segment")]
    public int? Segment { get; init; }

    /// <summary>Value of the JSON property <c>is_end</c>; <c>false</c> when the property is absent.</summary>
    [JsonPropertyName("is_end")]
    public bool IsEnd { get; init; }
}

View File

@ -0,0 +1,14 @@
using System.Text.Json.Serialization;
namespace AIStudio.Tools;
// ReSharper disable UnusedAutoPropertyAccessor.Global
// ReSharper disable ClassNeverInstantiated.Global
/// <summary>
/// Detail record carried by presentation metadata.
/// </summary>
public sealed class ContentStreamPresentationDetails
{
    /// <summary>Value of the JSON property <c>slide_number</c>; optional.</summary>
    [JsonPropertyName("slide_number")]
    public int? SlideNumber { get; init; }

    /// <summary>Image data read from the JSON property <c>image</c>; optional.</summary>
    [JsonPropertyName("image")]
    public ContentStreamPptxImageData? Image { get; init; }
}

View File

@ -0,0 +1,11 @@
using System.Text.Json.Serialization;
namespace AIStudio.Tools;
// ReSharper disable UnusedAutoPropertyAccessor.Global
// ReSharper disable ClassNeverInstantiated.Global
/// <summary>
/// Metadata variant whose details live under the JSON property <c>Presentation</c>.
/// </summary>
public class ContentStreamPresentationMetadata : ContentStreamSseMetadata
{
    /// <summary>Details read from the JSON property <c>Presentation</c>.</summary>
    [JsonPropertyName("Presentation")]
    public ContentStreamPresentationDetails? Presentation { get; init; }
}

View File

@ -0,0 +1,14 @@
using System.Text.Json.Serialization;
namespace AIStudio.Tools;
// ReSharper disable UnusedAutoPropertyAccessor.Global
// ReSharper disable ClassNeverInstantiated.Global
/// <summary>
/// Detail record carried by spreadsheet metadata.
/// </summary>
public class ContentStreamSpreadsheetDetails
{
    /// <summary>Value of the JSON property <c>sheet_name</c>; optional.</summary>
    [JsonPropertyName("sheet_name")]
    public string? SheetName { get; init; }

    /// <summary>Value of the JSON property <c>row_number</c>; optional.</summary>
    [JsonPropertyName("row_number")]
    public int? RowNumber { get; init; }
}

View File

@ -0,0 +1,11 @@
using System.Text.Json.Serialization;
namespace AIStudio.Tools;
// ReSharper disable UnusedAutoPropertyAccessor.Global
// ReSharper disable ClassNeverInstantiated.Global
/// <summary>
/// Metadata variant whose details live under the JSON property <c>Spreadsheet</c>.
/// </summary>
public class ContentStreamSpreadsheetMetadata : ContentStreamSseMetadata
{
    /// <summary>Details read from the JSON property <c>Spreadsheet</c>.</summary>
    [JsonPropertyName("Spreadsheet")]
    public ContentStreamSpreadsheetDetails? Spreadsheet { get; init; }
}

View File

@ -0,0 +1,12 @@
using System.Text.Json.Serialization;
namespace AIStudio.Tools;
/// <summary>
/// One SSE event payload: a content string together with optional metadata describing it.
/// </summary>
public class ContentStreamSseEvent
{
    /// <summary>Value of the JSON property <c>content</c>.</summary>
    [JsonPropertyName("content")]
    public string? Content { get; init; }

    /// <summary>Value of the JSON property <c>metadata</c>.</summary>
    [JsonPropertyName("metadata")]
    public ContentStreamSseMetadata? Metadata { get; init; }
}

View File

@ -3,26 +3,28 @@ using System.Text;
namespace AIStudio.Tools;
public static class SseHandler
public static class ContentStreamSseHandler
{
private static readonly ConcurrentDictionary<string, List<PptxImageData>> PPTX_IMAGES = new();
private static readonly ConcurrentDictionary<string, List<ContentStreamPptxImageData>> PPTX_IMAGES = new();
#warning We must use a ConcurrentDictionary as well for multiple parallel embeddings
private static int CURRENT_SLIDE_NUMBER;
public static string ProcessEvent(SseEvent? sseEvent, bool extractImages = true)
public static string ProcessEvent(ContentStreamSseEvent? sseEvent, bool extractImages = true)
{
switch (sseEvent)
{
case { Content: not null, Metadata: not null }:
switch (sseEvent.Metadata)
{
case TextMetadata:
case ContentStreamTextMetadata:
return $"{sseEvent.Content}";
case PdfMetadata pdfMetadata:
case ContentStreamPdfMetadata pdfMetadata:
var pageNumber = pdfMetadata.Pdf?.PageNumber ?? 0;
return $"# Page {pageNumber}\n{sseEvent.Content}";
case SpreadsheetMetadata spreadsheetMetadata:
case ContentStreamSpreadsheetMetadata spreadsheetMetadata:
var sheetName = spreadsheetMetadata.Spreadsheet?.SheetName;
var rowNumber = spreadsheetMetadata.Spreadsheet?.RowNumber;
var spreadSheetResult = new StringBuilder();
@ -32,11 +34,11 @@ public static class SseHandler
spreadSheetResult.AppendLine($"{sseEvent.Content}");
return spreadSheetResult.ToString();
case DocumentMetadata:
case ImageMetadata:
case ContentStreamDocumentMetadata:
case ContentStreamImageMetadata:
return $"{sseEvent.Content}";
case PresentationMetadata presentationMetadata:
case ContentStreamPresentationMetadata presentationMetadata:
var slideNumber = presentationMetadata.Presentation?.SlideNumber ?? 0;
var image = presentationMetadata.Presentation?.Image ?? null;
var presentationResult = new StringBuilder();
@ -67,17 +69,18 @@ public static class SseHandler
}
}
private static bool ProcessImageSegment(PptxImageData pptxImageData)
private static bool ProcessImageSegment(ContentStreamPptxImageData contentStreamPptxImageData)
{
if (string.IsNullOrWhiteSpace(pptxImageData.Id))
if (string.IsNullOrWhiteSpace(contentStreamPptxImageData.Id))
return false;
var id = pptxImageData.Id;
var segment = pptxImageData.Segment ?? 0;
var content = pptxImageData.Content ?? string.Empty;
var isEnd = pptxImageData.IsEnd;
#warning Image IDs must be unique across all parallel embeddings. Use a GUID or similar as prefix.
var id = contentStreamPptxImageData.Id;
var segment = contentStreamPptxImageData.Segment ?? 0;
var content = contentStreamPptxImageData.Content ?? string.Empty;
var isEnd = contentStreamPptxImageData.IsEnd;
var imageSegment = new PptxImageData
var imageSegment = new ContentStreamPptxImageData
{
Id = id,
Content = content,

View File

@ -0,0 +1,6 @@
using System.Text.Json.Serialization;
namespace AIStudio.Tools;
/// <summary>
/// Abstract base of all content stream metadata variants. JSON (de)serialization
/// of this type is delegated to <see cref="ContentStreamMetadataJsonConverter"/>.
/// </summary>
[JsonConverter(typeof(ContentStreamMetadataJsonConverter))]
public abstract class ContentStreamSseMetadata
{
}

View File

@ -0,0 +1,10 @@
using System.Text.Json.Serialization;
namespace AIStudio.Tools;
// ReSharper disable ClassNeverInstantiated.Global
/// <summary>
/// Detail record carried by text metadata.
/// </summary>
public class ContentStreamTextDetails
{
    /// <summary>Value of the JSON property <c>line_number</c>; optional.</summary>
    [JsonPropertyName("line_number")]
    public int? LineNumber { get; init; }
}

View File

@ -0,0 +1,10 @@
using System.Text.Json.Serialization;
namespace AIStudio.Tools;
// ReSharper disable ClassNeverInstantiated.Global
/// <summary>
/// Metadata variant whose details live under the JSON property <c>Text</c>.
/// </summary>
public class ContentStreamTextMetadata : ContentStreamSseMetadata
{
    /// <summary>Details read from the JSON property <c>Text</c>.</summary>
    [JsonPropertyName("Text")]
    public ContentStreamTextDetails? Text { get; init; }
}

View File

@ -1,32 +0,0 @@
using System.Text.Json;
using System.Text.Json.Serialization;
namespace AIStudio.Tools;
/// <summary>
/// JSON converter for <see cref="SseMetadata"/>. The concrete subtype is chosen
/// from the name of the first property of the metadata JSON object.
/// </summary>
public class MetadataJsonConverter : JsonConverter<SseMetadata>
{
    /// <summary>
    /// Reads one JSON value and materializes the matching metadata subtype.
    /// </summary>
    /// <param name="reader">Reader positioned at the metadata value.</param>
    /// <param name="typeToConvert">The requested type; not used for the dispatch.</param>
    /// <param name="options">Serializer options forwarded to the nested deserialization.</param>
    /// <returns>
    /// The deserialized metadata, or <c>null</c> when the value is not a JSON object,
    /// the object is empty, or its first property name is not a known variant.
    /// </returns>
    public override SseMetadata? Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        using var jsonDoc = JsonDocument.ParseValue(ref reader);
        var root = jsonDoc.RootElement;

        // EnumerateObject() throws InvalidOperationException for anything that is not
        // a JSON object (string, array, number, ...). Treat such payloads like an
        // unknown variant instead of failing with a non-JSON exception type.
        if (root.ValueKind is not JsonValueKind.Object)
            return null;

        // The first property name acts as the discriminator.
        string? propertyName = null;
        foreach (var property in root.EnumerateObject())
        {
            propertyName = property.Name;
            break;
        }

        // Deserialize straight from the already parsed element; the whole object
        // (including the discriminator property) is the payload of the subtype.
        return propertyName switch
        {
            "Text" => root.Deserialize<TextMetadata>(options),
            "Pdf" => root.Deserialize<PdfMetadata>(options),
            "Spreadsheet" => root.Deserialize<SpreadsheetMetadata>(options),
            "Presentation" => root.Deserialize<PresentationMetadata>(options),
            "Image" => root.Deserialize<ImageMetadata>(options),
            "Document" => root.Deserialize<DocumentMetadata>(options),

            _ => null
        };
    }

    /// <summary>
    /// Writes the metadata using its runtime type, so the properties of the concrete subtype are emitted.
    /// </summary>
    /// <param name="writer">Target JSON writer.</param>
    /// <param name="value">The metadata instance to serialize.</param>
    /// <param name="options">Serializer options forwarded to the nested serialization.</param>
    public override void Write(Utf8JsonWriter writer, SseMetadata value, JsonSerializerOptions options) => JsonSerializer.Serialize(writer, value, value.GetType(), options);
}

View File

@ -33,10 +33,10 @@ public sealed partial class RustService
try
{
var sseEvent = JsonSerializer.Deserialize<SseEvent>(jsonContent);
var sseEvent = JsonSerializer.Deserialize<ContentStreamSseEvent>(jsonContent);
if (sseEvent is not null)
{
var content = SseHandler.ProcessEvent(sseEvent, false);
var content = ContentStreamSseHandler.ProcessEvent(sseEvent, false);
resultBuilder.Append(content);
chunkCount++;
}