2025-06-24 12:29:31 +00:00
|
|
|
|
using System.Collections.Concurrent;
|
|
|
|
|
using System.Text;
|
2025-06-23 12:15:49 +00:00
|
|
|
|
|
|
|
|
|
namespace AIStudio.Tools;
|
|
|
|
|
|
2025-06-30 06:46:45 +00:00
|
|
|
|
/// <summary>
/// Converts the events of a content stream (received as server-sent events) into text.
/// Depending on the kind of metadata attached to an event, the content is returned as-is
/// or decorated with a Markdown heading (page, sheet or slide).
/// </summary>
/// <remarks>
/// The handler keeps static state: the buffered image segments and the number of the
/// slide seen last. This state is shared by all callers and is never reset between
/// presentations, so a presentation whose first slide number equals the slide number
/// processed last does not get a slide heading for that slide.
/// </remarks>
public static class ContentStreamSseHandler
{
    /// <summary>
    /// Image segments received so far, grouped by image ID. An entry is removed again
    /// as soon as the final segment of the image arrived and the image was assembled.
    /// Every access to one of the lists is guarded by a lock on that list.
    /// </summary>
    private static readonly ConcurrentDictionary<string, List<ContentStreamPptxImageData>> PPTX_IMAGES = new();

    #warning We must used a ConcurrentDictionary as well for multiple parallel embeddings
    /// <summary>
    /// The slide number of the presentation event processed last. A slide heading is
    /// only emitted when an event reports a different slide number.
    /// </summary>
    private static int CURRENT_SLIDE_NUMBER;

    /// <summary>
    /// Converts a single stream event into its textual representation.
    /// </summary>
    /// <param name="sseEvent">The event to convert; may be <c>null</c>.</param>
    /// <param name="extractImages">
    /// When <c>true</c>, the segments of presentation images are collected and the assembled
    /// image content is appended once the final segment arrived. When <c>false</c>, image
    /// segments are ignored.
    /// </param>
    /// <returns>
    /// The text for the event. An empty string when the event or its content is <c>null</c>.
    /// </returns>
    public static string ProcessEvent(ContentStreamSseEvent? sseEvent, bool extractImages = true)
    {
        switch (sseEvent)
        {
            case { Content: not null, Metadata: not null }:
                switch (sseEvent.Metadata)
                {
                    case ContentStreamTextMetadata:
                    case ContentStreamDocumentMetadata:
                    case ContentStreamImageMetadata:
                        return $"{sseEvent.Content}";

                    case ContentStreamPdfMetadata pdfMetadata:
                        var pageNumber = pdfMetadata.Pdf?.PageNumber ?? 0;
                        return $"# Page {pageNumber}\n{sseEvent.Content}";

                    case ContentStreamSpreadsheetMetadata spreadsheetMetadata:
                        var sheetName = spreadsheetMetadata.Spreadsheet?.SheetName;
                        var rowNumber = spreadsheetMetadata.Spreadsheet?.RowNumber;
                        var spreadSheetResult = new StringBuilder();

                        // The first row of a sheet introduces the sheet with a heading:
                        if (rowNumber == 1)
                            spreadSheetResult.AppendLine($"\n# {sheetName}");

                        spreadSheetResult.AppendLine($"{sseEvent.Content}");
                        return spreadSheetResult.ToString();

                    case ContentStreamPresentationMetadata presentationMetadata:
                        var slideNumber = presentationMetadata.Presentation?.SlideNumber ?? 0;
                        var image = presentationMetadata.Presentation?.Image;
                        var presentationResult = new StringBuilder();

                        // Emit the heading only once per slide, i.e., when the slide changes:
                        if (slideNumber != CURRENT_SLIDE_NUMBER)
                            presentationResult.AppendLine($"# Slide {slideNumber}");

                        presentationResult.Append($"{sseEvent.Content}");

                        // Segments are only buffered when the caller wants the images.
                        // Buffering them otherwise would fill PPTX_IMAGES with entries
                        // that are never assembled and therefore never removed.
                        if (image is not null && extractImages)
                        {
                            var isEnd = ProcessImageSegment(image);

                            // ProcessImageSegment only reports the end for a non-blank ID:
                            if (isEnd)
                                presentationResult.AppendLine(BuildImage(image.Id!));
                        }

                        CURRENT_SLIDE_NUMBER = slideNumber;
                        return presentationResult.ToString();

                    default:
                        return sseEvent.Content;
                }

            case { Content: not null, Metadata: null }:
                return sseEvent.Content;

            default:
                return string.Empty;
        }
    }

    /// <summary>
    /// Stores a copy of the given image segment in the buffer of its image.
    /// </summary>
    /// <param name="contentStreamPptxImageData">The received image segment.</param>
    /// <returns>
    /// <c>true</c> when the segment was stored and is marked as the final segment of its
    /// image; <c>false</c> otherwise, including when the segment has no usable ID.
    /// </returns>
    private static bool ProcessImageSegment(ContentStreamPptxImageData contentStreamPptxImageData)
    {
        if (string.IsNullOrWhiteSpace(contentStreamPptxImageData.Id))
            return false;

        #warning Image IDs must be unique across all parallel embeddings. Use a GUID or similar as prefix.
        var id = contentStreamPptxImageData.Id;
        var isEnd = contentStreamPptxImageData.IsEnd;

        // Normalize the missing values, so that assembling the image need not care:
        var imageSegment = new ContentStreamPptxImageData
        {
            Id = id,
            Content = contentStreamPptxImageData.Content ?? string.Empty,
            Segment = contentStreamPptxImageData.Segment ?? 0,
            IsEnd = isEnd,
        };

        // The list must not be mutated inside an AddOrUpdate delegate: the dictionary may
        // invoke such a delegate more than once and does so outside of its internal lock.
        // Hence, we fetch (or create) the list first and add the segment under a lock.
        var segments = PPTX_IMAGES.GetOrAdd(id, static _ => new List<ContentStreamPptxImageData>());
        lock (segments)
            segments.Add(imageSegment);

        return isEnd;
    }

    /// <summary>
    /// Assembles the image with the given ID from its buffered segments and removes
    /// the segments from the buffer.
    /// </summary>
    /// <param name="id">The ID of the image to assemble.</param>
    /// <returns>
    /// The contents of all segments, concatenated in ascending segment order.
    /// An empty string when no segments are buffered for the ID.
    /// </returns>
    private static string BuildImage(string id)
    {
        // Look up and remove the entry in one atomic step:
        if (!PPTX_IMAGES.TryRemove(id, out var imageSegments))
            return string.Empty;

        lock (imageSegments)
        {
            return string.Join(string.Empty, imageSegments
                .Where(item => item.Content is not null)
                .OrderBy(item => item.Segment)
                .Select(item => item.Content));
        }
    }
}
|