From 6d1ecb7678f7917997a4080c6908eb4bb6d935c5 Mon Sep 17 00:00:00 2001 From: Thorsten Sommer Date: Tue, 1 Jul 2025 18:34:14 +0200 Subject: [PATCH] Improved PowerPoint implementation for reading slide data (#517) --- .../Tools/ContentStreamSseHandler.cs | 59 +++++----- app/MindWork AI Studio/Tools/ISlideContent.cs | 3 + .../Tools/Services/RustService.Retrieval.cs | 63 ++++++----- app/MindWork AI Studio/Tools/Slide.cs | 10 ++ .../Tools/SlideImageContent.cs | 8 ++ app/MindWork AI Studio/Tools/SlideManager.cs | 106 ++++++++++++++++++ .../Tools/SlideTextContent.cs | 8 ++ runtime/Cargo.lock | 4 +- runtime/Cargo.toml | 2 +- 9 files changed, 207 insertions(+), 56 deletions(-) create mode 100644 app/MindWork AI Studio/Tools/ISlideContent.cs create mode 100644 app/MindWork AI Studio/Tools/Slide.cs create mode 100644 app/MindWork AI Studio/Tools/SlideImageContent.cs create mode 100644 app/MindWork AI Studio/Tools/SlideManager.cs create mode 100644 app/MindWork AI Studio/Tools/SlideTextContent.cs diff --git a/app/MindWork AI Studio/Tools/ContentStreamSseHandler.cs b/app/MindWork AI Studio/Tools/ContentStreamSseHandler.cs index 4090d49b..247d3ebf 100644 --- a/app/MindWork AI Studio/Tools/ContentStreamSseHandler.cs +++ b/app/MindWork AI Studio/Tools/ContentStreamSseHandler.cs @@ -6,7 +6,7 @@ namespace AIStudio.Tools; public static class ContentStreamSseHandler { private static readonly ConcurrentDictionary> CHUNKED_IMAGES = new(); - private static readonly ConcurrentDictionary CURRENT_SLIDE_NUMBERS = new(); + private static readonly ConcurrentDictionary SLIDE_MANAGERS = new(); public static string? ProcessEvent(ContentStreamSseEvent? sseEvent, bool extractImages = true) { @@ -44,31 +44,13 @@ public static class ContentStreamSseHandler return sseEvent.Content; case ContentStreamPresentationMetadata presentationMetadata: - var slideNumber = presentationMetadata.Presentation?.SlideNumber ?? 0; - var image = presentationMetadata.Presentation?.Image ?? 
null; - var presentationResult = new StringBuilder(); - var streamId = sseEvent.StreamId; + var slideManager = SLIDE_MANAGERS.GetOrAdd( + sseEvent.StreamId!, + _ => new() + ); - CURRENT_SLIDE_NUMBERS.TryGetValue(streamId!, out var currentSlideNumber); - if (slideNumber != currentSlideNumber) - { - presentationResult.AppendLine(); - presentationResult.AppendLine($"# Slide {slideNumber}"); - } - - if(!string.IsNullOrWhiteSpace(sseEvent.Content)) - presentationResult.AppendLine(sseEvent.Content); - - if (extractImages && image is not null) - { - var imageId = $"{streamId}-{image.Id!}"; - var isEnd = ProcessImageSegment(imageId, image); - if (isEnd && extractImages) - presentationResult.AppendLine(BuildImage(imageId)); - } - - CURRENT_SLIDE_NUMBERS[streamId!] = slideNumber; - return presentationResult.Length is 0 ? null : presentationResult.ToString(); + slideManager.AddSlide(presentationMetadata, sseEvent.Content, extractImages); + return null; default: return sseEvent.Content; @@ -81,8 +63,8 @@ public static class ContentStreamSseHandler return null; } } - - private static bool ProcessImageSegment(string imageId, ContentStreamPptxImageData contentStreamPptxImageData) + + public static bool ProcessImageSegment(string imageId, ContentStreamPptxImageData contentStreamPptxImageData) { if (string.IsNullOrWhiteSpace(contentStreamPptxImageData.Id) || string.IsNullOrWhiteSpace(imageId)) return false; @@ -112,7 +94,7 @@ public static class ContentStreamSseHandler return isEnd; } - private static string BuildImage(string id) + public static string BuildImage(string id) { if (!CHUNKED_IMAGES.TryGetValue(id, out var imageSegments)) return string.Empty; @@ -128,4 +110,25 @@ public static class ContentStreamSseHandler CHUNKED_IMAGES.Remove(id, out _); return base64Image; } + + public static string? 
Clear(string streamId) + { + if (string.IsNullOrWhiteSpace(streamId)) + return null; + + var finalContentChunk = new StringBuilder(); + if(SLIDE_MANAGERS.TryGetValue(streamId, out var slideManager)) + { + var result = slideManager.GetAllSlidesInOrder(); + if (!string.IsNullOrWhiteSpace(result)) + finalContentChunk.Append(result); + } + + SLIDE_MANAGERS.TryRemove(streamId, out _); + var imageIdPrefix = $"{streamId}-"; + foreach (var key in CHUNKED_IMAGES.Keys.Where(k => k.StartsWith(imageIdPrefix, StringComparison.InvariantCultureIgnoreCase))) + CHUNKED_IMAGES.TryRemove(key, out _); + + return finalContentChunk.Length > 0 ? finalContentChunk.ToString() : null; + } } \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/ISlideContent.cs b/app/MindWork AI Studio/Tools/ISlideContent.cs new file mode 100644 index 00000000..52e96001 --- /dev/null +++ b/app/MindWork AI Studio/Tools/ISlideContent.cs @@ -0,0 +1,3 @@ +namespace AIStudio.Tools; + +public interface ISlideContent; \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/Services/RustService.Retrieval.cs b/app/MindWork AI Studio/Tools/Services/RustService.Retrieval.cs index cdd8e0cf..6d63f022 100644 --- a/app/MindWork AI Studio/Tools/Services/RustService.Retrieval.cs +++ b/app/MindWork AI Studio/Tools/Services/RustService.Retrieval.cs @@ -15,39 +15,52 @@ public sealed partial class RustService if (!response.IsSuccessStatusCode) return string.Empty; - await using var stream = await response.Content.ReadAsStreamAsync(); - using var reader = new StreamReader(stream); - var resultBuilder = new StringBuilder(); - var chunkCount = 0; - while (!reader.EndOfStream && chunkCount < maxChunks) + try { - var line = await reader.ReadLineAsync(); - if (string.IsNullOrWhiteSpace(line)) - continue; - - if (!line.StartsWith("data:", StringComparison.InvariantCulture)) - continue; - - var jsonContent = line[5..]; + await using var stream = await response.Content.ReadAsStreamAsync(); + using var reader 
= new StreamReader(stream); + var chunkCount = 0; - try + while (!reader.EndOfStream && chunkCount < maxChunks) { - var sseEvent = JsonSerializer.Deserialize(jsonContent); - if (sseEvent is not null) + var line = await reader.ReadLineAsync(); + if (string.IsNullOrWhiteSpace(line)) + continue; + + if (!line.StartsWith("data:", StringComparison.InvariantCulture)) + continue; + + var jsonContent = line[5..]; + + try { - var content = ContentStreamSseHandler.ProcessEvent(sseEvent, extractImages); - if(content is not null) - resultBuilder.AppendLine(content); - - chunkCount++; + var sseEvent = JsonSerializer.Deserialize(jsonContent); + if (sseEvent is not null) + { + var content = ContentStreamSseHandler.ProcessEvent(sseEvent, extractImages); + if (content is not null) + resultBuilder.AppendLine(content); + + chunkCount++; + } + } + catch (JsonException) + { + this.logger?.LogError("Failed to deserialize SSE event: {JsonContent}", jsonContent); } } - catch (JsonException) - { - this.logger?.LogError("Failed to deserialize SSE event: {JsonContent}", jsonContent); - } + } + catch(Exception e) + { + this.logger?.LogError(e, "Error reading file data from stream: {Path}", path); + } + finally + { + var finalContentChunk = ContentStreamSseHandler.Clear(streamId); + if (!string.IsNullOrWhiteSpace(finalContentChunk)) + resultBuilder.AppendLine(finalContentChunk); } return resultBuilder.ToString(); diff --git a/app/MindWork AI Studio/Tools/Slide.cs b/app/MindWork AI Studio/Tools/Slide.cs new file mode 100644 index 00000000..d071cf7e --- /dev/null +++ b/app/MindWork AI Studio/Tools/Slide.cs @@ -0,0 +1,10 @@ +namespace AIStudio.Tools; + +public sealed class Slide +{ + public bool Delivered { get; set; } + + public int Position { get; init; } + + public List Content { get; } = new(); +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/SlideImageContent.cs b/app/MindWork AI Studio/Tools/SlideImageContent.cs new file mode 100644 index 00000000..ec261436 --- 
/dev/null +++ b/app/MindWork AI Studio/Tools/SlideImageContent.cs @@ -0,0 +1,8 @@ +using System.Text; + +namespace AIStudio.Tools; + +public sealed class SlideImageContent(string base64Image) : ISlideContent +{ + public StringBuilder Base64Image { get; } = new(base64Image); // Built once per instance; reads no longer re-copy the base64 payload. +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/SlideManager.cs b/app/MindWork AI Studio/Tools/SlideManager.cs new file mode 100644 index 00000000..47407bd1 --- /dev/null +++ b/app/MindWork AI Studio/Tools/SlideManager.cs @@ -0,0 +1,106 @@ +using System.Text; + +namespace AIStudio.Tools; + +public sealed class SlideManager +{ + private readonly Dictionary slides = new(); + + public void AddSlide(ContentStreamPresentationMetadata metadata, string? content, bool extractImages = false) + { + var slideNumber = metadata.Presentation?.SlideNumber ?? 0; + if(slideNumber is 0) + return; + + var image = metadata.Presentation?.Image ?? null; + var addImage = false; + if (extractImages && image is not null) + { + var isEnd = ContentStreamSseHandler.ProcessImageSegment(image.Id!, image); + if (isEnd) + addImage = true; + } + + if (!this.slides.TryGetValue(slideNumber, out var slide)) + { + // + // Case: No existing slide content for this slide number. + // + + var contentBuilder = new StringBuilder(); + contentBuilder.AppendLine(); + contentBuilder.AppendLine($"# Slide {slideNumber}"); + + // Add any text content to the slide? + if(!string.IsNullOrWhiteSpace(content)) + contentBuilder.AppendLine(content); + + // + // Add the text content to the slide: + // + var slideText = new SlideTextContent(contentBuilder.ToString()); + var createdSlide = new Slide + { + Delivered = false, + Position = slideNumber + }; + + createdSlide.Content.Add(slideText); + + // + // Add image content to the slide? 
+ // + if (addImage) + { + var img = ContentStreamSseHandler.BuildImage(image!.Id!); + var slideImage = new SlideImageContent(img); + createdSlide.Content.Add(slideImage); + } + + this.slides[slideNumber] = createdSlide; + } + else + { + // + // Case: Existing slide content for this slide number. + // + + // Add any text content? + if (!string.IsNullOrWhiteSpace(content)) + { + var textContent = slide.Content.OfType().First(); + textContent.Text.AppendLine(content); + } + + // Add any image content? + if (addImage) + { + var img = ContentStreamSseHandler.BuildImage(image!.Id!); + var slideImage = new SlideImageContent(img); + slide.Content.Add(slideImage); + } + } + } + + public string? GetAllSlidesInOrder() + { + var content = new StringBuilder(); + foreach (var slide in this.slides.Values.Where(s => !s.Delivered).OrderBy(s => s.Position)) + { + slide.Delivered = true; + foreach (var text in slide.Content.OfType()) + { + content.AppendLine(text.Text.ToString()); + content.AppendLine(); + } + + foreach (var image in slide.Content.OfType()) + { + content.AppendLine(image.Base64Image.ToString()); + content.AppendLine(); + } + } + + return content.Length > 0 ? 
content.ToString() : null; + } +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/SlideTextContent.cs b/app/MindWork AI Studio/Tools/SlideTextContent.cs new file mode 100644 index 00000000..0a657559 --- /dev/null +++ b/app/MindWork AI Studio/Tools/SlideTextContent.cs @@ -0,0 +1,8 @@ +using System.Text; + +namespace AIStudio.Tools; + +public sealed class SlideTextContent(string textContent) : ISlideContent +{ + public StringBuilder Text { get; } = new(textContent); // Built once per instance so text appended by SlideManager is kept. +} \ No newline at end of file diff --git a/runtime/Cargo.lock b/runtime/Cargo.lock index ed49d3f2..9a468f58 100644 --- a/runtime/Cargo.lock +++ b/runtime/Cargo.lock @@ -3408,9 +3408,9 @@ checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" [[package]] name = "pptx-to-md" -version = "0.3.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e26f6df203425a22367de642b415c18f1456de2bc870fbd7d2be83d5f57ae058" +checksum = "25f7bef20173da9d560ffb6b67cba2d2b834375d0d262e5aeb86f44e069ae446" dependencies = [ "base64 0.22.1", "image 0.24.9", diff --git a/runtime/Cargo.toml b/runtime/Cargo.toml index bda2073f..2b8ec24f 100644 --- a/runtime/Cargo.toml +++ b/runtime/Cargo.toml @@ -38,7 +38,7 @@ calamine = "0.28.0" pdfium-render = "0.8.33" sys-locale = "0.3.2" cfg-if = "1.0.1" -pptx-to-md = "0.3.0" +pptx-to-md = "0.4.0" # Fixes security vulnerability downstream, where the upstream is not fixed yet: url = "2.5"