From 32b43bbd049933c010a0f559da1281c97b51b6ba Mon Sep 17 00:00:00 2001 From: krut_ni Date: Tue, 24 Jun 2025 14:29:31 +0200 Subject: [PATCH] included parsing of pptx files; finished image segments processing --- .../Tools/Services/RustService.Retrieval.cs | 4 +- app/MindWork AI Studio/Tools/SseHandler.cs | 74 ++++++++++++++----- 2 files changed, 59 insertions(+), 19 deletions(-) diff --git a/app/MindWork AI Studio/Tools/Services/RustService.Retrieval.cs b/app/MindWork AI Studio/Tools/Services/RustService.Retrieval.cs index 76556089..79c65de0 100644 --- a/app/MindWork AI Studio/Tools/Services/RustService.Retrieval.cs +++ b/app/MindWork AI Studio/Tools/Services/RustService.Retrieval.cs @@ -48,12 +48,12 @@ public sealed partial class RustService var sseEvent = JsonSerializer.Deserialize(jsonContent); if (sseEvent != null) { - var content = await SseHandler.ProcessEventAsync(sseEvent); + var content = await SseHandler.ProcessEventAsync(sseEvent, false); resultBuilder.Append(content); eventCount++; } } - catch (JsonException ex) { return string.Empty; } + catch (JsonException) { resultBuilder.AppendLine(); } } var result = resultBuilder.ToString(); diff --git a/app/MindWork AI Studio/Tools/SseHandler.cs b/app/MindWork AI Studio/Tools/SseHandler.cs index 34777b44..a1b47101 100644 --- a/app/MindWork AI Studio/Tools/SseHandler.cs +++ b/app/MindWork AI Studio/Tools/SseHandler.cs @@ -1,11 +1,14 @@ -using System.Text; +using System.Collections.Concurrent; +using System.Text; using AIStudio.Settings.DataModel; namespace AIStudio.Tools; public static class SseHandler { - public static async Task ProcessEventAsync(SseEvent? sseEvent) + private static readonly ConcurrentDictionary> PPTX_IMAGES = new(); + + public static async Task ProcessEventAsync(SseEvent? sseEvent, bool extractImages = true) { var result = new StringBuilder(); @@ -41,6 +44,18 @@ public static class SseHandler case ImageMetadata imageMetadata: result.AppendLine($"{sseEvent.Content}"); break; + + case PresentationMetadata presentationMetadata: + var slideNumber = presentationMetadata.Presentation?.SlideNumber ?? 0; + var image = presentationMetadata.Presentation?.Image ?? null; + result.AppendLine($"{sseEvent.Content}"); + + if (image != null) + { + var isEnd = ProcessImageSegment(image); + if (isEnd && extractImages) { result.AppendLine(BuildImage(image.Id!)); } + } + break; default: result.AppendLine(sseEvent.Content); @@ -60,24 +75,49 @@ public static class SseHandler return result.ToString(); } - private static void ProcessImageSegment(PptxImageData pptxImageData, Dictionary> images, StringBuilder resultBuilder, ILogger logger) + private static bool ProcessImageSegment(PptxImageData pptxImageData) { - if (string.IsNullOrEmpty(pptxImageData.Id) || string.IsNullOrEmpty(pptxImageData.Content)) - { - return; - } + if (string.IsNullOrEmpty(pptxImageData.Id)) { return false; } - if (!images.ContainsKey(pptxImageData.Id)) - { - images[pptxImageData.Id] = new List(); - } + var id = pptxImageData.Id; + var segment = pptxImageData.Segment ?? 0; + var content = pptxImageData.Content ?? string.Empty; + var isEnd = pptxImageData.IsEnd; - images[pptxImageData.Id].Add(pptxImageData.Content); - - if (pptxImageData.IsEnd) + var imageSegment = new PptxImageData() { - resultBuilder.AppendLine("[Präsentationsbild eingebettet]"); - // TODO - } + Id = id, + Content = content, + Segment = segment, + IsEnd = isEnd, + }; + + PPTX_IMAGES.AddOrUpdate( + id, + _ => [imageSegment], + (_, existingList) => + { + existingList.Add(imageSegment); + return existingList; + } + ); + + return isEnd; + } + + private static string BuildImage(string id) + { + if (!PPTX_IMAGES.TryGetValue(id, out var imageSegments)) return string.Empty; + + var sortedSegments = imageSegments + .OrderBy(item => item.Segment) + .ToList(); + + var base64Image = string.Join(string.Empty, sortedSegments + .Where(item => item.Content != null) + .Select(item => item.Content)); + + PPTX_IMAGES.Remove(id, out _); + return base64Image; } } \ No newline at end of file