diff --git a/app/MindWork AI Studio/Assistants/I18N/allTexts.lua b/app/MindWork AI Studio/Assistants/I18N/allTexts.lua
index 7afde4b6..01dbb5ca 100644
--- a/app/MindWork AI Studio/Assistants/I18N/allTexts.lua
+++ b/app/MindWork AI Studio/Assistants/I18N/allTexts.lua
@@ -1648,11 +1648,17 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::PROFILESELECTION::T918741365"] = "You can
-- Provider
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::PROVIDERSELECTION::T900237532"] = "Provider"
--- Use PDF content as input
-UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READPDFCONTENT::T2849276709"] = "Use PDF content as input"
+-- Images are not supported yet
+UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READFILECONTENT::T298062956"] = "Images are not supported yet"
--- Select PDF file
-UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READPDFCONTENT::T63272795"] = "Select PDF file"
+-- Use file content as input
+UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READFILECONTENT::T3499386973"] = "Use file content as input"
+
+-- Select file to read its content
+UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READFILECONTENT::T354817589"] = "Select file to read its content"
+
+-- Executables are not allowed
+UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READFILECONTENT::T4167762413"] = "Executables are not allowed"
-- The content is cleaned using an LLM agent: the main content is extracted, advertisements and other irrelevant things are attempted to be removed; relative links are attempted to be converted into absolute links so that they can be used.
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READWEBCONTENT::T1164201762"] = "The content is cleaned using an LLM agent: the main content is extracted, advertisements and other irrelevant things are attempted to be removed; relative links are attempted to be converted into absolute links so that they can be used."
@@ -5482,6 +5488,9 @@ UI_TEXT_CONTENT["AISTUDIO::TOOLS::RAG::RAGPROCESSES::AISRCSELWITHRETCTXVAL::T304
-- AI source selection with AI retrieval context validation
UI_TEXT_CONTENT["AISTUDIO::TOOLS::RAG::RAGPROCESSES::AISRCSELWITHRETCTXVAL::T3775725978"] = "AI source selection with AI retrieval context validation"
+-- Executable Files
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPEFILTER::T2217313358"] = "Executable Files"
+
-- PDF Files
UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPEFILTER::T3108466742"] = "PDF Files"
diff --git a/app/MindWork AI Studio/Assistants/LegalCheck/AssistantLegalCheck.razor b/app/MindWork AI Studio/Assistants/LegalCheck/AssistantLegalCheck.razor
index 6ec7afad..1e3d072b 100644
--- a/app/MindWork AI Studio/Assistants/LegalCheck/AssistantLegalCheck.razor
+++ b/app/MindWork AI Studio/Assistants/LegalCheck/AssistantLegalCheck.razor
@@ -6,7 +6,7 @@
}
-
+
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Assistants/TextSummarizer/AssistantTextSummarizer.razor b/app/MindWork AI Studio/Assistants/TextSummarizer/AssistantTextSummarizer.razor
index 277767ef..267982e1 100644
--- a/app/MindWork AI Studio/Assistants/TextSummarizer/AssistantTextSummarizer.razor
+++ b/app/MindWork AI Studio/Assistants/TextSummarizer/AssistantTextSummarizer.razor
@@ -6,7 +6,7 @@
}
-
+
diff --git a/app/MindWork AI Studio/Assistants/Translation/AssistantTranslation.razor b/app/MindWork AI Studio/Assistants/Translation/AssistantTranslation.razor
index d7bf0e52..f8123397 100644
--- a/app/MindWork AI Studio/Assistants/Translation/AssistantTranslation.razor
+++ b/app/MindWork AI Studio/Assistants/Translation/AssistantTranslation.razor
@@ -6,7 +6,8 @@
}
-
+
+
@if (this.liveTranslation)
{
diff --git a/app/MindWork AI Studio/Components/ReadPDFContent.razor b/app/MindWork AI Studio/Components/ReadFileContent.razor
similarity index 77%
rename from app/MindWork AI Studio/Components/ReadPDFContent.razor
rename to app/MindWork AI Studio/Components/ReadFileContent.razor
index bd101740..f9e33fc8 100644
--- a/app/MindWork AI Studio/Components/ReadPDFContent.razor
+++ b/app/MindWork AI Studio/Components/ReadFileContent.razor
@@ -1,4 +1,4 @@
@inherits MSGComponentBase
- @T("Use PDF content as input")
+ @T("Use file content as input")
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Components/ReadFileContent.razor.cs b/app/MindWork AI Studio/Components/ReadFileContent.razor.cs
new file mode 100644
index 00000000..f01b8bc8
--- /dev/null
+++ b/app/MindWork AI Studio/Components/ReadFileContent.razor.cs
@@ -0,0 +1,45 @@
+using AIStudio.Tools.Rust;
+using AIStudio.Tools.Services;
+
+using Microsoft.AspNetCore.Components;
+
+namespace AIStudio.Components;
+
+public partial class ReadFileContent : MSGComponentBase // Lets the user pick a file and hands its extracted content to the parent component.
+{
+ [Parameter]
+ public string FileContent { get; set; } = string.Empty; // Content parameter; defaults to an empty string.
+
+ [Parameter]
+ public EventCallback FileContentChanged { get; set; } // Invoked by SelectFile() with the extracted content.
+
+ [Inject]
+ private RustService RustService { get; init; } = null!; // Injected service used for the file dialog and for reading the file.
+
+ private async Task SelectFile() // Asks for a file, rejects executables and images, then reads the file and raises FileContentChanged.
+ {
+ var selectedFile = await this.RustService.SelectFile(T("Select file to read its content"));
+ if (selectedFile.UserCancelled)
+ return; // The dialog was cancelled; nothing to do.
+
+ if(!File.Exists(selectedFile.SelectedFilePath))
+ return; // The file does not exist; no message is shown in this case.
+
+ var ext = Path.GetExtension(selectedFile.SelectedFilePath).TrimStart('.'); // Extension without the leading dot, as the filter lists store it.
+ if (Array.Exists(FileTypeFilter.Executables.FilterExtensions, x => x.Equals(ext, StringComparison.OrdinalIgnoreCase))) // Case-insensitive match.
+ {
+ await MessageBus.INSTANCE.SendError(new(Icons.Material.Filled.AppBlocking, T("Executables are not allowed")));
+ return;
+ }
+
+ if (Array.Exists(FileTypeFilter.AllImages.FilterExtensions, x => x.Equals(ext, StringComparison.OrdinalIgnoreCase))) // Images only produce a warning, not an error.
+ {
+ await MessageBus.INSTANCE.SendWarning(new(Icons.Material.Filled.ImageNotSupported, T("Images are not supported yet")));
+ return;
+ }
+
+ var streamId = Guid.NewGuid().ToString(); // A fresh ID for every read.
+ var fileContent = await this.RustService.ReadArbitraryFileData(selectedFile.SelectedFilePath, streamId, int.MaxValue); // int.MaxValue: effectively no chunk limit.
+ await this.FileContentChanged.InvokeAsync(fileContent);
+ }
+}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Components/ReadPDFContent.razor.cs b/app/MindWork AI Studio/Components/ReadPDFContent.razor.cs
deleted file mode 100644
index ab050bd3..00000000
--- a/app/MindWork AI Studio/Components/ReadPDFContent.razor.cs
+++ /dev/null
@@ -1,31 +0,0 @@
-using AIStudio.Tools.Rust;
-using AIStudio.Tools.Services;
-
-using Microsoft.AspNetCore.Components;
-
-namespace AIStudio.Components;
-
-public partial class ReadPDFContent : MSGComponentBase
-{
- [Parameter]
- public string PDFContent { get; set; } = string.Empty;
-
- [Parameter]
- public EventCallback PDFContentChanged { get; set; }
-
- [Inject]
- private RustService RustService { get; init; } = null!;
-
- private async Task SelectFile()
- {
- var pdfFile = await this.RustService.SelectFile(T("Select PDF file"), FileTypeFilter.PDF);
- if (pdfFile.UserCancelled)
- return;
-
- if(!File.Exists(pdfFile.SelectedFilePath))
- return;
-
- var pdfText = await this.RustService.GetPDFText(pdfFile.SelectedFilePath);
- await this.PDFContentChanged.InvokeAsync(pdfText);
- }
-}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Plugins/languages/de-de-43065dbc-78d0-45b7-92be-f14c2926e2dc/plugin.lua b/app/MindWork AI Studio/Plugins/languages/de-de-43065dbc-78d0-45b7-92be-f14c2926e2dc/plugin.lua
index 80959004..e5599727 100644
--- a/app/MindWork AI Studio/Plugins/languages/de-de-43065dbc-78d0-45b7-92be-f14c2926e2dc/plugin.lua
+++ b/app/MindWork AI Studio/Plugins/languages/de-de-43065dbc-78d0-45b7-92be-f14c2926e2dc/plugin.lua
@@ -1650,11 +1650,17 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::PROFILESELECTION::T918741365"] = "Hier k
-- Provider
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::PROVIDERSELECTION::T900237532"] = "Anbieter"
--- Use PDF content as input
-UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READPDFCONTENT::T2849276709"] = "PDF-Inhalt als Eingabe verwenden"
+-- Images are not supported yet
+UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READFILECONTENT::T298062956"] = "Bilder werden derzeit nicht unterstützt"
--- Select PDF file
-UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READPDFCONTENT::T63272795"] = "PDF-Datei auswählen"
+-- Use file content as input
+UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READFILECONTENT::T3499386973"] = "Dokumenteninhalt als Eingabe verwenden"
+
+-- Select file to read its content
+UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READFILECONTENT::T354817589"] = "Datei auswählen, um den Inhalt zu lesen"
+
+-- Executables are not allowed
+UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READFILECONTENT::T4167762413"] = "Ausführbare Dateien sind nicht erlaubt"
-- The content is cleaned using an LLM agent: the main content is extracted, advertisements and other irrelevant things are attempted to be removed; relative links are attempted to be converted into absolute links so that they can be used.
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READWEBCONTENT::T1164201762"] = "Der Inhalt wird mithilfe eines LLM-Agents bereinigt: Der Hauptinhalt wird extrahiert, Werbung und andere irrelevante Elemente werden nach Möglichkeit entfernt. Relative Links werden nach Möglichkeit in absolute Links umgewandelt, damit sie verwendet werden können."
@@ -5484,6 +5490,9 @@ UI_TEXT_CONTENT["AISTUDIO::TOOLS::RAG::RAGPROCESSES::AISRCSELWITHRETCTXVAL::T304
-- AI-based data source selection with AI retrieval context validation
UI_TEXT_CONTENT["AISTUDIO::TOOLS::RAG::RAGPROCESSES::AISRCSELWITHRETCTXVAL::T3775725978"] = "KI-basierte Datenquellen-Auswahl mit Validierung des Abrufkontexts"
+-- Executable Files
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPEFILTER::T2217313358"] = "Ausführbare Dateien"
+
-- PDF Files
UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPEFILTER::T3108466742"] = "PDF-Dateien"
diff --git a/app/MindWork AI Studio/Plugins/languages/en-us-97dfb1ba-50c4-4440-8dfa-6575daf543c8/plugin.lua b/app/MindWork AI Studio/Plugins/languages/en-us-97dfb1ba-50c4-4440-8dfa-6575daf543c8/plugin.lua
index 81f8f11c..3cbe2009 100644
--- a/app/MindWork AI Studio/Plugins/languages/en-us-97dfb1ba-50c4-4440-8dfa-6575daf543c8/plugin.lua
+++ b/app/MindWork AI Studio/Plugins/languages/en-us-97dfb1ba-50c4-4440-8dfa-6575daf543c8/plugin.lua
@@ -1650,11 +1650,17 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::PROFILESELECTION::T918741365"] = "You can
-- Provider
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::PROVIDERSELECTION::T900237532"] = "Provider"
--- Use PDF content as input
-UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READPDFCONTENT::T2849276709"] = "Use PDF content as input"
+-- Images are not supported yet
+UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READFILECONTENT::T298062956"] = "Images are not supported yet"
--- Select PDF file
-UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READPDFCONTENT::T63272795"] = "Select PDF file"
+-- Use file content as input
+UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READFILECONTENT::T3499386973"] = "Use file content as input"
+
+-- Select file to read its content
+UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READFILECONTENT::T354817589"] = "Select file to read its content"
+
+-- Executables are not allowed
+UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READFILECONTENT::T4167762413"] = "Executables are not allowed"
-- The content is cleaned using an LLM agent: the main content is extracted, advertisements and other irrelevant things are attempted to be removed; relative links are attempted to be converted into absolute links so that they can be used.
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READWEBCONTENT::T1164201762"] = "The content is cleaned using an LLM agent: the main content is extracted, advertisements and other irrelevant things are attempted to be removed; relative links are attempted to be converted into absolute links so that they can be used."
@@ -5484,6 +5490,9 @@ UI_TEXT_CONTENT["AISTUDIO::TOOLS::RAG::RAGPROCESSES::AISRCSELWITHRETCTXVAL::T304
-- AI-based data source selection with AI retrieval context validation
UI_TEXT_CONTENT["AISTUDIO::TOOLS::RAG::RAGPROCESSES::AISRCSELWITHRETCTXVAL::T3775725978"] = "AI-based data source selection with AI retrieval context validation"
+-- Executable Files
+UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPEFILTER::T2217313358"] = "Executable Files"
+
-- PDF Files
UI_TEXT_CONTENT["AISTUDIO::TOOLS::RUST::FILETYPEFILTER::T3108466742"] = "PDF Files"
diff --git a/app/MindWork AI Studio/Tools/ContentStreamDocumentMetadata.cs b/app/MindWork AI Studio/Tools/ContentStreamDocumentMetadata.cs
new file mode 100644
index 00000000..4b21faeb
--- /dev/null
+++ b/app/MindWork AI Studio/Tools/ContentStreamDocumentMetadata.cs
@@ -0,0 +1,4 @@
+namespace AIStudio.Tools;
+
+// ReSharper disable ClassNeverInstantiated.Global
+public sealed class ContentStreamDocumentMetadata : ContentStreamSseMetadata; // Metadata type for the "Document" variant; it declares no properties.
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Tools/ContentStreamImageMetadata.cs b/app/MindWork AI Studio/Tools/ContentStreamImageMetadata.cs
new file mode 100644
index 00000000..0f08d295
--- /dev/null
+++ b/app/MindWork AI Studio/Tools/ContentStreamImageMetadata.cs
@@ -0,0 +1,4 @@
+namespace AIStudio.Tools;
+
+// ReSharper disable ClassNeverInstantiated.Global
+public sealed class ContentStreamImageMetadata: ContentStreamSseMetadata; // Metadata type for the "Image" variant; it declares no properties.
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Tools/ContentStreamMetadataJsonConverter.cs b/app/MindWork AI Studio/Tools/ContentStreamMetadataJsonConverter.cs
new file mode 100644
index 00000000..e3308c78
--- /dev/null
+++ b/app/MindWork AI Studio/Tools/ContentStreamMetadataJsonConverter.cs
@@ -0,0 +1,50 @@
+using System.Text.Json;
+using System.Text.Json.Serialization;
+
+namespace AIStudio.Tools;
+
+/// <summary>
+/// Deserializes the metadata of a content stream SSE event into the matching
+/// <see cref="ContentStreamSseMetadata"/> subclass.
+/// </summary>
+/// <remarks>
+/// The subclass is chosen by the name of the first property of the JSON object,
+/// e.g. <c>{"Pdf": {"page_number": 1}}</c> becomes a <see cref="ContentStreamPdfMetadata"/>.
+/// </remarks>
+public sealed class ContentStreamMetadataJsonConverter : JsonConverter<ContentStreamSseMetadata>
+{
+    /// <summary>
+    /// Reads one metadata value.
+    /// </summary>
+    /// <returns>The typed metadata; null when the value is not an object or its first property is unknown.</returns>
+    public override ContentStreamSseMetadata? Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
+    {
+        using var jsonDoc = JsonDocument.ParseValue(ref reader);
+        var root = jsonDoc.RootElement;
+
+        // EnumerateObject() throws for non-object values (e.g. a plain string), so bail out early.
+        if (root.ValueKind is not JsonValueKind.Object)
+            return null;
+
+        var propertyName = root.EnumerateObject()
+            .Select(p => p.Name)
+            .FirstOrDefault();
+
+        return propertyName switch
+        {
+            "Text" => root.Deserialize<ContentStreamTextMetadata>(options),
+            "Pdf" => root.Deserialize<ContentStreamPdfMetadata>(options),
+            "Spreadsheet" => root.Deserialize<ContentStreamSpreadsheetMetadata>(options),
+            "Presentation" => root.Deserialize<ContentStreamPresentationMetadata>(options),
+            "Image" => root.Deserialize<ContentStreamImageMetadata>(options),
+            "Document" => root.Deserialize<ContentStreamDocumentMetadata>(options),
+
+            _ => null
+        };
+    }
+
+    /// <summary>
+    /// Writes the metadata using its runtime type, so that the properties of the subclass are included.
+    /// </summary>
+    public override void Write(Utf8JsonWriter writer, ContentStreamSseMetadata value, JsonSerializerOptions options) => JsonSerializer.Serialize(writer, value, value.GetType(), options);
+}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Tools/ContentStreamPdfDetails.cs b/app/MindWork AI Studio/Tools/ContentStreamPdfDetails.cs
new file mode 100644
index 00000000..dd727ce2
--- /dev/null
+++ b/app/MindWork AI Studio/Tools/ContentStreamPdfDetails.cs
@@ -0,0 +1,11 @@
+using System.Text.Json.Serialization;
+
+namespace AIStudio.Tools;
+
+// ReSharper disable UnusedAutoPropertyAccessor.Global
+// ReSharper disable ClassNeverInstantiated.Global
+public sealed class ContentStreamPdfDetails // Payload of the "Pdf" metadata variant.
+{
+ [JsonPropertyName("page_number")]
+ public int? PageNumber { get; init; } // Page the content was taken from; null when the JSON carries no value.
+}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Tools/ContentStreamPdfMetadata.cs b/app/MindWork AI Studio/Tools/ContentStreamPdfMetadata.cs
new file mode 100644
index 00000000..172adf44
--- /dev/null
+++ b/app/MindWork AI Studio/Tools/ContentStreamPdfMetadata.cs
@@ -0,0 +1,11 @@
+using System.Text.Json.Serialization;
+
+namespace AIStudio.Tools;
+
+// ReSharper disable UnusedAutoPropertyAccessor.Global
+// ReSharper disable ClassNeverInstantiated.Global
+public sealed class ContentStreamPdfMetadata : ContentStreamSseMetadata // Metadata of content extracted from a PDF file.
+{
+ [JsonPropertyName("Pdf")]
+ public ContentStreamPdfDetails? Pdf { get; init; } // The value stored under the "Pdf" JSON property.
+}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Tools/ContentStreamPptxImageData.cs b/app/MindWork AI Studio/Tools/ContentStreamPptxImageData.cs
new file mode 100644
index 00000000..9cc85eab
--- /dev/null
+++ b/app/MindWork AI Studio/Tools/ContentStreamPptxImageData.cs
@@ -0,0 +1,18 @@
+using System.Text.Json.Serialization;
+
+namespace AIStudio.Tools;
+
+public sealed class ContentStreamPptxImageData // One segment of an image that is transferred in several pieces.
+{
+ [JsonPropertyName("id")]
+ public string? Id { get; init; } // Identifies the image the segment belongs to.
+
+ [JsonPropertyName("content")]
+ public string? Content { get; init; } // The text of this segment; ContentStreamSseHandler joins the segments into one base64 string.
+
+ [JsonPropertyName("segment")]
+ public int? Segment { get; init; } // Position of the segment; ContentStreamSseHandler sorts by it before joining.
+
+ [JsonPropertyName("is_end")]
+ public bool IsEnd { get; init; } // True for the last segment of an image.
+}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Tools/ContentStreamPresentationDetails.cs b/app/MindWork AI Studio/Tools/ContentStreamPresentationDetails.cs
new file mode 100644
index 00000000..e0b39fb8
--- /dev/null
+++ b/app/MindWork AI Studio/Tools/ContentStreamPresentationDetails.cs
@@ -0,0 +1,14 @@
+using System.Text.Json.Serialization;
+
+namespace AIStudio.Tools;
+
+// ReSharper disable UnusedAutoPropertyAccessor.Global
+// ReSharper disable ClassNeverInstantiated.Global
+public sealed class ContentStreamPresentationDetails // Payload of the "Presentation" metadata variant.
+{
+ [JsonPropertyName("slide_number")]
+ public int? SlideNumber { get; init; } // Slide the content belongs to; null when the JSON carries no value.
+
+ [JsonPropertyName("image")]
+ public ContentStreamPptxImageData? Image { get; init; } // An image segment of the slide; null for events without image data.
+}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Tools/ContentStreamPresentationMetadata.cs b/app/MindWork AI Studio/Tools/ContentStreamPresentationMetadata.cs
new file mode 100644
index 00000000..3ce7305e
--- /dev/null
+++ b/app/MindWork AI Studio/Tools/ContentStreamPresentationMetadata.cs
@@ -0,0 +1,11 @@
+using System.Text.Json.Serialization;
+
+namespace AIStudio.Tools;
+
+// ReSharper disable UnusedAutoPropertyAccessor.Global
+// ReSharper disable ClassNeverInstantiated.Global
+public sealed class ContentStreamPresentationMetadata : ContentStreamSseMetadata // Metadata of content extracted from a presentation.
+{
+ [JsonPropertyName("Presentation")]
+ public ContentStreamPresentationDetails? Presentation { get; init; } // The value stored under the "Presentation" JSON property.
+}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Tools/ContentStreamSpreadsheetDetails.cs b/app/MindWork AI Studio/Tools/ContentStreamSpreadsheetDetails.cs
new file mode 100644
index 00000000..71727799
--- /dev/null
+++ b/app/MindWork AI Studio/Tools/ContentStreamSpreadsheetDetails.cs
@@ -0,0 +1,14 @@
+using System.Text.Json.Serialization;
+
+namespace AIStudio.Tools;
+
+// ReSharper disable UnusedAutoPropertyAccessor.Global
+// ReSharper disable ClassNeverInstantiated.Global
+public sealed class ContentStreamSpreadsheetDetails // Payload of the "Spreadsheet" metadata variant.
+{
+ [JsonPropertyName("sheet_name")]
+ public string? SheetName { get; init; } // Name of the sheet the row belongs to.
+
+ [JsonPropertyName("row_number")]
+ public int? RowNumber { get; init; } // Row of the content; ContentStreamSseHandler emits the sheet heading when it is 1.
+}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Tools/ContentStreamSpreadsheetMetadata.cs b/app/MindWork AI Studio/Tools/ContentStreamSpreadsheetMetadata.cs
new file mode 100644
index 00000000..2a8452c1
--- /dev/null
+++ b/app/MindWork AI Studio/Tools/ContentStreamSpreadsheetMetadata.cs
@@ -0,0 +1,11 @@
+using System.Text.Json.Serialization;
+
+namespace AIStudio.Tools;
+
+// ReSharper disable UnusedAutoPropertyAccessor.Global
+// ReSharper disable ClassNeverInstantiated.Global
+public sealed class ContentStreamSpreadsheetMetadata : ContentStreamSseMetadata // Metadata of content extracted from a spreadsheet.
+{
+ [JsonPropertyName("Spreadsheet")]
+ public ContentStreamSpreadsheetDetails? Spreadsheet { get; init; } // The value stored under the "Spreadsheet" JSON property.
+}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Tools/ContentStreamSseEvent.cs b/app/MindWork AI Studio/Tools/ContentStreamSseEvent.cs
new file mode 100644
index 00000000..2c47551f
--- /dev/null
+++ b/app/MindWork AI Studio/Tools/ContentStreamSseEvent.cs
@@ -0,0 +1,15 @@
+using System.Text.Json.Serialization;
+
+namespace AIStudio.Tools;
+
+public sealed class ContentStreamSseEvent // One event of the content stream, as deserialized from a "data:" line.
+{
+ [JsonPropertyName("content")]
+ public string? Content { get; init; } // The extracted content of this event.
+
+ [JsonPropertyName("stream_id")]
+ public string? StreamId { get; init; } // ID of the stream the event belongs to.
+
+ [JsonPropertyName("metadata")]
+ public ContentStreamSseMetadata? Metadata { get; init; } // Describes where the content came from; its concrete type depends on the kind of file.
+}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Tools/ContentStreamSseHandler.cs b/app/MindWork AI Studio/Tools/ContentStreamSseHandler.cs
new file mode 100644
index 00000000..ba640f01
--- /dev/null
+++ b/app/MindWork AI Studio/Tools/ContentStreamSseHandler.cs
@@ -0,0 +1,138 @@
+using System.Collections.Concurrent;
+using System.Text;
+
+namespace AIStudio.Tools;
+
+/// <summary>
+/// Turns content stream SSE events into text fragments that can be concatenated.
+/// </summary>
+/// <remarks>
+/// State is kept in static dictionaries keyed by stream ID, so concurrent extractions
+/// need distinct stream IDs. Slide-number entries are never removed by this class.
+/// </remarks>
+public static class ContentStreamSseHandler
+{
+    // Image segments received so far, keyed by "{streamId}-{imageId}".
+    private static readonly ConcurrentDictionary<string, List<ContentStreamPptxImageData>> CHUNKED_IMAGES = new();
+
+    // Slide number of the latest presentation event, keyed by stream ID.
+    private static readonly ConcurrentDictionary<string, int> CURRENT_SLIDE_NUMBERS = new();
+
+    /// <summary>
+    /// Renders a single SSE event as text.
+    /// </summary>
+    /// <param name="sseEvent">The event to render; may be null.</param>
+    /// <param name="extractImages">
+    /// When true, a presentation image is appended as base64 text once its last segment
+    /// has arrived. When false, image segments are ignored.
+    /// </param>
+    /// <returns>The rendered text; an empty string when the event or its content is null.</returns>
+    public static string ProcessEvent(ContentStreamSseEvent? sseEvent, bool extractImages = true)
+    {
+        if (sseEvent?.Content is not { } content)
+            return string.Empty;
+
+        switch (sseEvent.Metadata)
+        {
+            case ContentStreamTextMetadata:
+                return $"{content}\n";
+
+            case ContentStreamPdfMetadata pdfMetadata:
+                var pageNumber = pdfMetadata.Pdf?.PageNumber ?? 0;
+                return $"# Page {pageNumber}\n{content}";
+
+            case ContentStreamSpreadsheetMetadata spreadsheetMetadata:
+                var sheetName = spreadsheetMetadata.Spreadsheet?.SheetName;
+                var rowNumber = spreadsheetMetadata.Spreadsheet?.RowNumber;
+                var spreadSheetResult = new StringBuilder();
+
+                // The first row of a sheet is preceded by a heading with the sheet name.
+                if (rowNumber == 1)
+                    spreadSheetResult.AppendLine($"\n# {sheetName}");
+
+                spreadSheetResult.AppendLine(content);
+                return spreadSheetResult.ToString();
+
+            case ContentStreamPresentationMetadata presentationMetadata:
+                return ProcessPresentationEvent(content, sseEvent.StreamId, presentationMetadata, extractImages);
+
+            // Document, image, unknown, and missing metadata: the content is passed through as is.
+            default:
+                return content;
+        }
+    }
+
+    /// <summary>
+    /// Renders a presentation event: an optional slide heading, the content, and optionally a completed image.
+    /// </summary>
+    private static string ProcessPresentationEvent(string content, string? streamId, ContentStreamPresentationMetadata metadata, bool extractImages)
+    {
+        // A null stream ID cannot be used as a dictionary key, so fall back to the empty string.
+        var streamKey = streamId ?? string.Empty;
+        var slideNumber = metadata.Presentation?.SlideNumber ?? 0;
+        var image = metadata.Presentation?.Image;
+        var result = new StringBuilder();
+
+        // A heading is emitted whenever the slide number differs from the previous event of this stream.
+        CURRENT_SLIDE_NUMBERS.TryGetValue(streamKey, out var currentSlideNumber);
+        if (slideNumber != currentSlideNumber)
+            result.AppendLine($"# Slide {slideNumber}");
+
+        result.Append(content);
+
+        // Segments are only buffered when the image is wanted; otherwise nothing would ever remove them again.
+        if (extractImages && image is not null)
+        {
+            var imageId = $"{streamKey}-{image.Id}";
+            if (ProcessImageSegment(imageId, image))
+                result.AppendLine(BuildImage(imageId));
+        }
+
+        CURRENT_SLIDE_NUMBERS[streamKey] = slideNumber;
+        return result.ToString();
+    }
+
+    /// <summary>
+    /// Buffers one image segment under the given ID.
+    /// </summary>
+    /// <returns>True when the segment was stored and is marked as the last one of its image.</returns>
+    private static bool ProcessImageSegment(string imageId, ContentStreamPptxImageData contentStreamPptxImageData)
+    {
+        if (string.IsNullOrWhiteSpace(contentStreamPptxImageData.Id) || string.IsNullOrWhiteSpace(imageId))
+            return false;
+
+        var imageSegment = new ContentStreamPptxImageData
+        {
+            Id = imageId,
+            Content = contentStreamPptxImageData.Content ?? string.Empty,
+            Segment = contentStreamPptxImageData.Segment ?? 0,
+            IsEnd = contentStreamPptxImageData.IsEnd,
+        };
+
+        CHUNKED_IMAGES.AddOrUpdate(
+            imageId,
+            _ => [imageSegment],
+            (_, existingList) =>
+            {
+                existingList.Add(imageSegment);
+                return existingList;
+            }
+        );
+
+        return imageSegment.IsEnd;
+    }
+
+    /// <summary>
+    /// Removes all buffered segments of an image and joins their content in segment order.
+    /// </summary>
+    /// <returns>The joined base64 text; an empty string when nothing is buffered for the ID.</returns>
+    private static string BuildImage(string id)
+    {
+        if (!CHUNKED_IMAGES.TryRemove(id, out var imageSegments))
+            return string.Empty;
+
+        return string.Concat(imageSegments
+            .OrderBy(item => item.Segment)
+            .Select(item => item.Content));
+    }
+}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Tools/ContentStreamSseMetadata.cs b/app/MindWork AI Studio/Tools/ContentStreamSseMetadata.cs
new file mode 100644
index 00000000..70da325b
--- /dev/null
+++ b/app/MindWork AI Studio/Tools/ContentStreamSseMetadata.cs
@@ -0,0 +1,6 @@
+using System.Text.Json.Serialization;
+
+namespace AIStudio.Tools;
+
+[JsonConverter(typeof(ContentStreamMetadataJsonConverter))] // JSON handling of this type goes through the custom converter.
+public abstract class ContentStreamSseMetadata; // Common base class of all metadata variants; it declares no members itself.
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Tools/ContentStreamTextDetails.cs b/app/MindWork AI Studio/Tools/ContentStreamTextDetails.cs
new file mode 100644
index 00000000..25d1f6a5
--- /dev/null
+++ b/app/MindWork AI Studio/Tools/ContentStreamTextDetails.cs
@@ -0,0 +1,10 @@
+using System.Text.Json.Serialization;
+
+namespace AIStudio.Tools;
+
+// ReSharper disable ClassNeverInstantiated.Global
+public sealed class ContentStreamTextDetails // Payload of the "Text" metadata variant.
+{
+ [JsonPropertyName("line_number")]
+ public int? LineNumber { get; init; } // Line the content was read from; null when the JSON carries no value.
+}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Tools/ContentStreamTextMetadata.cs b/app/MindWork AI Studio/Tools/ContentStreamTextMetadata.cs
new file mode 100644
index 00000000..b9d30495
--- /dev/null
+++ b/app/MindWork AI Studio/Tools/ContentStreamTextMetadata.cs
@@ -0,0 +1,10 @@
+using System.Text.Json.Serialization;
+
+namespace AIStudio.Tools;
+
+// ReSharper disable ClassNeverInstantiated.Global
+public sealed class ContentStreamTextMetadata : ContentStreamSseMetadata // Metadata of content read from a plain text file.
+{
+ [JsonPropertyName("Text")]
+ public ContentStreamTextDetails? Text { get; init; } // The value stored under the "Text" JSON property.
+}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Tools/EnterpriseEnvironment.cs b/app/MindWork AI Studio/Tools/EnterpriseEnvironment.cs
index fd7e3fbe..952ec3b2 100644
--- a/app/MindWork AI Studio/Tools/EnterpriseEnvironment.cs
+++ b/app/MindWork AI Studio/Tools/EnterpriseEnvironment.cs
@@ -4,5 +4,5 @@ namespace AIStudio.Tools;
public readonly record struct EnterpriseEnvironment(string ConfigurationServerUrl, Guid ConfigurationId, EntityTagHeaderValue? ETag)
{
- public bool IsActive => !string.IsNullOrEmpty(this.ConfigurationServerUrl) && this.ConfigurationId != Guid.Empty;
+ public bool IsActive => !string.IsNullOrWhiteSpace(this.ConfigurationServerUrl) && this.ConfigurationId != Guid.Empty; // Active only with a non-blank server URL and a non-empty configuration ID.
}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Tools/Rust/FileTypeFilter.cs b/app/MindWork AI Studio/Tools/Rust/FileTypeFilter.cs
index e542cec6..bf0ac5b2 100644
--- a/app/MindWork AI Studio/Tools/Rust/FileTypeFilter.cs
+++ b/app/MindWork AI Studio/Tools/Rust/FileTypeFilter.cs
@@ -20,4 +20,6 @@ public readonly record struct FileTypeFilter(string FilterName, string[] FilterE
public static FileTypeFilter AllOffice => new(TB("All Office Files"), ["docx", "xlsx", "pptx", "doc", "xls", "ppt", "pdf"]);
public static FileTypeFilter AllImages => new(TB("All Image Files"), ["jpg", "jpeg", "png", "gif", "bmp", "tiff"]);
+
+ public static FileTypeFilter Executables => new(TB("Executable Files"), ["exe", "app", "bin", "appimage"]);
}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Tools/Services/RustService.Retrieval.cs b/app/MindWork AI Studio/Tools/Services/RustService.Retrieval.cs
index 4a201564..3d43eb72 100644
--- a/app/MindWork AI Studio/Tools/Services/RustService.Retrieval.cs
+++ b/app/MindWork AI Studio/Tools/Services/RustService.Retrieval.cs
@@ -1,16 +1,63 @@
+using System.Text;
+using System.Text.Json;
+
namespace AIStudio.Tools.Services;
public sealed partial class RustService
{
- public async Task GetPDFText(string filePath)
+    /// <summary>
+    /// Reads the content of a file by consuming the server-sent events of the
+    /// runtime's <c>/retrieval/fs/extract</c> endpoint.
+    /// </summary>
+    /// <param name="path">The path of the file to read.</param>
+    /// <param name="streamId">The ID passed to the runtime as <c>stream_id</c>.</param>
+    /// <param name="maxChunks">The maximum number of events to process.</param>
+    /// <returns>The concatenated text of all processed events; an empty string when the request fails.</returns>
+    public async Task<string> ReadArbitraryFileData(string path, string streamId, int maxChunks)
{
- var response = await this.http.GetAsync($"/retrieval/fs/read/pdf?file_path={filePath}");
- if (!response.IsSuccessStatusCode)
- {
- this.logger!.LogError($"Failed to read the PDF file due to an network error: '{response.StatusCode}'");
- return string.Empty;
- }
+        var requestUri = $"/retrieval/fs/extract?path={Uri.EscapeDataString(path)}&stream_id={Uri.EscapeDataString(streamId)}";
+        using var request = new HttpRequestMessage(HttpMethod.Get, requestUri);
+        using var response = await this.http.SendAsync(request, HttpCompletionOption.ResponseHeadersRead);
- return await response.Content.ReadAsStringAsync();
+        if (!response.IsSuccessStatusCode)
+        {
+            this.logger?.LogError("Failed to read the file due to a network error: '{StatusCode}'", response.StatusCode);
+            return string.Empty;
+        }
+
+        await using var stream = await response.Content.ReadAsStreamAsync();
+        using var reader = new StreamReader(stream);
+
+        var resultBuilder = new StringBuilder();
+        var chunkCount = 0;
+
+        // ReadLineAsync() returns null at the end of the stream. StreamReader.EndOfStream is not
+        // used because it can block synchronously while waiting for more data.
+        while (chunkCount < maxChunks && await reader.ReadLineAsync() is { } line)
+        {
+            // Only "data:" lines carry a payload; blank lines and other fields are skipped.
+            if (!line.StartsWith("data:", StringComparison.Ordinal))
+                continue;
+
+            var jsonContent = line[5..];
+
+            try
+            {
+                var sseEvent = JsonSerializer.Deserialize<ContentStreamSseEvent>(jsonContent);
+                if (sseEvent is not null)
+                {
+                    // Images are not extracted; only the text is collected.
+                    var content = ContentStreamSseHandler.ProcessEvent(sseEvent, false);
+                    resultBuilder.Append(content);
+                    chunkCount++;
+                }
+            }
+            catch (JsonException)
+            {
+                this.logger?.LogError("Failed to deserialize SSE event: {JsonContent}", jsonContent);
+            }
+        }
+
+        return resultBuilder.ToString();
}
}
\ No newline at end of file
diff --git a/runtime/src/file_data.rs b/runtime/src/file_data.rs
index d75194e3..7333f963 100644
--- a/runtime/src/file_data.rs
+++ b/runtime/src/file_data.rs
@@ -23,16 +23,35 @@ use tokio_stream::wrappers::ReceiverStream;
#[derive(Debug, Serialize)]
pub struct Chunk {
pub content: String,
+ pub stream_id: String, // ID of the stream this chunk is sent on; empty until `set_stream_id` is called.
pub metadata: Metadata,
}
+impl Chunk {
+ pub fn new(content: String, metadata: Metadata) -> Self { // Creates a chunk with an empty stream ID.
+ Chunk { content, stream_id: String::new(), metadata }
+ }
+
+ pub fn set_stream_id(&mut self, stream_id: &str) { self.stream_id = stream_id.to_string(); } // Stores an owned copy of the given ID.
+}
#[derive(Debug, Serialize)]
pub enum Metadata {
- Text { line_number: usize },
- Pdf { page_number: usize },
- Spreadsheet { sheet_name: String, row_number: usize },
- Document,
- Image,
+ Text {
+ line_number: usize
+ },
+
+ Pdf {
+ page_number: usize
+ },
+
+ Spreadsheet {
+ sheet_name: String,
+ row_number: usize,
+ },
+
+ Document {},
+ Image {},
+
Presentation {
slide_number: u32,
image: Option,
@@ -61,18 +80,23 @@ const IMAGE_SEGMENT_SIZE_IN_CHARS: usize = 8_192; // equivalent to ~ 5500 token
type Result = std::result::Result>;
type ChunkStream = Pin> + Send>>;
-#[get("/retrieval/fs/extract?")]
-pub async fn extract_data(_token: APIToken, path: String, mut end: Shutdown) -> EventStream![] {
+#[get("/retrieval/fs/extract?&")]
+pub async fn extract_data(_token: APIToken, path: String, stream_id: String, mut end: Shutdown) -> EventStream![] {
EventStream! {
let stream_result = stream_data(&path).await;
+ let id_ref = &stream_id;
+
match stream_result {
Ok(mut stream) => {
loop {
let chunk = select! {
chunk = stream.next() => match chunk {
- Some(Ok(chunk)) => chunk,
+ Some(Ok(mut chunk)) => {
+ chunk.set_stream_id(id_ref);
+ chunk
+ },
Some(Err(e)) => {
- yield Event::json(&format!("Error: {}", e));
+ yield Event::json(&format!("Error: {e}"));
break;
},
None => break,
@@ -85,7 +109,7 @@ pub async fn extract_data(_token: APIToken, path: String, mut end: Shutdown) ->
},
Err(e) => {
- yield Event::json(&format!("Error starting stream: {}", e));
+ yield Event::json(&format!("Error starting stream: {e}"));
}
}
}
@@ -160,40 +184,16 @@ async fn stream_text_file(file_path: &str) -> Result {
let stream = stream! {
while let Ok(Some(line)) = lines.next_line().await {
line_number += 1;
- yield Ok(Chunk {
- content: line,
- metadata: Metadata::Text { line_number },
- });
+ yield Ok(Chunk::new(
+ line,
+ Metadata::Text { line_number }
+ ));
}
};
Ok(Box::pin(stream))
}
-#[get("/retrieval/fs/read/pdf?")]
-pub fn read_pdf(_token: APIToken, file_path: String) -> String {
- let pdfium = Pdfium::ai_studio_init();
- let doc = match pdfium.load_pdf_from_file(&file_path, None) {
- Ok(document) => document,
- Err(e) => return e.to_string(),
- };
-
- let mut pdf_content = String::new();
- for page in doc.pages().iter() {
- let content = match page.text().map(|text_content| text_content.all()) {
- Ok(content) => content,
- Err(_) => {
- continue
- }
- };
-
- pdf_content.push_str(&content);
- pdf_content.push_str("\n\n");
- }
-
- pdf_content
-}
-
async fn stream_pdf(file_path: &str) -> Result {
let path = file_path.to_owned();
let (tx, rx) = mpsc::channel(10);
@@ -217,10 +217,10 @@ async fn stream_pdf(file_path: &str) -> Result {
}
};
- if tx.blocking_send(Ok(Chunk {
- content,
- metadata: Metadata::Pdf { page_number: num_page + 1 },
- })).is_err() {
+ if tx.blocking_send(Ok(Chunk::new(
+ content,
+ Metadata::Pdf { page_number: num_page + 1 }
+ ))).is_err() {
break;
}
}
@@ -257,13 +257,13 @@ async fn stream_spreadsheet_as_csv(file_path: &str) -> Result {
.collect::>()
.join(",");
- if tx.blocking_send(Ok(Chunk {
+ if tx.blocking_send(Ok(Chunk::new(
content,
- metadata: Metadata::Spreadsheet {
+ Metadata::Spreadsheet {
sheet_name: sheet_name.clone(),
row_number: row_idx + 1,
- },
- })).is_err() {
+ }
+ ))).is_err() {
return;
}
}
@@ -288,10 +288,10 @@ async fn convert_with_pandoc(
let stream = stream! {
if output.status.success() {
match String::from_utf8(output.stdout.clone()) {
- Ok(content) => yield Ok(Chunk {
+ Ok(content) => yield Ok(Chunk::new(
content,
- metadata: Metadata::Document,
- }),
+ Metadata::Document {}
+ )),
Err(e) => yield Err(e.into()),
}
} else {
@@ -310,10 +310,10 @@ async fn chunk_image(file_path: &str) -> Result {
let base64 = general_purpose::STANDARD.encode(&data);
let stream = stream! {
- yield Ok(Chunk {
- content: base64,
- metadata: Metadata::Image,
- });
+ yield Ok(Chunk::new(
+ base64,
+ Metadata::Image {},
+ ));
};
Ok(Box::pin(stream))
@@ -340,13 +340,13 @@ async fn stream_pptx(file_path: &str) -> Result {
match slide_result {
Ok(slide) => {
if let Some(md_content) = slide.convert_to_md() {
- let chunk = Chunk {
- content: md_content,
- metadata: Metadata::Presentation {
+ let chunk = Chunk::new(
+ md_content,
+ Metadata::Presentation {
slide_number: slide.slide_number,
image: None,
- },
- };
+ }
+ );
if tx.send(Ok(chunk)).await.is_err() {
break;
@@ -373,13 +373,13 @@ async fn stream_pptx(file_path: &str) -> Result {
is_end
);
- let chunk = Chunk {
- content: String::new(),
- metadata: Metadata::Presentation {
+ let chunk = Chunk::new(
+ String::new(),
+ Metadata::Presentation {
slide_number: slide.slide_number,
image: Some(base64_image),
- },
- };
+ }
+ );
if tx.send(Ok(chunk)).await.is_err() {
break;
diff --git a/runtime/src/runtime_api.rs b/runtime/src/runtime_api.rs
index eece5973..b700af5b 100644
--- a/runtime/src/runtime_api.rs
+++ b/runtime/src/runtime_api.rs
@@ -82,7 +82,6 @@ pub fn start_runtime_api() {
crate::environment::delete_enterprise_env_config_id,
crate::environment::read_enterprise_env_config_server_url,
crate::file_data::extract_data,
- crate::file_data::read_pdf,
crate::log::get_log_paths,
])
.ignite().await.unwrap()