Merge branch 'refs/heads/main' into 48-Load-data-from-files-into-assistants

# Conflicts:
#	runtime/src/file_data.rs
This commit is contained in:
krut_ni 2025-06-24 10:27:15 +02:00
commit f18570634d
8 changed files with 322 additions and 28 deletions

View File

@ -4348,6 +4348,9 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::ABOUT::T64689067"] = "This Rust library is use
-- installed by AI Studio
UI_TEXT_CONTENT["AISTUDIO::PAGES::ABOUT::T833849470"] = "installed by AI Studio"
-- We use this library to be able to read PowerPoint files. This allows us to insert content from slides into prompts and take PowerPoint files into account in RAG processes. We thank Nils Kruthoff for his work on this Rust crate.
UI_TEXT_CONTENT["AISTUDIO::PAGES::ABOUT::T855925638"] = "We use this library to be able to read PowerPoint files. This allows us to insert content from slides into prompts and take PowerPoint files into account in RAG processes. We thank Nils Kruthoff for his work on this Rust crate."
-- For some data transfers, we need to encode the data in base64. This Rust library is great for this purpose.
UI_TEXT_CONTENT["AISTUDIO::PAGES::ABOUT::T870640199"] = "For some data transfers, we need to encode the data in base64. This Rust library is great for this purpose."

View File

@ -114,6 +114,7 @@
<ThirdPartyComponent Name="async-stream" Developer="Carl Lerche, Taiki Endo & Open Source Community" LicenseName="MIT" LicenseUrl="https://github.com/tokio-rs/async-stream/blob/master/LICENSE" RepositoryUrl="https://github.com/tokio-rs/async-stream" UseCase="@T("This library is used to create asynchronous streams in Rust. It allows us to work with streams of data that can be produced asynchronously, making it easier to handle events or data that arrive over time. We use this, e.g., to stream arbitrary data from the file system to the embedding system.")"/>
<ThirdPartyComponent Name="flexi_logger" Developer="emabee & Open Source Community" LicenseName="MIT" LicenseUrl="https://github.com/emabee/flexi_logger/blob/master/LICENSE-MIT" RepositoryUrl="https://github.com/emabee/flexi_logger" UseCase="@T("This Rust library is used to output the app's messages to the terminal. This is helpful during development and troubleshooting. This feature is initially invisible; when the app is started via the terminal, the messages become visible.")"/>
<ThirdPartyComponent Name="rand" Developer="Rust developers & Open Source Community" LicenseName="MIT" LicenseUrl="https://github.com/rust-random/rand/blob/master/LICENSE-MIT" RepositoryUrl="https://github.com/rust-random/rand" UseCase="@T("We must generate random numbers, e.g., for securing the interprocess communication between the user interface and the runtime. The rand library is great for this purpose.")"/>
<ThirdPartyComponent Name="pptx-to-md" Developer="Nils Kruthoff & Open Source Community" LicenseName="MIT" LicenseUrl="https://github.com/nilskruthoff/pptx-parser/blob/master/LICENCE-MIT" RepositoryUrl="https://github.com/nilskruthoff/pptx-parser" UseCase="@T("We use this library to be able to read PowerPoint files. This allows us to insert content from slides into prompts and take PowerPoint files into account in RAG processes. We thank Nils Kruthoff for his work on this Rust crate.")"/>
<ThirdPartyComponent Name="base64" Developer="Marshall Pierce, Alice Maz & Open Source Community" LicenseName="MIT" LicenseUrl="https://github.com/marshallpierce/rust-base64/blob/master/LICENSE-MIT" RepositoryUrl="https://github.com/marshallpierce/rust-base64" UseCase="@T("For some data transfers, we need to encode the data in base64. This Rust library is great for this purpose.")"/>
<ThirdPartyComponent Name="Rust Crypto" Developer="Artyom Pavlov, Tony Arcieri, Brian Warner, Arthur Gautier, Vlad Filippov, Friedel Ziegelmayer, Nicolas Stalder & Open Source Community" LicenseName="MIT" LicenseUrl="https://github.com/RustCrypto/traits/blob/master/cipher/LICENSE-MIT" RepositoryUrl="https://github.com/RustCrypto" UseCase="@T("When transferring sensitive data between Rust runtime and .NET app, we encrypt the data. We use some libraries from the Rust Crypto project for this purpose: cipher, aes, cbc, pbkdf2, hmac, and sha2. We are thankful for the great work of the Rust Crypto project.")"/>
<ThirdPartyComponent Name="rcgen" Developer="RustTLS developers, est31 & Open Source Community" LicenseName="MIT" LicenseUrl="https://github.com/rustls/rcgen/blob/main/LICENSE" RepositoryUrl="https://github.com/rustls/rcgen" UseCase="@T("For the secure communication between the user interface and the runtime, we need to create certificates. This Rust library is great for this purpose.")"/>

View File

@ -4212,7 +4212,7 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::ABOUT::T2173617769"] = "Diese Bibliothek wird
-- For the secure communication between the user interface and the runtime, we need to create certificates. This Rust library is great for this purpose.
UI_TEXT_CONTENT["AISTUDIO::PAGES::ABOUT::T2174764529"] = "Für die sichere Kommunikation zwischen der Benutzeroberfläche und der Laufzeit müssen wir Zertifikate erstellen. Diese Rust-Bibliothek eignet sich hervorragend dafür."
-- AI Studio runs without an enterprise configuration.
-- This is a private AI Studio installation. It runs without an enterprise configuration.
UI_TEXT_CONTENT["AISTUDIO::PAGES::ABOUT::T2244723851"] = "Dies ist eine private AI Studio-Installation. Es wird keine Konfiguration einer Organisation verwendet."
-- OK
@ -4350,6 +4350,9 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::ABOUT::T64689067"] = "Diese Rust-Bibliothek wi
-- installed by AI Studio
UI_TEXT_CONTENT["AISTUDIO::PAGES::ABOUT::T833849470"] = "installiert von AI Studio"
-- We use this library to be able to read PowerPoint files. This allows us to insert content from slides into prompts and take PowerPoint files into account in RAG processes. We thank Nils Kruthoff for his work on this Rust crate.
UI_TEXT_CONTENT["AISTUDIO::PAGES::ABOUT::T855925638"] = "Wir verwenden diese Bibliothek, um PowerPoint-Dateien lesen zu können. So ist es möglich, Inhalte aus Folien in Prompts einzufügen und PowerPoint-Dateien in RAG-Prozessen zu berücksichtigen. Wir danken Nils Kruthoff für seine Arbeit an diesem Rust-Crate."
-- For some data transfers, we need to encode the data in base64. This Rust library is great for this purpose.
UI_TEXT_CONTENT["AISTUDIO::PAGES::ABOUT::T870640199"] = "Für einige Datenübertragungen müssen wir die Daten in Base64 kodieren. Diese Rust-Bibliothek eignet sich dafür hervorragend."

View File

@ -4212,7 +4212,7 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::ABOUT::T2173617769"] = "This library is used t
-- For the secure communication between the user interface and the runtime, we need to create certificates. This Rust library is great for this purpose.
UI_TEXT_CONTENT["AISTUDIO::PAGES::ABOUT::T2174764529"] = "For the secure communication between the user interface and the runtime, we need to create certificates. This Rust library is great for this purpose."
-- AI Studio runs without an enterprise configuration.
-- This is a private AI Studio installation. It runs without an enterprise configuration.
UI_TEXT_CONTENT["AISTUDIO::PAGES::ABOUT::T2244723851"] = "This is a private AI Studio installation. It runs without an enterprise configuration."
-- OK
@ -4350,6 +4350,9 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::ABOUT::T64689067"] = "This Rust library is use
-- installed by AI Studio
UI_TEXT_CONTENT["AISTUDIO::PAGES::ABOUT::T833849470"] = "installed by AI Studio"
-- We use this library to be able to read PowerPoint files. This allows us to insert content from slides into prompts and take PowerPoint files into account in RAG processes. We thank Nils Kruthoff for his work on this Rust crate.
UI_TEXT_CONTENT["AISTUDIO::PAGES::ABOUT::T855925638"] = "We use this library to be able to read PowerPoint files. This allows us to insert content from slides into prompts and take PowerPoint files into account in RAG processes. We thank Nils Kruthoff for his work on this Rust crate."
-- For some data transfers, we need to encode the data in base64. This Rust library is great for this purpose.
UI_TEXT_CONTENT["AISTUDIO::PAGES::ABOUT::T870640199"] = "For some data transfers, we need to encode the data in base64. This Rust library is great for this purpose."

View File

@ -1,2 +1,3 @@
# v0.9.49, build 224 (2025-06-xx xx:xx UTC)
- Added a library by Nils Kruthoff (`nilskruthoff`) that allows AI Studio to read PowerPoint files. This feature is not yet available in the UI, but it will soon be available. Thanks, Nils, for that great contribution.
- Changed the timestamp display to use the local datetime format for the chats and assistants.

204
runtime/Cargo.lock generated
View File

@ -410,6 +410,25 @@ dependencies = [
"serde",
]
[[package]]
name = "bzip2"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47"
dependencies = [
"bzip2-sys",
]
[[package]]
name = "bzip2-sys"
version = "0.1.13+1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14"
dependencies = [
"cc",
"pkg-config",
]
[[package]]
name = "cairo-rs"
version = "0.15.12"
@ -632,6 +651,12 @@ dependencies = [
"web-sys",
]
[[package]]
name = "constant_time_eq"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6"
[[package]]
name = "convert_case"
version = "0.4.0"
@ -708,6 +733,21 @@ dependencies = [
"libc",
]
[[package]]
name = "crc"
version = "3.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9710d3b3739c2e349eb44fe848ad0b7c8cb1e42bd87ee49371df2f7acaf3e675"
dependencies = [
"crc-catalog",
]
[[package]]
name = "crc-catalog"
version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5"
[[package]]
name = "crc32fast"
version = "1.4.2"
@ -864,10 +904,16 @@ dependencies = [
]
[[package]]
name = "deranged"
version = "0.3.11"
name = "deflate64"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4"
checksum = "da692b8d1080ea3045efaab14434d40468c3d8657e42abddfffca87b428f4c1b"
[[package]]
name = "deranged"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c9e6a11ca8224451684bc0d7d5a7adbf8f2fd6887261a1cfc3c0432f9d4068e"
dependencies = [
"powerfmt",
"serde",
@ -1155,7 +1201,7 @@ dependencies = [
"log",
"nu-ansi-term 0.50.1",
"regex",
"thiserror 2.0.9",
"thiserror 2.0.12",
]
[[package]]
@ -1453,8 +1499,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8"
dependencies = [
"cfg-if",
"js-sys",
"libc",
"wasi 0.13.3+wasi-0.2.2",
"wasm-bindgen",
"windows-targets 0.52.6",
]
@ -2142,7 +2190,13 @@ dependencies = [
"bytemuck",
"byteorder",
"color_quant",
"exr",
"gif",
"jpeg-decoder",
"num-traits",
"png",
"qoi",
"tiff",
]
[[package]]
@ -2355,6 +2409,9 @@ name = "jpeg-decoder"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f5d4a7da358eff58addd2877a45865158f0d78c911d43a5784ceb7bbf52833b0"
dependencies = [
"rayon",
]
[[package]]
name = "js-sys"
@ -2542,6 +2599,27 @@ dependencies = [
"imgref",
]
[[package]]
name = "lzma-rs"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "297e814c836ae64db86b36cf2a557ba54368d03f6afcd7d947c266692f71115e"
dependencies = [
"byteorder",
"crc",
]
[[package]]
name = "lzma-sys"
version = "0.1.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27"
dependencies = [
"cc",
"libc",
"pkg-config",
]
[[package]]
name = "mac"
version = "0.1.1"
@ -2645,6 +2723,7 @@ dependencies = [
"openssl",
"pbkdf2",
"pdfium-render",
"pptx-to-md",
"rand 0.9.1",
"rand_chacha 0.9.0",
"rcgen",
@ -3469,6 +3548,20 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
[[package]]
name = "pptx-to-md"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e26f6df203425a22367de642b415c18f1456de2bc870fbd7d2be83d5f57ae058"
dependencies = [
"base64 0.22.1",
"image 0.24.9",
"rayon",
"roxmltree",
"thiserror 2.0.12",
"zip 2.5.0",
]
[[package]]
name = "ppv-lite86"
version = "0.2.17"
@ -4115,6 +4208,12 @@ dependencies = [
"uncased",
]
[[package]]
name = "roxmltree"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c20b6793b5c2fa6553b250154b78d6d0db37e72700ae35fad9387a46f487c97"
[[package]]
name = "rustc-demangle"
version = "0.1.24"
@ -4458,6 +4557,17 @@ dependencies = [
"stable_deref_trait",
]
[[package]]
name = "sha1"
version = "0.10.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba"
dependencies = [
"cfg-if",
"cpufeatures",
"digest",
]
[[package]]
name = "sha2"
version = "0.10.8"
@ -5110,11 +5220,11 @@ dependencies = [
[[package]]
name = "thiserror"
version = "2.0.9"
version = "2.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f072643fd0190df67a8bab670c20ef5d8737177d6ac6b2e9a236cb096206b2cc"
checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708"
dependencies = [
"thiserror-impl 2.0.9",
"thiserror-impl 2.0.12",
]
[[package]]
@ -5130,9 +5240,9 @@ dependencies = [
[[package]]
name = "thiserror-impl"
version = "2.0.9"
version = "2.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b50fa271071aae2e6ee85f842e2e28ba8cd2c5fb67f11fcb1fd70b276f9e7d4"
checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d"
dependencies = [
"proc-macro2",
"quote",
@ -5162,9 +5272,9 @@ dependencies = [
[[package]]
name = "time"
version = "0.3.36"
version = "0.3.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885"
checksum = "8a7619e19bc266e0f9c5e6686659d394bc57973859340060a69221e57dbc0c40"
dependencies = [
"deranged",
"itoa 1.0.11",
@ -5177,15 +5287,15 @@ dependencies = [
[[package]]
name = "time-core"
version = "0.1.2"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3"
checksum = "c9e9a38711f559d9e3ce1cdb06dd7c5b8ea546bc90052da6d06bb76da74bb07c"
[[package]]
name = "time-macros"
version = "0.2.18"
version = "0.2.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f252a68540fde3a3877aeea552b832b40ab9a69e318efd078774a01ddee1ccf"
checksum = "3526739392ec93fd8b359c8e98514cb3e8e021beb4e5f597b00a0221f8ed8a49"
dependencies = [
"num-conv",
"time-core",
@ -6483,6 +6593,15 @@ dependencies = [
"rustix",
]
[[package]]
name = "xz2"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2"
dependencies = [
"lzma-sys",
]
[[package]]
name = "yansi"
version = "1.0.1"
@ -6571,6 +6690,20 @@ name = "zeroize"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde"
dependencies = [
"zeroize_derive",
]
[[package]]
name = "zeroize_derive"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.93",
]
[[package]]
name = "zerovec"
@ -6611,13 +6744,26 @@ version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "27c03817464f64e23f6f37574b4fdc8cf65925b5bfd2b0f2aedf959791941f88"
dependencies = [
"aes",
"arbitrary",
"bzip2",
"constant_time_eq",
"crc32fast",
"crossbeam-utils",
"deflate64",
"flate2",
"getrandom 0.3.1",
"hmac",
"indexmap 2.7.0",
"lzma-rs",
"memchr",
"pbkdf2",
"sha1",
"time",
"xz2",
"zeroize",
"zopfli",
"zstd",
]
[[package]]
@ -6634,6 +6780,34 @@ dependencies = [
"simd-adler32",
]
[[package]]
name = "zstd"
version = "0.13.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a"
dependencies = [
"zstd-safe",
]
[[package]]
name = "zstd-safe"
version = "7.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d"
dependencies = [
"zstd-sys",
]
[[package]]
name = "zstd-sys"
version = "2.0.15+zstd.1.5.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eb81183ddd97d0c74cedf1d50d85c8d08c1b8b68ee863bdee9e706eedba1a237"
dependencies = [
"cc",
"pkg-config",
]
[[package]]
name = "zune-core"
version = "0.4.12"

View File

@ -38,6 +38,7 @@ calamine = "0.27.0"
pdfium-render = "0.8.31"
sys-locale = "0.3.2"
cfg-if = "1.0.0"
pptx-to-md = "0.3.0"
# Fixes security vulnerability downstream, where the upstream is not fixed yet:
url = "2.5"

View File

@ -1,22 +1,24 @@
use std::path::Path;
use std::pin::Pin;
use std::cmp::min;
use crate::api_token::APIToken;
use crate::pandoc::PandocProcessBuilder;
use crate::pdfium::PdfiumInit;
use async_stream::stream;
use base64::{engine::general_purpose, Engine as _};
use calamine::{open_workbook_auto, Reader};
use file_format::{FileFormat, Kind};
use futures::{Stream, StreamExt};
use pdfium_render::prelude::Pdfium;
use pptx_to_md::{ImageHandlingMode, ParserConfig, PptxContainer};
use rocket::get;
use rocket::response::stream::{Event, EventStream};
use rocket::serde::Serialize;
use rocket::tokio::select;
use rocket::Shutdown;
use std::path::Path;
use std::pin::Pin;
use tokio::io::AsyncBufReadExt;
use tokio::sync::mpsc;
use tokio_stream::wrappers::ReceiverStream;
use rocket::Shutdown;
use rocket::response::stream::{EventStream, Event};
use rocket::tokio::select;
use rocket::serde::Serialize;
use rocket::get;
use crate::api_token::APIToken;
use crate::pandoc::PandocProcessBuilder;
use crate::pdfium::PdfiumInit;
#[derive(Debug, Serialize)]
pub struct Chunk {
@ -29,6 +31,26 @@ pub enum Metadata {
Text { line_number: usize },
Pdf { page_number: usize },
Spreadsheet { sheet_name: String, row_number: usize },
Document,
Image,
Presentation {
slide_number: u32,
image: Option<Base64Image>,
},
}
#[derive(Debug, Serialize)]
pub struct Base64Image {
pub id: String,
pub content: String,
pub segment: usize,
pub is_end: bool
}
impl Base64Image {
fn new(id: String, content: String, segment: usize, is_end: bool) -> Self {
Self { id, content, segment, is_end }
}
Document {},
Image {},
}
@ -36,6 +58,7 @@ pub enum Metadata {
const TO_MARKDOWN: &str = "markdown";
const DOCX: &str = "docx";
const ODT: &str = "odt";
const IMAGE_SEGMENT_SIZE_IN_CHARS: usize = 8_192; // equivalent to ~ 5500 token
type Result<T> = std::result::Result<T, Box<dyn std::error::Error + Send + Sync>>;
type ChunkStream = Pin<Box<dyn Stream<Item = Result<Chunk>> + Send>>;
@ -86,6 +109,8 @@ async fn stream_data(file_path: &str) -> Result<ChunkStream> {
let from = if ext == DOCX { "docx" } else { "odt" };
convert_with_pandoc(file_path, from, TO_MARKDOWN).await?
}
"pptx" => stream_pptx(file_path).await?,
"xlsx" | "ods" | "xls" | "xlsm" | "xlsb" | "xla" | "xlam" => {
stream_spreadsheet_as_csv(file_path).await?
@ -115,7 +140,7 @@ async fn stream_data(file_path: &str) -> Result<ChunkStream> {
Kind::Presentation => match fmt {
FileFormat::OfficeOpenXmlPresentation => {
convert_with_pandoc(file_path, fmt.extension(), TO_MARKDOWN).await?
stream_pptx(file_path).await?
}
_ => stream_text_file(file_path).await?,
},
@ -294,4 +319,87 @@ async fn chunk_image(file_path: &str) -> Result<ChunkStream> {
};
Ok(Box::pin(stream))
}
async fn stream_pptx(file_path: &str) -> Result<ChunkStream> {
let path = Path::new(file_path).to_owned();
let parser_config = ParserConfig::builder()
.extract_images(true)
.compress_images(true)
.quality(75)
.image_handling_mode(ImageHandlingMode::Manually)
.build();
let mut streamer = tokio::task::spawn_blocking(move || {
PptxContainer::open(&path, parser_config).map_err(|e| Box::new(e) as Box<dyn std::error::Error + Send + Sync>)
}).await??;
let (tx, rx) = mpsc::channel(32);
tokio::spawn(async move {
for slide_result in streamer.iter_slides() {
match slide_result {
Ok(slide) => {
if let Some(md_content) = slide.convert_to_md() {
let chunk = Chunk {
content: md_content,
metadata: Metadata::Presentation {
slide_number: slide.slide_number,
image: None,
},
};
if tx.send(Ok(chunk)).await.is_err() {
break;
}
}
if let Some(images) = slide.load_images_manually() {
for image in images.iter() {
let base64_data = &image.base64_content;
let total_length = base64_data.len();
let mut offset = 0;
let mut segment_index = 0;
while offset < total_length {
let end = min(offset + IMAGE_SEGMENT_SIZE_IN_CHARS, total_length);
let segment_content = &base64_data[offset..end];
let is_end = end == total_length;
let base64_image = Base64Image::new(
image.img_ref.id.clone(),
segment_content.to_string(),
segment_index,
is_end
);
let chunk = Chunk {
content: String::new(),
metadata: Metadata::Presentation {
slide_number: slide.slide_number,
image: Some(base64_image),
},
};
if tx.send(Ok(chunk)).await.is_err() {
break;
}
offset = end;
segment_index += 1;
}
}
}
},
Err(e) => {
let _ = tx.send(Err(Box::new(e) as Box<dyn std::error::Error + Send + Sync>)).await;
break;
}
}
}
});
Ok(Box::pin(ReceiverStream::new(rx)))
}