From ef8d1051ae13c0602a1af519dad614bf76f51dee Mon Sep 17 00:00:00 2001 From: nilsk Date: Mon, 24 Mar 2025 21:26:41 +0100 Subject: [PATCH] WIP: included mod to read arbitrary data from the file system and send the chunks with server sent events to the front end --- runtime/Cargo.lock | 631 ++++++++++++++++++++++++++++++++++++- runtime/Cargo.toml | 4 + runtime/src/file_data.rs | 325 +++++++++++++++++++ runtime/src/lib.rs | 3 +- runtime/src/runtime_api.rs | 1 + 5 files changed, 953 insertions(+), 11 deletions(-) create mode 100644 runtime/src/file_data.rs diff --git a/runtime/Cargo.lock b/runtime/Cargo.lock index a7c4a12f..dda5652b 100644 --- a/runtime/Cargo.lock +++ b/runtime/Cargo.lock @@ -17,6 +17,12 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "adler2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" + [[package]] name = "aes" version = "0.8.4" @@ -37,6 +43,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "aligned-vec" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4aa90d7ce82d4be67b64039a3d588d38dbcc6736577de4a847025ce5b0c468d1" + [[package]] name = "alloc-no-stdlib" version = "2.0.4" @@ -73,6 +85,15 @@ version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" +[[package]] +name = "arbitrary" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dde20b3d026af13f561bdd0f15edf01fc734f0dafcedbaf42bba506a9517f223" +dependencies = [ + "derive_arbitrary", +] + [[package]] name = "arboard" version = "3.4.1" @@ -91,6 +112,23 @@ dependencies = [ "x11rb", ] +[[package]] +name = "arg_enum_proc_macro" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ae92a5119aa49cdbcf6b9f893fe4e1d98b04ccbf82ee0584ad948a44a734dea" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.93", +] + +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + [[package]] name = "async-stream" version = "0.3.5" @@ -175,6 +213,29 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" +[[package]] +name = "av1-grain" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6678909d8c5d46a42abcf571271e15fdbc0a225e3646cf23762cd415046c78bf" +dependencies = [ + "anyhow", + "arrayvec", + "log", + "nom", + "num-rational", + "v_frame", +] + +[[package]] +name = "avif-serialize" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98922d6a4cfbcb08820c69d8eeccc05bb1f29bfa06b4f5b1dbfe9a868bd7608e" +dependencies = [ + "arrayvec", +] + [[package]] name = "backtrace" version = "0.3.73" @@ -185,7 +246,7 @@ dependencies = [ "cc", "cfg-if", "libc", - "miniz_oxide", + "miniz_oxide 0.7.4", "object", "rustc-demangle", ] @@ -223,6 +284,12 @@ dependencies = [ "serde", ] +[[package]] +name = "bit_field" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc827186963e592360843fb5ba4b973e145841266c1357f7180c43526f2e5b61" + [[package]] name = "bitflags" version = "1.3.2" @@ -235,6 +302,12 @@ version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" +[[package]] +name = "bitstream-io" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6099cdc01846bc367c4e7dd630dc5966dccf36b652fae7a74e17b640411a91b2" + [[package]] name = "block" version = "0.1.6" @@ -299,6 +372,12 @@ dependencies = [ "serde", ] +[[package]] +name = "built" +version = "0.7.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56ed6191a7e78c36abdb16ab65341eefd73d64d303fffccdbb00d51e4205967b" + [[package]] name = "bumpalo" version = "3.16.0" @@ -356,6 +435,22 @@ dependencies = [ "system-deps 6.2.2", ] +[[package]] +name = "calamine" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "138646b9af2c5d7f1804ea4bf93afc597737d2bd4f7341d67c48b03316976eb1" +dependencies = [ + "byteorder", + "chrono", + "codepage", + "encoding_rs", + "log", + "quick-xml 0.31.0", + "serde", + "zip 2.5.0", +] + [[package]] name = "cargo_toml" version = "0.15.3" @@ -380,6 +475,10 @@ name = "cc" version = "1.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2aba8f4e9906c7ce3c73463f62a7f0c65183ada1a2d47e397cc8810827f9694f" +dependencies = [ + "jobserver", + "libc", +] [[package]] name = "cesu8" @@ -425,15 +524,17 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.38" +version = "0.4.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +checksum = "1a7964611d71df112cb1730f2ee67324fcf4d0fc6606acbbe9bfe06df124637c" dependencies = [ "android-tzdata", "iana-time-zone", + "js-sys", "num-traits", "serde", - "windows-targets 0.52.6", + "wasm-bindgen", + "windows-link", ] [[package]] @@ -485,6 +586,15 @@ dependencies = [ "objc", ] +[[package]] +name = "codepage" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48f68d061bc2828ae826206326e61251aca94c1e4a5305cf52d9138639c918b4" +dependencies = [ + "encoding_rs", +] + [[package]] name = "color_quant" version = "1.1.0" @@ -501,6 +611,26 @@ dependencies = [ "memchr", ] +[[package]] +name = "console_error_panic_hook" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06aeb73f470f66dcdbf7223caeebb85984942f22f1adb2a088cf9668146bbbc" +dependencies = [ + "cfg-if", + "wasm-bindgen", +] + +[[package]] +name = "console_log" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be8aed40e4edbf4d3b4431ab260b63fdc40f5780a4766824329ea0f1eefe3c0f" +dependencies = [ + "log", + "web-sys", +] + [[package]] name = "convert_case" version = "0.4.0" @@ -629,9 +759,15 @@ dependencies = [ [[package]] name = "crossbeam-utils" -version = "0.8.20" +version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crunchy" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929" [[package]] name = "crypto-common" @@ -749,6 +885,17 @@ dependencies = [ "serde", ] +[[package]] +name = "derive_arbitrary" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30542c1ad912e0e3d22a1935c290e12e8a29d704a420177a31faad4a601a0800" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.93", +] + [[package]] name = "derive_more" version = "0.99.18" @@ -922,6 +1069,21 @@ version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a0474425d51df81997e2f90a21591180b38eccf27292d755f3e30750225c175b" +[[package]] +name = "exr" +version = "1.73.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83197f59927b46c04a183a619b7c29df34e63e63c7869320862268c0ef687e0" +dependencies = [ + "bit_field", + "half", + "lebe", + "miniz_oxide 0.8.5", + "rayon-core", + "smallvec", + "zune-inflate", +] + [[package]] name = "fastrand" version = "2.1.0" @@ -961,6 +1123,12 @@ dependencies = [ "version_check", ] +[[package]] +name = "file-format" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7ef3d5e8ae27277c8285ac43ed153158178ef0f79567f32024ca8140a0c7cd8" + [[package]] name = "filetime" version = "0.2.23" @@ -980,7 +1148,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae" dependencies = [ "crc32fast", - "miniz_oxide", + "miniz_oxide 0.7.4", ] [[package]] @@ -1310,6 +1478,16 @@ dependencies = [ "wasi 0.11.0+wasi-snapshot-preview1", ] +[[package]] +name = "gif" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fb2d69b19215e18bb912fa30f7ce15846e301408695e44e0ef719f1da9e19f2" +dependencies = [ + "color_quant", + "weezl", +] + [[package]] name = "gimli" version = "0.29.0" @@ -1514,6 +1692,16 @@ dependencies = [ "tracing", ] +[[package]] +name = "half" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7db2ff139bba50379da6aa0766b52fdcb62cb5b263009b09ed58ba604e14bbd1" +dependencies = [ + "cfg-if", + "crunchy", +] + [[package]] name = "hashbrown" version = "0.12.3" @@ -1969,11 +2157,37 @@ checksum = "99314c8a2152b8ddb211f924cdae532d8c5e4c8bb54728e12fff1b0cd5963a10" dependencies = [ "bytemuck", "byteorder-lite", + "color_quant", + "exr", + "gif", + "image-webp", "num-traits", "png", + "qoi", + "ravif", + "rayon", + "rgb", "tiff", + "zune-core", + "zune-jpeg", ] +[[package]] +name = "image-webp" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f79afb8cbee2ef20f59ccd477a218c12a93943d075b492015ecb1bb81f8ee904" +dependencies = [ + "byteorder-lite", + "quick-error", +] + +[[package]] +name = "imgref" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0263a3d970d5c054ed9312c0057b4f3bde9c0b33836d3637361d4a9e6e7a408" + [[package]] name = "indexmap" version = "1.9.3" @@ -2030,6 +2244,17 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "interpolate_name" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c34819042dc3d3971c46c2190835914dfbe0c3c13f61449b2997f4e9722dfa60" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.93", +] + [[package]] name = "ipnet" version = "2.9.0" @@ -2047,6 +2272,24 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "0.4.8" @@ -2102,6 +2345,15 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" +[[package]] +name = "jobserver" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" +dependencies = [ + "libc", +] + [[package]] name = "jpeg-decoder" version = "0.3.1" @@ -2184,6 +2436,12 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +[[package]] +name = "lebe" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03087c2bad5e1034e8cace5926dec053fb3790248370865f5117a7d0213354c8" + [[package]] name = "libc" version = "0.2.155" @@ -2199,6 +2457,26 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "libfuzzer-sys" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf78f52d400cf2d84a3a973a78a592b4adc535739e0a5597a0da6f0c357adc75" +dependencies = [ + "arbitrary", + "cc", +] + +[[package]] +name = "libloading" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34" +dependencies = [ + "cfg-if", + "windows-targets 0.52.6", +] + [[package]] name = "libredox" version = "0.1.3" @@ -2231,6 +2509,12 @@ dependencies = [ "scopeguard", ] +[[package]] +name = "lockfree-object-pool" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9374ef4228402d4b7e403e5838cb880d9ee663314b0a900d5a6aabf0c213552e" + [[package]] name = "log" version = "0.4.22" @@ -2252,6 +2536,15 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "loop9" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fae87c125b03c1d2c0150c90365d7d6bcc53fb73a9acaef207d2d065860f062" +dependencies = [ + "imgref", +] + [[package]] name = "mac" version = "0.1.1" @@ -2296,6 +2589,21 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2532096657941c2fea9c289d370a250971c689d4f143798ff67113ec042024a5" +[[package]] +name = "maybe-owned" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4facc753ae494aeb6e3c22f839b158aebd4f9270f55cd3c79906c45476c47ab4" + +[[package]] +name = "maybe-rayon" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ea1f30cedd69f0a2954655f7188c6a834246d2bcf1e315e2ac40c4b24dc9519" +dependencies = [ + "cfg-if", +] + [[package]] name = "memchr" version = "2.7.4" @@ -2324,14 +2632,18 @@ dependencies = [ "aes", "arboard", "base64 0.22.1", + "calamine", "cbc", + "chrono", "cipher", + "file-format", "flexi_logger", "hmac", "keyring", "log", "once_cell", "pbkdf2", + "pdfium-render", "rand 0.8.5", "rand_chacha 0.3.1", "rcgen", @@ -2347,6 +2659,12 @@ dependencies = [ "url", ] +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "minisign-verify" version = "0.2.1" @@ -2363,6 +2681,15 @@ dependencies = [ "simd-adler32", ] +[[package]] +name = "miniz_oxide" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e3e04debbb59698c15bacbb6d93584a8c0ca9cc3213cb423d31f760d8843ce5" +dependencies = [ + "adler2", +] + [[package]] name = "mio" version = "1.0.1" @@ -2451,6 +2778,22 @@ version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb" +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "noop_proc_macro" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0676bb32a98c1a483ce53e500a81ad9c3d5b3f7c920c28c24e9cb0980d0b5bc8" + [[package]] name = "nu-ansi-term" version = "0.46.0" @@ -2509,6 +2852,17 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" +[[package]] +name = "num-derive" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.93", +] + [[package]] name = "num-integer" version = "0.1.46" @@ -2861,6 +3215,12 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + [[package]] name = "pathdiff" version = "0.2.1" @@ -2877,6 +3237,32 @@ dependencies = [ "hmac", ] +[[package]] +name = "pdfium-render" +version = "0.8.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5cbb29c282cfbd0a8142ccd3bb0ce8da53e59141ce02a023b980bc72b6c0eec" +dependencies = [ + "bitflags 2.6.0", + "bytemuck", + "bytes", + "chrono", + "console_error_panic_hook", + "console_log", + "image 0.25.2", + "itertools 0.14.0", + "js-sys", + "libloading", + "log", + "maybe-owned", + "once_cell", + "utf16string", + "vecmath", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + [[package]] name = "pear" version = "0.2.9" @@ -3082,6 +3468,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "piston-float" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad78bf43dcf80e8f950c92b84f938a0fc7590b7f6866fbcbeca781609c115590" + [[package]] name = "pkg-config" version = "0.3.30" @@ -3096,7 +3488,7 @@ checksum = "42cf17e9a1800f5f396bc67d193dc9411b59012a5876445ef450d449881e1016" dependencies = [ "base64 0.22.1", "indexmap 2.7.0", - "quick-xml", + "quick-xml 0.32.0", "serde", "time", ] @@ -3111,7 +3503,7 @@ dependencies = [ "crc32fast", "fdeflate", "flate2", - "miniz_oxide", + "miniz_oxide 0.7.4", ] [[package]] @@ -3194,6 +3586,50 @@ dependencies = [ "yansi", ] +[[package]] +name = "profiling" +version = "1.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "afbdc74edc00b6f6a218ca6a5364d6226a259d4b8ea1af4a0ea063f27e179f4d" +dependencies = [ + "profiling-procmacros", +] + +[[package]] +name = "profiling-procmacros" +version = "1.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a65f2e60fbf1063868558d69c6beacf412dc755f9fc020f514b7955fc914fe30" +dependencies = [ + "quote", + "syn 2.0.93", +] + +[[package]] +name = "qoi" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f6d64c71eb498fe9eae14ce4ec935c555749aef511cca85b5568910d6e48001" +dependencies = [ + "bytemuck", +] + +[[package]] +name = "quick-error" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" + +[[package]] +name = "quick-xml" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1004a344b30a54e2ee58d66a71b32d2db2feb0a31f9a2d302bf0536f15de2a33" +dependencies = [ + "encoding_rs", + "memchr", +] + [[package]] name = "quick-xml" version = "0.32.0" @@ -3293,12 +3729,81 @@ dependencies = [ "rand_core 0.5.1", ] +[[package]] +name = "rav1e" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd87ce80a7665b1cce111f8a16c1f3929f6547ce91ade6addf4ec86a8dda5ce9" +dependencies = [ + "arbitrary", + "arg_enum_proc_macro", + "arrayvec", + "av1-grain", + "bitstream-io", + "built", + "cfg-if", + "interpolate_name", + "itertools 0.12.1", + "libc", + "libfuzzer-sys", + "log", + "maybe-rayon", + "new_debug_unreachable", + "noop_proc_macro", + "num-derive", + "num-traits", + "once_cell", + "paste", + "profiling", + "rand 0.8.5", + "rand_chacha 0.3.1", + "simd_helpers", + "system-deps 6.2.2", + "thiserror 1.0.63", + "v_frame", + "wasm-bindgen", +] + +[[package]] +name = "ravif" +version = "0.11.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2413fd96bd0ea5cdeeb37eaf446a22e6ed7b981d792828721e74ded1980a45c6" +dependencies = [ + "avif-serialize", + "imgref", + "loop9", + "quick-error", + "rav1e", + "rgb", +] + [[package]] name = "raw-window-handle" version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2ff9a1f06a88b01621b7ae906ef0211290d1c8a168a15542486a8f61c0833b9" +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "rcgen" version = "0.13.2" @@ -3513,6 +4018,15 @@ dependencies = [ "windows 0.37.0", ] +[[package]] +name = "rgb" +version = "0.8.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57397d16646700483b67d2dd6511d79318f9d057fdbd21a4066aeac8b41d310a" +dependencies = [ + "bytemuck", +] + [[package]] name = "ring" version = "0.17.8" @@ -3977,6 +4491,15 @@ version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" +[[package]] +name = "simd_helpers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95890f873bec569a0362c235787f3aca6e1e887302ba4840839bcc6459c42da6" +dependencies = [ + "quote", +] + [[package]] name = "siphasher" version = "0.3.11" @@ -4333,7 +4856,7 @@ dependencies = [ "webkit2gtk", "webview2-com", "windows 0.39.0", - "zip", + "zip 0.6.6", ] [[package]] @@ -4928,6 +5451,15 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" +[[package]] +name = "utf16string" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b62a1e85e12d5d712bf47a85f426b73d303e2d00a90de5f3004df3596e9d216" +dependencies = [ + "byteorder", +] + [[package]] name = "utf8_iter" version = "1.0.4" @@ -4943,6 +5475,17 @@ dependencies = [ "getrandom 0.2.15", ] +[[package]] +name = "v_frame" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6f32aaa24bacd11e488aa9ba66369c7cd514885742c9fe08cfe85884db3e92b" +dependencies = [ + "aligned-vec", + "num-traits", + "wasm-bindgen", +] + [[package]] name = "valuable" version = "0.1.0" @@ -4955,6 +5498,15 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" +[[package]] +name = "vecmath" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "956ae1e0d85bca567dee1dcf87fb1ca2e792792f66f87dced8381f99cd91156a" +dependencies = [ + "piston-float", +] + [[package]] name = "version-compare" version = "0.0.11" @@ -5300,6 +5852,12 @@ dependencies = [ "windows-tokens", ] +[[package]] +name = "windows-link" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38" + [[package]] name = "windows-metadata" version = "0.39.0" @@ -5818,3 +6376,56 @@ dependencies = [ "crc32fast", "crossbeam-utils", ] + +[[package]] +name = "zip" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27c03817464f64e23f6f37574b4fdc8cf65925b5bfd2b0f2aedf959791941f88" +dependencies = [ + "arbitrary", + "crc32fast", + "crossbeam-utils", + "flate2", + "indexmap 2.7.0", + "memchr", + "zopfli", +] + +[[package]] +name = "zopfli" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5019f391bac5cf252e93bbcc53d039ffd62c7bfb7c150414d61369afe57e946" +dependencies = [ + "bumpalo", + "crc32fast", + "lockfree-object-pool", + "log", + "once_cell", + "simd-adler32", +] + +[[package]] +name = "zune-core" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f423a2c17029964870cfaabb1f13dfab7d092a62a29a89264f4d36990ca414a" + +[[package]] +name = "zune-inflate" +version = "0.2.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73ab332fe2f6680068f3582b16a24f90ad7096d5d39b974d1c0aff0125116f02" +dependencies = [ + "simd-adler32", +] + +[[package]] +name = "zune-jpeg" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99a5bab8d7dedf81405c4bb1f2b83ea057643d9cb28778cea9eecddeedd2e028" +dependencies = [ + "zune-core", +] diff --git a/runtime/Cargo.toml b/runtime/Cargo.toml index c208b1b6..21d28d93 100644 --- a/runtime/Cargo.toml +++ b/runtime/Cargo.toml @@ -30,6 +30,10 @@ pbkdf2 = "0.12.2" hmac = "0.12.1" sha2 = "0.10.8" rcgen = { version = "0.13.2", features = ["pem"] } +file-format = "0.26.0" +calamine = { version = "0.26.1", features = ["dates"] } +chrono = "0.4.39" +pdfium-render = "0.8.27" # Fixes security vulnerability downstream, where the upstream is not fixed yet: url = "2.5" diff --git a/runtime/src/file_data.rs b/runtime/src/file_data.rs new file mode 100644 index 00000000..8701eeab --- /dev/null +++ b/runtime/src/file_data.rs @@ -0,0 +1,325 @@ +use std::path::Path; +use std::io::{BufRead, BufReader}; +use base64::{engine::general_purpose, Engine as _}; +use calamine::{open_workbook_auto, Reader}; +use file_format::{FileFormat, Kind}; +use pdfium_render::prelude::Pdfium; +use std::error::Error; + +use std::fs::File; +use std::io::Read; +use std::process::Command; +use rocket::post; +use rocket::response::stream::{Event, EventStream}; +use rocket::{State, Shutdown}; +use rocket::fs::{relative, FileServer}; +use rocket::form::Form; +use rocket::serde::{Serialize, Deserialize}; +use rocket::tokio::sync::broadcast::{channel, Sender, error::RecvError}; +use rocket::tokio::select; + +#[derive(Debug)] +pub struct Chunk { + pub content: String, + pub metadata: Metadata, +} + +#[derive(Debug)] +pub enum Metadata { + Text { line_number: usize }, + Pdf { page_number: usize }, + Spreadsheet { sheet_name: String, row_number: usize }, + Document, + Image, +} + +const TO_MARKDOWN: &str = "markdown"; +const DOCX: &str = "docx"; +const ODT: &str = "odt"; + +#[post("/system/file-data/extract", data = "")] +pub async fn extract_file_data( + file_path: String, + queue: &State>, + mut shutdown: Shutdown +) -> EventStream![] { + let mut rx = queue.subscribe(); + let path = file_path.clone(); + + // Start extraction in a separate task + let extractor = rocket::tokio::task::spawn_blocking(move || { + stream_data(&path).map(|iter| { + iter.map(|chunk| { + chunk.map_err(|e| format!("Chunk error: {}", e)) + }) + }) + }); + + EventStream! { + let mut extraction_stream = match extractor.await { + Ok(Ok(stream)) => stream, + Ok(Err(e)) => { + yield Event::json(&ExtractEvent::Error(e.to_string())); + return; + } + Err(e) => { + yield Event::json(&ExtractEvent::Error(format!("Task failed: {}", e))); + return; + } + }; + + loop { + let chunk = select! { + chunk = extraction_stream.next() => chunk, + _ = &mut shutdown => break, + }; + + match chunk { + Some(Ok(chunk)) => { + let event = ExtractEvent::Chunk { + content: chunk.content, + metadata: match chunk.metadata { + Metadata::Text { line_number } => + MetadataRepr::Text { line_number }, + Metadata::Pdf { page_number } => + MetadataRepr::Pdf { page_number }, + } + }; + yield Event::json(&event); + } + Some(Err(e)) => { + yield Event::json(&ExtractEvent::Error(e)); + } + None => break, + } + } + + yield Event::json(&ExtractEvent::Completed); + } +} + + +// Serialisierbare Datentypen +#[derive(serde::Serialize)] +#[serde(tag = "type", content = "data")] +enum ExtractEvent { + Chunk { + content: String, + metadata: MetadataRepr, + }, + Error(String), + Completed, +} + +#[derive(serde::Serialize)] +#[serde(tag = "type")] +enum MetadataRepr { + Text { line_number: usize }, + Pdf { page_number: usize }, + Spreadsheet { sheet_name: String, row_number: usize }, + Document, + Image, +} + + +/// Streams the content of a file in chunks with format-specific metadata. +/// +/// Takes a file path as input and returns a stream of chunks containing +/// content segments with associated metadata. Supports various file types +/// including documents, spreadsheets, presentations, images, and PDFs. +/// +/// The streaming process works as follows: +/// - Verifies the file exists +/// - Detects the file format using content and extension +/// - Processes content incrementally based on format: +/// - Text files: Streams line by line with line numbers +/// - PDFs: Extracts text page by page with page numbers +/// - Spreadsheets: Outputs rows with sheet names and row numbers +/// - Office documents: Converts to Markdown as single chunk +/// - Images: Returns Base64 encoding as single chunk +/// - HTML files: Converts to Markdown as single chunk +/// +/// # Parameters +/// - `file_path`: Path to the file to process (platform independent) +/// +/// # Returns +/// Returns a `Result` containing: +/// - `Ok`: Boxed iterator yielding `Result` items +/// - `Err`: Initial processing error (e.g., file not found) +/// +/// Each iterator item represents either: +/// - `Ok(Chunk)`: Content segment with metadata +/// - `Err`: Error during chunk processing +/// +/// # Chunk Structure +/// - `content`: Text segment or Base64 image data +/// - `metadata`: Context information including: +/// - Line numbers for text files +/// - Page numbers for PDFs +/// - Sheet/row numbers for spreadsheets +/// - Document type markers for office formats +/// - Image type marker for images +/// +/// # Errors +/// - Initial errors: File not found, format detection failures +/// - Chunk-level errors: Format-specific parsing errors +/// - Pandoc conversion failures for office documents +/// +/// # Examples +/// ``` +/// let chunk_stream = stream_data("data.txt")?; +/// for chunk_result in chunk_stream { +/// match chunk_result { +/// Ok(chunk) => { +/// println!("Metadata: {:?}", chunk.metadata); +/// println!("Content: {}", chunk.content); +/// } +/// Err(e) => eprintln!("Error: {}", e), +/// } +/// } +/// ``` +fn stream_data( + file_path: &str, +) -> Result>>>, Box> { + if !Path::new(file_path).exists() { + return Err(Box::from("File does not exist.")); + } + + let fmt = FileFormat::from_file(file_path)?; + let ext = file_path.split('.').last().unwrap_or(""); + + match ext { + DOCX | ODT => { + let from = if ext == DOCX { "docx" } else { "odt" }; + convert_with_pandoc(file_path, from, TO_MARKDOWN) + } + "xlsx" | "ods" | "xls" | "xlsm" | "xlsb" | "xla" | "xlam" => { + stream_spreadsheet_as_csv(file_path) + } + _ => match fmt.kind() { + Kind::Document => match fmt { + FileFormat::PortableDocumentFormat => read_pdf(file_path), + FileFormat::MicrosoftWordDocument => { + convert_with_pandoc(file_path, "docx", TO_MARKDOWN) + } + FileFormat::OfficeOpenXmlDocument => { + convert_with_pandoc(file_path, fmt.extension(), TO_MARKDOWN) + } + _ => stream_text_file(file_path), + }, + Kind::Ebook => Err(Box::from("Ebooks not yet supported")), + Kind::Image => chunk_image(file_path), + Kind::Other => match fmt { + FileFormat::HypertextMarkupLanguage => { + convert_with_pandoc(file_path, fmt.extension(), TO_MARKDOWN) + } + _ => stream_text_file(file_path), + }, + Kind::Presentation => match fmt { + FileFormat::OfficeOpenXmlPresentation => { + convert_with_pandoc(file_path, fmt.extension(), TO_MARKDOWN) + } + _ => stream_text_file(file_path), + }, + Kind::Spreadsheet => stream_spreadsheet_as_csv(file_path), + _ => stream_text_file(file_path), + }, + } +} + +fn stream_text_file(file_path: &str) -> Result>>>, Box> { + let file = File::open(file_path)?; + let reader = BufReader::new(file); + let iter = reader.lines() + .enumerate() + .map(|(i, line)| { + Ok(Chunk { + content: line?, + metadata: Metadata::Text { line_number: i + 1 }, + }) + }); + Ok(Box::new(iter)) +} + +fn read_pdf(file_path: &str) -> Result>>>, Box> { + let pdfium = Pdfium::default(); + let doc = pdfium.load_pdf_from_file(file_path, None)?; + let pages = doc.pages(); + let chunks: Vec<_> = pages.iter() + .enumerate() + .map(|(i, page)| { + let content = page.text()?.all(); + Ok(Chunk { + content, + metadata: Metadata::Pdf { page_number: i + 1 }, + }) + }) + .collect(); + Ok(Box::new(chunks.into_iter())) +} + +fn stream_spreadsheet_as_csv(file_path: &str) -> Result>>>, Box> { + let mut workbook = open_workbook_auto(file_path)?; + let mut chunks = Vec::new(); + + for sheet_name in workbook.sheet_names() { + let range = workbook.worksheet_range(&sheet_name)?; + for (row_idx, row) in range.rows().enumerate() { + let content = row.iter() + .map(|cell| cell.to_string()) + .collect::>() + .join(","); + chunks.push(Ok(Chunk { + content, + metadata: Metadata::Spreadsheet { + sheet_name: sheet_name.clone(), + row_number: row_idx + 1, + }, + })); + } + } + Ok(Box::new(chunks.into_iter())) +} + +fn convert_with_pandoc( + file_path: &str, + from: &str, + to: &str, +) -> Result>>>, Box> { + let output = Command::new("pandoc") + .arg(file_path) + .args(&["-f", from, "-t", to]) + .output()?; + if output.status.success() { + let content = String::from_utf8(output.stdout)?; + Ok(Box::new(std::iter::once(Ok(Chunk { + content, + metadata: Metadata::Document, + })))) + } else { + Err(Box::from(String::from_utf8_lossy(&output.stderr).into_owned())) + } +} + +fn read_img_as_base64(file_path: &str) -> Result> { + let img_result = File::open(file_path); + + match img_result { + Ok(mut img) => { + let mut buff = Vec::new(); + img.read_to_end(&mut buff)?; + + let base64 = general_purpose::STANDARD.encode(&buff); + Ok(base64) + } + Err(e) => Err(Box::from(format!("{}", e))), + } +} + +fn chunk_image(file_path: &str) -> Result>>>, Box> { + let base64 = read_img_as_base64(file_path)?; + Ok(Box::new(std::iter::once(Ok(Chunk { + content: base64, + metadata: Metadata::Image, + })))) +} diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index 7cdf018c..a97347d3 100644 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -8,4 +8,5 @@ pub mod app_window; pub mod secret; pub mod clipboard; pub mod runtime_api; -pub mod certificate; \ No newline at end of file +pub mod certificate; +pub mod file_data; \ No newline at end of file diff --git a/runtime/src/runtime_api.rs b/runtime/src/runtime_api.rs index 963900d7..b536c328 100644 --- a/runtime/src/runtime_api.rs +++ b/runtime/src/runtime_api.rs @@ -90,6 +90,7 @@ pub fn start_runtime_api() { crate::secret::delete_secret, crate::environment::get_data_directory, crate::environment::get_config_directory, + crate::file_data::extract_file_data, ]) .ignite().await.unwrap() .launch().await.unwrap();