Formatting

This commit is contained in:
Thorsten Sommer 2025-04-01 19:51:40 +02:00
parent 9776251d2f
commit d9c6574093
Signed by: tsommer
GPG Key ID: 371BBA77A02C0108

View File

@ -42,7 +42,6 @@ type ChunkStream = Pin<Box<dyn Stream<Item = Result<Chunk>> + Send>>;
pub async fn extract_data(path: String, mut end: Shutdown) -> EventStream![] { pub async fn extract_data(path: String, mut end: Shutdown) -> EventStream![] {
EventStream! { EventStream! {
let stream_result = stream_data(&path).await; let stream_result = stream_data(&path).await;
match stream_result { match stream_result {
Ok(mut stream) => { Ok(mut stream) => {
loop { loop {
@ -61,6 +60,7 @@ pub async fn extract_data(path: String, mut end: Shutdown) -> EventStream![] {
yield Event::json(&chunk); yield Event::json(&chunk);
} }
}, },
Err(e) => { Err(e) => {
yield Event::json(&format!("Error starting stream: {}", e)); yield Event::json(&format!("Error starting stream: {}", e));
} }
@ -74,21 +74,21 @@ async fn stream_data(file_path: &str) -> Result<ChunkStream> {
} }
let file_path_clone = file_path.to_owned(); let file_path_clone = file_path.to_owned();
let fmt = tokio::task::spawn_blocking(move || { let fmt = tokio::task::spawn_blocking(move || {
FileFormat::from_file(&file_path_clone) FileFormat::from_file(&file_path_clone)
}).await??; }).await??;
let ext = file_path.split('.').last().unwrap_or(""); let ext = file_path.split('.').last().unwrap_or("");
let stream = match ext { let stream = match ext {
DOCX | ODT => { DOCX | ODT => {
let from = if ext == DOCX { "docx" } else { "odt" }; let from = if ext == DOCX { "docx" } else { "odt" };
convert_with_pandoc(file_path, from, TO_MARKDOWN).await? convert_with_pandoc(file_path, from, TO_MARKDOWN).await?
} }
"xlsx" | "ods" | "xls" | "xlsm" | "xlsb" | "xla" | "xlam" => { "xlsx" | "ods" | "xls" | "xlsm" | "xlsb" | "xla" | "xlam" => {
stream_spreadsheet_as_csv(file_path).await? stream_spreadsheet_as_csv(file_path).await?
} }
_ => match fmt.kind() { _ => match fmt.kind() {
Kind::Document => match fmt { Kind::Document => match fmt {
FileFormat::PortableDocumentFormat => read_pdf(file_path).await?, FileFormat::PortableDocumentFormat => read_pdf(file_path).await?,
@ -100,20 +100,24 @@ async fn stream_data(file_path: &str) -> Result<ChunkStream> {
} }
_ => stream_text_file(file_path).await?, _ => stream_text_file(file_path).await?,
}, },
Kind::Ebook => return Err("Ebooks not yet supported".into()), Kind::Ebook => return Err("Ebooks not yet supported".into()),
Kind::Image => chunk_image(file_path).await?, Kind::Image => chunk_image(file_path).await?,
Kind::Other => match fmt { Kind::Other => match fmt {
FileFormat::HypertextMarkupLanguage => { FileFormat::HypertextMarkupLanguage => {
convert_with_pandoc(file_path, fmt.extension(), TO_MARKDOWN).await? convert_with_pandoc(file_path, fmt.extension(), TO_MARKDOWN).await?
} }
_ => stream_text_file(file_path).await?, _ => stream_text_file(file_path).await?,
}, },
Kind::Presentation => match fmt { Kind::Presentation => match fmt {
FileFormat::OfficeOpenXmlPresentation => { FileFormat::OfficeOpenXmlPresentation => {
convert_with_pandoc(file_path, fmt.extension(), TO_MARKDOWN).await? convert_with_pandoc(file_path, fmt.extension(), TO_MARKDOWN).await?
} }
_ => stream_text_file(file_path).await?, _ => stream_text_file(file_path).await?,
}, },
Kind::Spreadsheet => stream_spreadsheet_as_csv(file_path).await?, Kind::Spreadsheet => stream_spreadsheet_as_csv(file_path).await?,
_ => stream_text_file(file_path).await?, _ => stream_text_file(file_path).await?,
}, },