mirror of
https://github.com/MindWorkAI/AI-Studio.git
synced 2025-04-28 10:59:46 +00:00
Formatting
This commit is contained in:
parent
9776251d2f
commit
d9c6574093
@ -42,7 +42,6 @@ type ChunkStream = Pin<Box<dyn Stream<Item = Result<Chunk>> + Send>>;
|
|||||||
pub async fn extract_data(path: String, mut end: Shutdown) -> EventStream![] {
|
pub async fn extract_data(path: String, mut end: Shutdown) -> EventStream![] {
|
||||||
EventStream! {
|
EventStream! {
|
||||||
let stream_result = stream_data(&path).await;
|
let stream_result = stream_data(&path).await;
|
||||||
|
|
||||||
match stream_result {
|
match stream_result {
|
||||||
Ok(mut stream) => {
|
Ok(mut stream) => {
|
||||||
loop {
|
loop {
|
||||||
@ -61,6 +60,7 @@ pub async fn extract_data(path: String, mut end: Shutdown) -> EventStream![] {
|
|||||||
yield Event::json(&chunk);
|
yield Event::json(&chunk);
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
yield Event::json(&format!("Error starting stream: {}", e));
|
yield Event::json(&format!("Error starting stream: {}", e));
|
||||||
}
|
}
|
||||||
@ -74,21 +74,21 @@ async fn stream_data(file_path: &str) -> Result<ChunkStream> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let file_path_clone = file_path.to_owned();
|
let file_path_clone = file_path.to_owned();
|
||||||
|
|
||||||
let fmt = tokio::task::spawn_blocking(move || {
|
let fmt = tokio::task::spawn_blocking(move || {
|
||||||
FileFormat::from_file(&file_path_clone)
|
FileFormat::from_file(&file_path_clone)
|
||||||
}).await??;
|
}).await??;
|
||||||
|
|
||||||
let ext = file_path.split('.').last().unwrap_or("");
|
let ext = file_path.split('.').last().unwrap_or("");
|
||||||
|
|
||||||
let stream = match ext {
|
let stream = match ext {
|
||||||
DOCX | ODT => {
|
DOCX | ODT => {
|
||||||
let from = if ext == DOCX { "docx" } else { "odt" };
|
let from = if ext == DOCX { "docx" } else { "odt" };
|
||||||
convert_with_pandoc(file_path, from, TO_MARKDOWN).await?
|
convert_with_pandoc(file_path, from, TO_MARKDOWN).await?
|
||||||
}
|
}
|
||||||
|
|
||||||
"xlsx" | "ods" | "xls" | "xlsm" | "xlsb" | "xla" | "xlam" => {
|
"xlsx" | "ods" | "xls" | "xlsm" | "xlsb" | "xla" | "xlam" => {
|
||||||
stream_spreadsheet_as_csv(file_path).await?
|
stream_spreadsheet_as_csv(file_path).await?
|
||||||
}
|
}
|
||||||
|
|
||||||
_ => match fmt.kind() {
|
_ => match fmt.kind() {
|
||||||
Kind::Document => match fmt {
|
Kind::Document => match fmt {
|
||||||
FileFormat::PortableDocumentFormat => read_pdf(file_path).await?,
|
FileFormat::PortableDocumentFormat => read_pdf(file_path).await?,
|
||||||
@ -100,20 +100,24 @@ async fn stream_data(file_path: &str) -> Result<ChunkStream> {
|
|||||||
}
|
}
|
||||||
_ => stream_text_file(file_path).await?,
|
_ => stream_text_file(file_path).await?,
|
||||||
},
|
},
|
||||||
|
|
||||||
Kind::Ebook => return Err("Ebooks not yet supported".into()),
|
Kind::Ebook => return Err("Ebooks not yet supported".into()),
|
||||||
Kind::Image => chunk_image(file_path).await?,
|
Kind::Image => chunk_image(file_path).await?,
|
||||||
|
|
||||||
Kind::Other => match fmt {
|
Kind::Other => match fmt {
|
||||||
FileFormat::HypertextMarkupLanguage => {
|
FileFormat::HypertextMarkupLanguage => {
|
||||||
convert_with_pandoc(file_path, fmt.extension(), TO_MARKDOWN).await?
|
convert_with_pandoc(file_path, fmt.extension(), TO_MARKDOWN).await?
|
||||||
}
|
}
|
||||||
_ => stream_text_file(file_path).await?,
|
_ => stream_text_file(file_path).await?,
|
||||||
},
|
},
|
||||||
|
|
||||||
Kind::Presentation => match fmt {
|
Kind::Presentation => match fmt {
|
||||||
FileFormat::OfficeOpenXmlPresentation => {
|
FileFormat::OfficeOpenXmlPresentation => {
|
||||||
convert_with_pandoc(file_path, fmt.extension(), TO_MARKDOWN).await?
|
convert_with_pandoc(file_path, fmt.extension(), TO_MARKDOWN).await?
|
||||||
}
|
}
|
||||||
_ => stream_text_file(file_path).await?,
|
_ => stream_text_file(file_path).await?,
|
||||||
},
|
},
|
||||||
|
|
||||||
Kind::Spreadsheet => stream_spreadsheet_as_csv(file_path).await?,
|
Kind::Spreadsheet => stream_spreadsheet_as_csv(file_path).await?,
|
||||||
_ => stream_text_file(file_path).await?,
|
_ => stream_text_file(file_path).await?,
|
||||||
},
|
},
|
||||||
|
Loading…
Reference in New Issue
Block a user