From 7533e1fcc3b6ac5e6a6b9873226b7f54ef4194b6 Mon Sep 17 00:00:00 2001 From: Thorsten Sommer Date: Mon, 30 Jun 2025 21:41:32 +0200 Subject: [PATCH] Add Markdown fences support in text and CSV streaming --- .../Tools/ContentStreamSseHandler.cs | 2 +- runtime/src/file_data.rs | 59 ++++++++++++++++--- 2 files changed, 52 insertions(+), 9 deletions(-) diff --git a/app/MindWork AI Studio/Tools/ContentStreamSseHandler.cs b/app/MindWork AI Studio/Tools/ContentStreamSseHandler.cs index 50cd45f0..4090d49b 100644 --- a/app/MindWork AI Studio/Tools/ContentStreamSseHandler.cs +++ b/app/MindWork AI Studio/Tools/ContentStreamSseHandler.cs @@ -30,7 +30,7 @@ public static class ContentStreamSseHandler var sheetName = spreadsheetMetadata.Spreadsheet?.SheetName; var rowNumber = spreadsheetMetadata.Spreadsheet?.RowNumber; var spreadSheetResult = new StringBuilder(); - if (rowNumber == 1) + if (rowNumber == 0) { spreadSheetResult.AppendLine(); spreadSheetResult.AppendLine($"# {sheetName}"); diff --git a/runtime/src/file_data.rs b/runtime/src/file_data.rs index 146da28e..f05b18b5 100644 --- a/runtime/src/file_data.rs +++ b/runtime/src/file_data.rs @@ -140,6 +140,10 @@ async fn stream_data(file_path: &str, extract_images: bool) -> Result { + stream_text_file(file_path, true, Some("csv".to_string())).await? + }, + "pptx" => stream_pptx(file_path, extract_images).await?, "xlsx" | "ods" | "xls" | "xlsm" | "xlsb" | "xla" | "xlam" => { @@ -158,7 +162,7 @@ async fn stream_data(file_path: &str, extract_images: bool) -> Result stream_text_file(file_path).await?, + _ => stream_text_file(file_path, false, None).await?, }, Kind::Ebook => return Err("Ebooks not yet supported".into()), @@ -167,7 +171,7 @@ async fn stream_data(file_path: &str, extract_images: bool) -> Result Result stream_text_file(file_path).await?, + _ => stream_text_file(file_path, false, None).await?, }, Kind::Presentation => match fmt { @@ -184,25 +188,42 @@ async fn stream_data(file_path: &str, extract_images: bool) -> Result stream_text_file(file_path).await?, + _ => stream_text_file(file_path, false, None).await?, }, Kind::Spreadsheet => stream_spreadsheet_as_csv(file_path).await?, - _ => stream_text_file(file_path).await?, + _ => stream_text_file(file_path, false, None).await?, }, }; Ok(Box::pin(stream)) } -async fn stream_text_file(file_path: &str) -> Result { +async fn stream_text_file(file_path: &str, use_md_fences: bool, fence_language: Option) -> Result { let file = tokio::fs::File::open(file_path).await?; let reader = tokio::io::BufReader::new(file); let mut lines = reader.lines(); let mut line_number = 0; let stream = stream! { + + if use_md_fences { + match fence_language { + Some(lang) if lang.trim().is_empty() => { + yield Ok(Chunk::new("```".to_string(), Metadata::Text { line_number })); + }, + + Some(lang) => { + yield Ok(Chunk::new(format!("```{}", lang.trim()), Metadata::Text { line_number })); + }, + + None => { + yield Ok(Chunk::new("```".to_string(), Metadata::Text { line_number })); + } + }; + } + while let Ok(Some(line)) = lines.next_line().await { line_number += 1; yield Ok(Chunk::new( @@ -210,6 +231,10 @@ async fn stream_text_file(file_path: &str) -> Result { Metadata::Text { line_number } )); } + + if use_md_fences { + yield Ok(Chunk::new("```\n".to_string(), Metadata::Text { line_number })); + } }; Ok(Box::pin(stream)) @@ -272,7 +297,17 @@ async fn stream_spreadsheet_as_csv(file_path: &str) -> Result { } }; - for (row_idx, row) in range.rows().enumerate() { + let mut row_idx = 0; + tx.blocking_send(Ok(Chunk::new( + "```csv".to_string(), + Metadata::Spreadsheet { + sheet_name: sheet_name.clone(), + row_number: row_idx, + } + ))).ok(); + + for row in range.rows() { + row_idx += 1; let content = row.iter() .map(|cell| cell.to_string()) .collect::>() @@ -282,12 +317,20 @@ async fn stream_spreadsheet_as_csv(file_path: &str) -> Result { content, Metadata::Spreadsheet { sheet_name: sheet_name.clone(), - row_number: row_idx + 1, + row_number: row_idx, } ))).is_err() { return; } } + + tx.blocking_send(Ok(Chunk::new( + "```".to_string(), + Metadata::Spreadsheet { + sheet_name: sheet_name.clone(), + row_number: row_idx, + } + ))).ok(); } });