mirror of
https://github.com/MindWorkAI/AI-Studio.git
synced 2025-07-28 06:02:56 +00:00
Added a GUID as a unique stream ID to the file retrieval API
This commit is contained in:
parent
429f9a566e
commit
8240a83662
@ -38,7 +38,8 @@ public partial class ReadFileContent : MSGComponentBase
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
var fileContent = await this.RustService.ReadArbitraryFileData(selectedFile.SelectedFilePath, int.MaxValue);
|
var streamId = Guid.NewGuid().ToString();
|
||||||
|
var fileContent = await this.RustService.ReadArbitraryFileData(selectedFile.SelectedFilePath, streamId, int.MaxValue);
|
||||||
await this.FileContentChanged.InvokeAsync(fileContent);
|
await this.FileContentChanged.InvokeAsync(fileContent);
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -7,6 +7,9 @@ public class ContentStreamSseEvent
|
|||||||
[JsonPropertyName("content")]
|
[JsonPropertyName("content")]
|
||||||
public string? Content { get; init; }
|
public string? Content { get; init; }
|
||||||
|
|
||||||
|
[JsonPropertyName("stream_id")]
|
||||||
|
public string? StreamId { get; init; }
|
||||||
|
|
||||||
[JsonPropertyName("metadata")]
|
[JsonPropertyName("metadata")]
|
||||||
public ContentStreamSseMetadata? Metadata { get; init; }
|
public ContentStreamSseMetadata? Metadata { get; init; }
|
||||||
}
|
}
|
@ -5,9 +5,9 @@ namespace AIStudio.Tools.Services;
|
|||||||
|
|
||||||
public sealed partial class RustService
|
public sealed partial class RustService
|
||||||
{
|
{
|
||||||
public async Task<string> ReadArbitraryFileData(string path, int maxChunks)
|
public async Task<string> ReadArbitraryFileData(string path, string streamId, int maxChunks)
|
||||||
{
|
{
|
||||||
var requestUri = $"/retrieval/fs/extract?path={Uri.EscapeDataString(path)}";
|
var requestUri = $"/retrieval/fs/extract?path={Uri.EscapeDataString(path)}&stream_id={streamId}";
|
||||||
var request = new HttpRequestMessage(HttpMethod.Get, requestUri);
|
var request = new HttpRequestMessage(HttpMethod.Get, requestUri);
|
||||||
var response = await this.http.SendAsync(request, HttpCompletionOption.ResponseHeadersRead);
|
var response = await this.http.SendAsync(request, HttpCompletionOption.ResponseHeadersRead);
|
||||||
|
|
||||||
|
@ -23,8 +23,16 @@ use tokio_stream::wrappers::ReceiverStream;
|
|||||||
#[derive(Debug, Serialize)]
|
#[derive(Debug, Serialize)]
|
||||||
pub struct Chunk {
|
pub struct Chunk {
|
||||||
pub content: String,
|
pub content: String,
|
||||||
|
pub stream_id: String,
|
||||||
pub metadata: Metadata,
|
pub metadata: Metadata,
|
||||||
}
|
}
|
||||||
|
impl Chunk {
|
||||||
|
pub fn new(content: String, metadata: Metadata) -> Self {
|
||||||
|
Chunk { content, stream_id: String::new(), metadata }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn set_stream_id(&mut self, stream_id: &str) { self.stream_id = stream_id.to_string(); }
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Serialize)]
|
#[derive(Debug, Serialize)]
|
||||||
pub enum Metadata {
|
pub enum Metadata {
|
||||||
@ -72,16 +80,21 @@ const IMAGE_SEGMENT_SIZE_IN_CHARS: usize = 8_192; // equivalent to ~ 5500 token
|
|||||||
type Result<T> = std::result::Result<T, Box<dyn std::error::Error + Send + Sync>>;
|
type Result<T> = std::result::Result<T, Box<dyn std::error::Error + Send + Sync>>;
|
||||||
type ChunkStream = Pin<Box<dyn Stream<Item = Result<Chunk>> + Send>>;
|
type ChunkStream = Pin<Box<dyn Stream<Item = Result<Chunk>> + Send>>;
|
||||||
|
|
||||||
#[get("/retrieval/fs/extract?<path>")]
|
#[get("/retrieval/fs/extract?<path>&<stream_id>")]
|
||||||
pub async fn extract_data(_token: APIToken, path: String, mut end: Shutdown) -> EventStream![] {
|
pub async fn extract_data(_token: APIToken, path: String, stream_id: String, mut end: Shutdown) -> EventStream![] {
|
||||||
EventStream! {
|
EventStream! {
|
||||||
let stream_result = stream_data(&path).await;
|
let stream_result = stream_data(&path).await;
|
||||||
|
let id_ref = &stream_id;
|
||||||
|
|
||||||
match stream_result {
|
match stream_result {
|
||||||
Ok(mut stream) => {
|
Ok(mut stream) => {
|
||||||
loop {
|
loop {
|
||||||
let chunk = select! {
|
let chunk = select! {
|
||||||
chunk = stream.next() => match chunk {
|
chunk = stream.next() => match chunk {
|
||||||
Some(Ok(chunk)) => chunk,
|
Some(Ok(mut chunk)) => {
|
||||||
|
chunk.set_stream_id(id_ref);
|
||||||
|
chunk
|
||||||
|
},
|
||||||
Some(Err(e)) => {
|
Some(Err(e)) => {
|
||||||
yield Event::json(&format!("Error: {e}"));
|
yield Event::json(&format!("Error: {e}"));
|
||||||
break;
|
break;
|
||||||
@ -171,10 +184,10 @@ async fn stream_text_file(file_path: &str) -> Result<ChunkStream> {
|
|||||||
let stream = stream! {
|
let stream = stream! {
|
||||||
while let Ok(Some(line)) = lines.next_line().await {
|
while let Ok(Some(line)) = lines.next_line().await {
|
||||||
line_number += 1;
|
line_number += 1;
|
||||||
yield Ok(Chunk {
|
yield Ok(Chunk::new(
|
||||||
content: line,
|
line,
|
||||||
metadata: Metadata::Text { line_number },
|
Metadata::Text { line_number }
|
||||||
});
|
));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -204,10 +217,10 @@ async fn stream_pdf(file_path: &str) -> Result<ChunkStream> {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
if tx.blocking_send(Ok(Chunk {
|
if tx.blocking_send(Ok(Chunk::new(
|
||||||
content,
|
content,
|
||||||
metadata: Metadata::Pdf { page_number: num_page + 1 },
|
Metadata::Pdf { page_number: num_page + 1 }
|
||||||
})).is_err() {
|
))).is_err() {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -244,13 +257,13 @@ async fn stream_spreadsheet_as_csv(file_path: &str) -> Result<ChunkStream> {
|
|||||||
.collect::<Vec<_>>()
|
.collect::<Vec<_>>()
|
||||||
.join(",");
|
.join(",");
|
||||||
|
|
||||||
if tx.blocking_send(Ok(Chunk {
|
if tx.blocking_send(Ok(Chunk::new(
|
||||||
content,
|
content,
|
||||||
metadata: Metadata::Spreadsheet {
|
Metadata::Spreadsheet {
|
||||||
sheet_name: sheet_name.clone(),
|
sheet_name: sheet_name.clone(),
|
||||||
row_number: row_idx + 1,
|
row_number: row_idx + 1,
|
||||||
},
|
}
|
||||||
})).is_err() {
|
))).is_err() {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -275,10 +288,10 @@ async fn convert_with_pandoc(
|
|||||||
let stream = stream! {
|
let stream = stream! {
|
||||||
if output.status.success() {
|
if output.status.success() {
|
||||||
match String::from_utf8(output.stdout.clone()) {
|
match String::from_utf8(output.stdout.clone()) {
|
||||||
Ok(content) => yield Ok(Chunk {
|
Ok(content) => yield Ok(Chunk::new(
|
||||||
content,
|
content,
|
||||||
metadata: Metadata::Document {},
|
Metadata::Document {}
|
||||||
}),
|
)),
|
||||||
Err(e) => yield Err(e.into()),
|
Err(e) => yield Err(e.into()),
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -297,10 +310,10 @@ async fn chunk_image(file_path: &str) -> Result<ChunkStream> {
|
|||||||
let base64 = general_purpose::STANDARD.encode(&data);
|
let base64 = general_purpose::STANDARD.encode(&data);
|
||||||
|
|
||||||
let stream = stream! {
|
let stream = stream! {
|
||||||
yield Ok(Chunk {
|
yield Ok(Chunk::new(
|
||||||
content: base64,
|
base64,
|
||||||
metadata: Metadata::Image {},
|
Metadata::Image {},
|
||||||
});
|
));
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok(Box::pin(stream))
|
Ok(Box::pin(stream))
|
||||||
@ -327,13 +340,13 @@ async fn stream_pptx(file_path: &str) -> Result<ChunkStream> {
|
|||||||
match slide_result {
|
match slide_result {
|
||||||
Ok(slide) => {
|
Ok(slide) => {
|
||||||
if let Some(md_content) = slide.convert_to_md() {
|
if let Some(md_content) = slide.convert_to_md() {
|
||||||
let chunk = Chunk {
|
let chunk = Chunk::new(
|
||||||
content: md_content,
|
md_content,
|
||||||
metadata: Metadata::Presentation {
|
Metadata::Presentation {
|
||||||
slide_number: slide.slide_number,
|
slide_number: slide.slide_number,
|
||||||
image: None,
|
image: None,
|
||||||
},
|
}
|
||||||
};
|
);
|
||||||
|
|
||||||
if tx.send(Ok(chunk)).await.is_err() {
|
if tx.send(Ok(chunk)).await.is_err() {
|
||||||
break;
|
break;
|
||||||
@ -360,13 +373,13 @@ async fn stream_pptx(file_path: &str) -> Result<ChunkStream> {
|
|||||||
is_end
|
is_end
|
||||||
);
|
);
|
||||||
|
|
||||||
let chunk = Chunk {
|
let chunk = Chunk::new(
|
||||||
content: String::new(),
|
String::new(),
|
||||||
metadata: Metadata::Presentation {
|
Metadata::Presentation {
|
||||||
slide_number: slide.slide_number,
|
slide_number: slide.slide_number,
|
||||||
image: Some(base64_image),
|
image: Some(base64_image),
|
||||||
},
|
}
|
||||||
};
|
);
|
||||||
|
|
||||||
if tx.send(Ok(chunk)).await.is_err() {
|
if tx.send(Ok(chunk)).await.is_err() {
|
||||||
break;
|
break;
|
||||||
|
Loading…
Reference in New Issue
Block a user