mirror of
https://github.com/MindWorkAI/AI-Studio.git
synced 2025-05-03 09:39:47 +00:00
Add endpoint to read and extract text from PDF files without streaming
This commit is contained in:
parent
ecaabfaa2a
commit
7d64767cd3
@ -0,0 +1,16 @@
|
||||
namespace AIStudio.Tools.Services;
|
||||
|
||||
public sealed partial class RustService
{
    /// <summary>
    /// Requests the text of a PDF file from the runtime's
    /// <c>/retrieval/fs/read/pdf</c> endpoint in a single, non-streaming response.
    /// </summary>
    /// <param name="filePath">The path of the PDF file; sent as the <c>file_path</c> query parameter.</param>
    /// <returns>
    /// The response body when the request succeeds; an empty string when the
    /// endpoint answers with a non-success status code (the status is logged).
    /// </returns>
    public async Task<string> GetPDFText(string filePath)
    {
        // The path must be percent-encoded before it is placed into the query string:
        // characters such as '&', '#', '+', '%' or spaces would otherwise truncate or
        // alter the value of the file_path parameter.
        var encodedPath = Uri.EscapeDataString(filePath);

        // Dispose the response once the body has been read.
        using var response = await this.http.GetAsync($"/retrieval/fs/read/pdf?file_path={encodedPath}");
        if (!response.IsSuccessStatusCode)
        {
            this.logger!.LogError($"Failed to read the PDF file due to an network error: '{response.StatusCode}'");
            return string.Empty;
        }

        return await response.Content.ReadAsStringAsync();
    }
}
|
@ -146,6 +146,30 @@ async fn stream_text_file(file_path: &str) -> Result<ChunkStream> {
|
||||
Ok(Box::pin(stream))
|
||||
}
|
||||
|
||||
#[get("/retrieval/fs/read/pdf?<file_path>")]
|
||||
pub fn read_pdf(_token: APIToken, file_path: String) -> String {
|
||||
let pdfium = Pdfium::default();
|
||||
let doc = match pdfium.load_pdf_from_file(&file_path, None) {
|
||||
Ok(document) => document,
|
||||
Err(e) => return e.to_string(),
|
||||
};
|
||||
|
||||
let mut pdf_content = String::new();
|
||||
for page in doc.pages().iter() {
|
||||
let content = match page.text().map(|text_content| text_content.all()) {
|
||||
Ok(content) => content,
|
||||
Err(_) => {
|
||||
continue
|
||||
}
|
||||
};
|
||||
|
||||
pdf_content.push_str(&content);
|
||||
pdf_content.push_str("\n\n");
|
||||
}
|
||||
|
||||
pdf_content
|
||||
}
|
||||
|
||||
async fn stream_pdf(file_path: &str) -> Result<ChunkStream> {
|
||||
let path = file_path.to_owned();
|
||||
let (tx, rx) = mpsc::channel(10);
|
||||
|
@ -79,6 +79,7 @@ pub fn start_runtime_api() {
|
||||
crate::environment::get_config_directory,
|
||||
crate::environment::read_user_language,
|
||||
crate::file_data::extract_data,
|
||||
crate::file_data::read_pdf,
|
||||
crate::log::get_log_paths,
|
||||
])
|
||||
.ignite().await.unwrap()
|
||||
|
Loading…
Reference in New Issue
Block a user