diff --git a/app/MindWork AI Studio/Assistants/I18N/allTexts.lua b/app/MindWork AI Studio/Assistants/I18N/allTexts.lua index 01fe7f72..7351af97 100644 --- a/app/MindWork AI Studio/Assistants/I18N/allTexts.lua +++ b/app/MindWork AI Studio/Assistants/I18N/allTexts.lua @@ -1660,12 +1660,6 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READFILECONTENT::T354817589"] = "Select f -- Executables are not allowed UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READFILECONTENT::T4167762413"] = "Executables are not allowed" --- Use PDF content as input -UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READPDFCONTENT::T2849276709"] = "Use PDF content as input" - --- Select PDF file -UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READPDFCONTENT::T63272795"] = "Select PDF file" - -- The content is cleaned using an LLM agent: the main content is extracted, advertisements and other irrelevant things are attempted to be removed; relative links are attempted to be converted into absolute links so that they can be used. UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READWEBCONTENT::T1164201762"] = "The content is cleaned using an LLM agent: the main content is extracted, advertisements and other irrelevant things are attempted to be removed; relative links are attempted to be converted into absolute links so that they can be used." diff --git a/app/MindWork AI Studio/Components/ReadPDFContent.razor b/app/MindWork AI Studio/Components/ReadPDFContent.razor deleted file mode 100644 index bd101740..00000000 --- a/app/MindWork AI Studio/Components/ReadPDFContent.razor +++ /dev/null @@ -1,4 +0,0 @@ -@inherits MSGComponentBase - - @T("Use PDF content as input") - \ No newline at end of file diff --git a/app/MindWork AI Studio/Components/ReadPDFContent.razor.cs b/app/MindWork AI Studio/Components/ReadPDFContent.razor.cs deleted file mode 100644 index ab050bd3..00000000 --- a/app/MindWork AI Studio/Components/ReadPDFContent.razor.cs +++ /dev/null @@ -1,31 +0,0 @@ -using AIStudio.Tools.Rust; -using AIStudio.Tools.Services; - -using Microsoft.AspNetCore.Components; - -namespace AIStudio.Components; - -public partial class ReadPDFContent : MSGComponentBase -{ - [Parameter] - public string PDFContent { get; set; } = string.Empty; - - [Parameter] - public EventCallback PDFContentChanged { get; set; } - - [Inject] - private RustService RustService { get; init; } = null!; - - private async Task SelectFile() - { - var pdfFile = await this.RustService.SelectFile(T("Select PDF file"), FileTypeFilter.PDF); - if (pdfFile.UserCancelled) - return; - - if(!File.Exists(pdfFile.SelectedFilePath)) - return; - - var pdfText = await this.RustService.GetPDFText(pdfFile.SelectedFilePath); - await this.PDFContentChanged.InvokeAsync(pdfText); - } -} \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/Services/RustService.Retrieval.cs b/app/MindWork AI Studio/Tools/Services/RustService.Retrieval.cs index 3491568c..7cc05c3f 100644 --- a/app/MindWork AI Studio/Tools/Services/RustService.Retrieval.cs +++ b/app/MindWork AI Studio/Tools/Services/RustService.Retrieval.cs @@ -6,18 +6,6 @@ namespace AIStudio.Tools.Services; public sealed partial class RustService { - public async Task GetPDFText(string filePath) - { - var response = await this.http.GetAsync($"/retrieval/fs/read/pdf?file_path={filePath}"); - if (!response.IsSuccessStatusCode) - { - this.logger!.LogError($"Failed to read the PDF file due to an network error: '{response.StatusCode}'"); - return string.Empty; - } - - return await response.Content.ReadAsStringAsync(); - } - public async Task ReadArbitraryFileData(string path, int maxEvents) { var requestUri = $"/retrieval/fs/extract?path={Uri.EscapeDataString(path)}"; diff --git a/runtime/src/file_data.rs b/runtime/src/file_data.rs index c4a69585..3effdbeb 100644 --- a/runtime/src/file_data.rs +++ b/runtime/src/file_data.rs @@ -170,30 +170,6 @@ async fn stream_text_file(file_path: &str) -> Result { Ok(Box::pin(stream)) } -#[get("/retrieval/fs/read/pdf?")] -pub fn read_pdf(_token: APIToken, file_path: String) -> String { - let pdfium = Pdfium::ai_studio_init(); - let doc = match pdfium.load_pdf_from_file(&file_path, None) { - Ok(document) => document, - Err(e) => return e.to_string(), - }; - - let mut pdf_content = String::new(); - for page in doc.pages().iter() { - let content = match page.text().map(|text_content| text_content.all()) { - Ok(content) => content, - Err(_) => { - continue - } - }; - - pdf_content.push_str(&content); - pdf_content.push_str("\n\n"); - } - - pdf_content -} - async fn stream_pdf(file_path: &str) -> Result { let path = file_path.to_owned(); let (tx, rx) = mpsc::channel(10); diff --git a/runtime/src/runtime_api.rs b/runtime/src/runtime_api.rs index eece5973..b700af5b 100644 --- a/runtime/src/runtime_api.rs +++ b/runtime/src/runtime_api.rs @@ -82,7 +82,6 @@ pub fn start_runtime_api() { crate::environment::delete_enterprise_env_config_id, crate::environment::read_enterprise_env_config_server_url, crate::file_data::extract_data, - crate::file_data::read_pdf, crate::log::get_log_paths, ]) .ignite().await.unwrap()