mirror of
https://github.com/MindWorkAI/AI-Studio.git
synced 2025-07-27 22:02:56 +00:00
removed pdf file retrieval
This commit is contained in:
parent
2c646aa2f0
commit
de654de799
@ -1660,12 +1660,6 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READFILECONTENT::T354817589"] = "Select f
|
||||
-- Executables are not allowed
|
||||
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READFILECONTENT::T4167762413"] = "Executables are not allowed"
|
||||
|
||||
-- Use PDF content as input
|
||||
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READPDFCONTENT::T2849276709"] = "Use PDF content as input"
|
||||
|
||||
-- Select PDF file
|
||||
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READPDFCONTENT::T63272795"] = "Select PDF file"
|
||||
|
||||
-- The content is cleaned using an LLM agent: the main content is extracted, advertisements and other irrelevant things are attempted to be removed; relative links are attempted to be converted into absolute links so that they can be used.
|
||||
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READWEBCONTENT::T1164201762"] = "The content is cleaned using an LLM agent: the main content is extracted, advertisements and other irrelevant things are attempted to be removed; relative links are attempted to be converted into absolute links so that they can be used."
|
||||
|
||||
|
@ -1,4 +0,0 @@
|
||||
@inherits MSGComponentBase
|
||||
<MudButton StartIcon="@Icons.Material.Filled.Description" OnClick="async () => await this.SelectFile()" Variant="Variant.Filled" Class="mb-3">
|
||||
@T("Use PDF content as input")
|
||||
</MudButton>
|
@ -1,31 +0,0 @@
|
||||
using AIStudio.Tools.Rust;
|
||||
using AIStudio.Tools.Services;
|
||||
|
||||
using Microsoft.AspNetCore.Components;
|
||||
|
||||
namespace AIStudio.Components;
|
||||
|
||||
/// <summary>
/// Component that lets the user pick a PDF file and publishes the text
/// obtained for it through <see cref="PDFContentChanged"/>.
/// </summary>
public partial class ReadPDFContent : MSGComponentBase
{
    /// <summary>
    /// The PDF text as a component parameter. This class never assigns it
    /// itself; new text is only reported through <see cref="PDFContentChanged"/>.
    /// </summary>
    [Parameter]
    public string PDFContent { get; set; } = string.Empty;

    /// <summary>
    /// Callback invoked with the text returned for the selected PDF file.
    /// </summary>
    [Parameter]
    public EventCallback<string> PDFContentChanged { get; set; }

    /// <summary>
    /// Injected service used for the file-selection dialog and the PDF text request.
    /// </summary>
    [Inject]
    private RustService RustService { get; init; } = null!;

    /// <summary>
    /// Opens the file-selection dialog filtered to PDF files, requests the text
    /// of the chosen file, and forwards it to <see cref="PDFContentChanged"/>.
    /// Does nothing when the user cancels or the selected path does not exist.
    /// </summary>
    private async Task SelectFile()
    {
        var selection = await this.RustService.SelectFile(T("Select PDF file"), FileTypeFilter.PDF);

        // Short-circuit keeps the original order: the cancel flag is checked
        // first, and the file system is only consulted when it is not set.
        if (selection.UserCancelled || !File.Exists(selection.SelectedFilePath))
            return;

        var extractedText = await this.RustService.GetPDFText(selection.SelectedFilePath);
        await this.PDFContentChanged.InvokeAsync(extractedText);
    }
}
|
@ -6,18 +6,6 @@ namespace AIStudio.Tools.Services;
|
||||
|
||||
public sealed partial class RustService
|
||||
{
|
||||
/// <summary>
/// Requests the text of a PDF file from the runtime's
/// <c>/retrieval/fs/read/pdf</c> endpoint.
/// </summary>
/// <param name="filePath">Path of the PDF file, sent as the <c>file_path</c> query parameter.</param>
/// <returns>
/// The response body when the request succeeds; an empty string (after logging
/// the status code) when the response status is not a success code.
/// </returns>
public async Task<string> GetPDFText(string filePath)
{
    // The path must be escaped before it is placed into the query string:
    // characters such as '&', '#', '+', '%' or '=' are legal in file names but
    // would otherwise be interpreted as query syntax and alter or truncate the
    // value. ReadArbitraryFileData escapes its path the same way.
    var requestUri = $"/retrieval/fs/read/pdf?file_path={Uri.EscapeDataString(filePath)}";
    var response = await this.http.GetAsync(requestUri);
    if (!response.IsSuccessStatusCode)
    {
        this.logger!.LogError($"Failed to read the PDF file due to an network error: '{response.StatusCode}'");
        return string.Empty;
    }

    return await response.Content.ReadAsStringAsync();
}
|
||||
|
||||
public async Task<string> ReadArbitraryFileData(string path, int maxEvents)
|
||||
{
|
||||
var requestUri = $"/retrieval/fs/extract?path={Uri.EscapeDataString(path)}";
|
||||
|
@ -170,30 +170,6 @@ async fn stream_text_file(file_path: &str) -> Result<ChunkStream> {
|
||||
Ok(Box::pin(stream))
|
||||
}
|
||||
|
||||
#[get("/retrieval/fs/read/pdf?<file_path>")]
|
||||
pub fn read_pdf(_token: APIToken, file_path: String) -> String {
|
||||
let pdfium = Pdfium::ai_studio_init();
|
||||
let doc = match pdfium.load_pdf_from_file(&file_path, None) {
|
||||
Ok(document) => document,
|
||||
Err(e) => return e.to_string(),
|
||||
};
|
||||
|
||||
let mut pdf_content = String::new();
|
||||
for page in doc.pages().iter() {
|
||||
let content = match page.text().map(|text_content| text_content.all()) {
|
||||
Ok(content) => content,
|
||||
Err(_) => {
|
||||
continue
|
||||
}
|
||||
};
|
||||
|
||||
pdf_content.push_str(&content);
|
||||
pdf_content.push_str("\n\n");
|
||||
}
|
||||
|
||||
pdf_content
|
||||
}
|
||||
|
||||
async fn stream_pdf(file_path: &str) -> Result<ChunkStream> {
|
||||
let path = file_path.to_owned();
|
||||
let (tx, rx) = mpsc::channel(10);
|
||||
|
@ -82,7 +82,6 @@ pub fn start_runtime_api() {
|
||||
crate::environment::delete_enterprise_env_config_id,
|
||||
crate::environment::read_enterprise_env_config_server_url,
|
||||
crate::file_data::extract_data,
|
||||
crate::file_data::read_pdf,
|
||||
crate::log::get_log_paths,
|
||||
])
|
||||
.ignite().await.unwrap()
|
||||
|
Loading…
Reference in New Issue
Block a user