removed pdf file retrieval

This commit is contained in:
krut_ni 2025-06-24 21:50:39 +02:00
parent 2c646aa2f0
commit de654de799
6 changed files with 0 additions and 78 deletions

View File

@ -1660,12 +1660,6 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READFILECONTENT::T354817589"] = "Select f
-- Executables are not allowed
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READFILECONTENT::T4167762413"] = "Executables are not allowed"
-- Use PDF content as input
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READPDFCONTENT::T2849276709"] = "Use PDF content as input"
-- Select PDF file
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READPDFCONTENT::T63272795"] = "Select PDF file"
-- The content is cleaned using an LLM agent: the main content is extracted, advertisements and other irrelevant things are attempted to be removed; relative links are attempted to be converted into absolute links so that they can be used.
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READWEBCONTENT::T1164201762"] = "The content is cleaned using an LLM agent: the main content is extracted, advertisements and other irrelevant things are attempted to be removed; relative links are attempted to be converted into absolute links so that they can be used."

View File

@ -1,4 +0,0 @@
@inherits MSGComponentBase
<MudButton StartIcon="@Icons.Material.Filled.Description" OnClick="async () => await this.SelectFile()" Variant="Variant.Filled" Class="mb-3">
@T("Use PDF content as input")
</MudButton>

View File

@ -1,31 +0,0 @@
using AIStudio.Tools.Rust;
using AIStudio.Tools.Services;
using Microsoft.AspNetCore.Components;
namespace AIStudio.Components;
/// <summary>
/// Component that lets the user choose a PDF file and forwards the text
/// returned by the Rust service through <see cref="PDFContentChanged"/>.
/// </summary>
public partial class ReadPDFContent : MSGComponentBase
{
    /// <summary>
    /// String parameter that defaults to empty. It is paired by name with
    /// <see cref="PDFContentChanged"/> (presumably for two-way binding; confirm with callers).
    /// </summary>
    [Parameter]
    public string PDFContent { get; set; } = string.Empty;

    /// <summary>
    /// Callback invoked with the text obtained for the selected PDF file.
    /// </summary>
    [Parameter]
    public EventCallback<string> PDFContentChanged { get; set; }

    [Inject]
    private RustService RustService { get; init; } = null!;

    /// <summary>
    /// Opens the file selection dialog, and when a file that exists on disk was chosen,
    /// requests its text and passes it to <see cref="PDFContentChanged"/>.
    /// </summary>
    private async Task SelectFile()
    {
        var selection = await this.RustService.SelectFile(T("Select PDF file"), FileTypeFilter.PDF);

        // Nothing to do when the dialog was dismissed or the chosen path does not exist.
        // The short-circuit keeps the path from being inspected after a cancellation.
        if (selection.UserCancelled || !File.Exists(selection.SelectedFilePath))
            return;

        var extractedText = await this.RustService.GetPDFText(selection.SelectedFilePath);
        await this.PDFContentChanged.InvokeAsync(extractedText);
    }
}

View File

@ -6,18 +6,6 @@ namespace AIStudio.Tools.Services;
public sealed partial class RustService
{
/// <summary>
/// Requests the text of a PDF file from the runtime's
/// <c>/retrieval/fs/read/pdf</c> endpoint.
/// </summary>
/// <param name="filePath">Path of the PDF file, sent as the <c>file_path</c> query parameter.</param>
/// <returns>
/// The response body as a string, or an empty string when the response
/// does not carry a success status code (the failure is logged).
/// </returns>
public async Task<string> GetPDFText(string filePath)
{
    // Escape the path before embedding it in the query string. Otherwise characters
    // such as '&', '#', '+', '%' or spaces would truncate or alter the value the
    // server receives. This matches how ReadArbitraryFileData builds its request URI.
    var requestUri = $"/retrieval/fs/read/pdf?file_path={Uri.EscapeDataString(filePath)}";

    // Dispose the response once the body has been read.
    using var response = await this.http.GetAsync(requestUri);
    if (!response.IsSuccessStatusCode)
    {
        this.logger!.LogError($"Failed to read the PDF file due to an network error: '{response.StatusCode}'");
        return string.Empty;
    }

    return await response.Content.ReadAsStringAsync();
}
public async Task<string> ReadArbitraryFileData(string path, int maxEvents)
{
var requestUri = $"/retrieval/fs/extract?path={Uri.EscapeDataString(path)}";

View File

@ -170,30 +170,6 @@ async fn stream_text_file(file_path: &str) -> Result<ChunkStream> {
Ok(Box::pin(stream))
}
#[get("/retrieval/fs/read/pdf?<file_path>")]
pub fn read_pdf(_token: APIToken, file_path: String) -> String {
let pdfium = Pdfium::ai_studio_init();
let doc = match pdfium.load_pdf_from_file(&file_path, None) {
Ok(document) => document,
Err(e) => return e.to_string(),
};
let mut pdf_content = String::new();
for page in doc.pages().iter() {
let content = match page.text().map(|text_content| text_content.all()) {
Ok(content) => content,
Err(_) => {
continue
}
};
pdf_content.push_str(&content);
pdf_content.push_str("\n\n");
}
pdf_content
}
async fn stream_pdf(file_path: &str) -> Result<ChunkStream> {
let path = file_path.to_owned();
let (tx, rx) = mpsc::channel(10);

View File

@ -82,7 +82,6 @@ pub fn start_runtime_api() {
crate::environment::delete_enterprise_env_config_id,
crate::environment::read_enterprise_env_config_server_url,
crate::file_data::extract_data,
crate::file_data::read_pdf,
crate::log::get_log_paths,
])
.ignite().await.unwrap()