mirror of
https://github.com/MindWorkAI/AI-Studio.git
synced 2025-07-28 02:02:57 +00:00
removed pdf file retrieval
This commit is contained in:
parent
2c646aa2f0
commit
de654de799
@ -1660,12 +1660,6 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READFILECONTENT::T354817589"] = "Select f
|
|||||||
-- Executables are not allowed
|
-- Executables are not allowed
|
||||||
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READFILECONTENT::T4167762413"] = "Executables are not allowed"
|
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READFILECONTENT::T4167762413"] = "Executables are not allowed"
|
||||||
|
|
||||||
-- Use PDF content as input
|
|
||||||
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READPDFCONTENT::T2849276709"] = "Use PDF content as input"
|
|
||||||
|
|
||||||
-- Select PDF file
|
|
||||||
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READPDFCONTENT::T63272795"] = "Select PDF file"
|
|
||||||
|
|
||||||
-- The content is cleaned using an LLM agent: the main content is extracted, advertisements and other irrelevant things are attempted to be removed; relative links are attempted to be converted into absolute links so that they can be used.
|
-- The content is cleaned using an LLM agent: the main content is extracted, advertisements and other irrelevant things are attempted to be removed; relative links are attempted to be converted into absolute links so that they can be used.
|
||||||
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READWEBCONTENT::T1164201762"] = "The content is cleaned using an LLM agent: the main content is extracted, advertisements and other irrelevant things are attempted to be removed; relative links are attempted to be converted into absolute links so that they can be used."
|
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READWEBCONTENT::T1164201762"] = "The content is cleaned using an LLM agent: the main content is extracted, advertisements and other irrelevant things are attempted to be removed; relative links are attempted to be converted into absolute links so that they can be used."
|
||||||
|
|
||||||
|
@ -1,4 +0,0 @@
|
|||||||
@inherits MSGComponentBase
|
|
||||||
<MudButton StartIcon="@Icons.Material.Filled.Description" OnClick="async () => await this.SelectFile()" Variant="Variant.Filled" Class="mb-3">
|
|
||||||
@T("Use PDF content as input")
|
|
||||||
</MudButton>
|
|
@ -1,31 +0,0 @@
|
|||||||
using AIStudio.Tools.Rust;
|
|
||||||
using AIStudio.Tools.Services;
|
|
||||||
|
|
||||||
using Microsoft.AspNetCore.Components;
|
|
||||||
|
|
||||||
namespace AIStudio.Components;
|
|
||||||
|
|
||||||
/// <summary>
/// Component that lets the user choose a PDF file and hands the text extracted
/// from it to the parent through <see cref="PDFContentChanged"/>.
/// </summary>
public partial class ReadPDFContent : MSGComponentBase
{
    /// <summary>
    /// The PDF text value supplied by the parent component.
    /// </summary>
    [Parameter]
    public string PDFContent { get; set; } = string.Empty;

    /// <summary>
    /// Callback invoked with the extracted text after a file has been read.
    /// </summary>
    [Parameter]
    public EventCallback<string> PDFContentChanged { get; set; }

    /// <summary>
    /// Injected service used for the file dialog and for the PDF text extraction.
    /// </summary>
    [Inject]
    private RustService RustService { get; init; } = null!;

    /// <summary>
    /// Opens the file dialog restricted to PDF files, reads the text of the chosen
    /// file and notifies the parent. Does nothing when the user cancels the dialog
    /// or when the chosen path does not exist.
    /// </summary>
    private async Task SelectFile()
    {
        var selection = await this.RustService.SelectFile(T("Select PDF file"), FileTypeFilter.PDF);

        // Short-circuit evaluation keeps the original order: the existence check
        // only runs when the dialog was not cancelled.
        if (selection.UserCancelled || !File.Exists(selection.SelectedFilePath))
            return;

        var extractedText = await this.RustService.GetPDFText(selection.SelectedFilePath);
        await this.PDFContentChanged.InvokeAsync(extractedText);
    }
}
|
|
@ -6,18 +6,6 @@ namespace AIStudio.Tools.Services;
|
|||||||
|
|
||||||
public sealed partial class RustService
|
public sealed partial class RustService
|
||||||
{
|
{
|
||||||
/// <summary>
/// Requests the text content of a PDF file from the runtime's
/// <c>/retrieval/fs/read/pdf</c> endpoint.
/// </summary>
/// <param name="filePath">Path of the PDF file to read.</param>
/// <returns>
/// The response body as a string, or an empty string when the response
/// does not carry a success status code (the failure is logged).
/// </returns>
public async Task<string> GetPDFText(string filePath)
{
    // The path travels as a query-string value, so it must be escaped: characters
    // such as '&', '#', '+', '%' or spaces would otherwise truncate or alter it.
    // This mirrors how ReadArbitraryFileData builds its request URI.
    var requestUri = $"/retrieval/fs/read/pdf?file_path={Uri.EscapeDataString(filePath)}";
    var response = await this.http.GetAsync(requestUri);
    if (!response.IsSuccessStatusCode)
    {
        this.logger!.LogError($"Failed to read the PDF file due to an network error: '{response.StatusCode}'");
        return string.Empty;
    }

    return await response.Content.ReadAsStringAsync();
}
|
|
||||||
|
|
||||||
public async Task<string> ReadArbitraryFileData(string path, int maxEvents)
|
public async Task<string> ReadArbitraryFileData(string path, int maxEvents)
|
||||||
{
|
{
|
||||||
var requestUri = $"/retrieval/fs/extract?path={Uri.EscapeDataString(path)}";
|
var requestUri = $"/retrieval/fs/extract?path={Uri.EscapeDataString(path)}";
|
||||||
|
@ -170,30 +170,6 @@ async fn stream_text_file(file_path: &str) -> Result<ChunkStream> {
|
|||||||
Ok(Box::pin(stream))
|
Ok(Box::pin(stream))
|
||||||
}
|
}
|
||||||
|
|
||||||
#[get("/retrieval/fs/read/pdf?<file_path>")]
|
|
||||||
pub fn read_pdf(_token: APIToken, file_path: String) -> String {
|
|
||||||
let pdfium = Pdfium::ai_studio_init();
|
|
||||||
let doc = match pdfium.load_pdf_from_file(&file_path, None) {
|
|
||||||
Ok(document) => document,
|
|
||||||
Err(e) => return e.to_string(),
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut pdf_content = String::new();
|
|
||||||
for page in doc.pages().iter() {
|
|
||||||
let content = match page.text().map(|text_content| text_content.all()) {
|
|
||||||
Ok(content) => content,
|
|
||||||
Err(_) => {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
pdf_content.push_str(&content);
|
|
||||||
pdf_content.push_str("\n\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
pdf_content
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn stream_pdf(file_path: &str) -> Result<ChunkStream> {
|
async fn stream_pdf(file_path: &str) -> Result<ChunkStream> {
|
||||||
let path = file_path.to_owned();
|
let path = file_path.to_owned();
|
||||||
let (tx, rx) = mpsc::channel(10);
|
let (tx, rx) = mpsc::channel(10);
|
||||||
|
@ -82,7 +82,6 @@ pub fn start_runtime_api() {
|
|||||||
crate::environment::delete_enterprise_env_config_id,
|
crate::environment::delete_enterprise_env_config_id,
|
||||||
crate::environment::read_enterprise_env_config_server_url,
|
crate::environment::read_enterprise_env_config_server_url,
|
||||||
crate::file_data::extract_data,
|
crate::file_data::extract_data,
|
||||||
crate::file_data::read_pdf,
|
|
||||||
crate::log::get_log_paths,
|
crate::log::get_log_paths,
|
||||||
])
|
])
|
||||||
.ignite().await.unwrap()
|
.ignite().await.unwrap()
|
||||||
|
Loading…
Reference in New Issue
Block a user