From c4d9af18cdad92f7256b94a7572cd0359c2621be Mon Sep 17 00:00:00 2001 From: Thorsten Sommer Date: Fri, 2 May 2025 23:09:50 +0200 Subject: [PATCH] Add component to read PDF files into assistants (#439) --- .github/workflows/build-and-release.yml | 127 ++++++++++++ .gitignore | 5 + app/Build/Commands/CheckRidsCommand.cs | 5 + app/Build/Commands/Library.cs | 3 + app/Build/Commands/Pdfium.cs | 107 ++++++++++ app/Build/Commands/UpdateMetadataCommands.cs | 189 +++++++++--------- app/Build/Commands/UpdateWebAssetsCommand.cs | 2 +- app/Build/Tools/Environment.cs | 34 ++++ app/MindWork AI Studio.sln.DotSettings | 1 + .../Assistants/I18N/allTexts.lua | 9 + .../LegalCheck/AssistantLegalCheck.razor | 6 + .../AssistantTextSummarizer.razor | 6 + .../Translation/AssistantTranslation.razor | 6 + .../Components/ReadPDFContent.razor | 4 + .../Components/ReadPDFContent.razor.cs | 33 +++ .../Components/ReadWebContent.razor | 2 +- .../MindWork AI Studio.csproj | 4 + app/MindWork AI Studio/Pages/About.razor | 2 + app/MindWork AI Studio/Pages/About.razor.cs | 5 +- .../plugin.lua | 9 + .../plugin.lua | 10 + .../Settings/DataModel/PreviewFeatures.cs | 1 + .../DataModel/PreviewFeaturesExtensions.cs | 2 + .../DataModel/PreviewVisibilityExtensions.cs | 1 + .../Tools/Metadata/MetaDataArchitecture.cs | 2 +- .../Metadata/MetaDataLibrariesAttribute.cs | 6 + .../Tools/Services/RustService.Retrieval.cs | 16 ++ .../wwwroot/changelog/v0.9.42.md | 3 +- metadata.txt | 3 +- runtime/Cargo.lock | 34 +++- runtime/Cargo.toml | 4 +- runtime/src/app_window.rs | 50 ++++- runtime/src/file_data.rs | 43 +++- runtime/src/lib.rs | 3 +- runtime/src/main.rs | 18 ++ runtime/src/metadata.rs | 18 ++ runtime/src/runtime_api.rs | 1 + runtime/tauri.conf.json | 6 + 38 files changed, 661 insertions(+), 119 deletions(-) create mode 100644 app/Build/Commands/Library.cs create mode 100644 app/Build/Commands/Pdfium.cs create mode 100644 app/MindWork AI Studio/Components/ReadPDFContent.razor create mode 100644 app/MindWork AI 
Studio/Components/ReadPDFContent.razor.cs create mode 100644 app/MindWork AI Studio/Tools/Metadata/MetaDataLibrariesAttribute.cs create mode 100644 app/MindWork AI Studio/Tools/Services/RustService.Retrieval.cs create mode 100644 runtime/src/metadata.rs diff --git a/.github/workflows/build-and-release.yml b/.github/workflows/build-and-release.yml index 3479f9f8..b26772e5 100644 --- a/.github/workflows/build-and-release.yml +++ b/.github/workflows/build-and-release.yml @@ -167,6 +167,12 @@ jobs: sed -i '' "10s/.*/${{ matrix.dotnet_runtime }}/" metadata.txt fi + # Next line is the necessary PDFium version. + # The format is '137.0.7123.0'. What we need + # is the '7123' part: + pdfium_version=$(sed -n '11p' metadata.txt) + pdfium_version=$(echo $pdfium_version | cut -d'.' -f3) + # Write the metadata to the environment: echo "APP_VERSION=${app_version}" >> $GITHUB_ENV echo "FORMATTED_APP_VERSION=${formatted_app_version}" >> $GITHUB_ENV @@ -178,6 +184,7 @@ jobs: echo "MUD_BLAZOR_VERSION=${mud_blazor_version}" >> $GITHUB_ENV echo "TAURI_VERSION=${tauri_version}" >> $GITHUB_ENV echo "ARCHITECTURE=${{ matrix.dotnet_runtime }}" >> $GITHUB_ENV + echo "PDFIUM_VERSION=${pdfium_version}" >> $GITHUB_ENV # Log the metadata: echo "App version: '${formatted_app_version}'" @@ -189,6 +196,7 @@ jobs: echo "MudBlazor version: '${mud_blazor_version}'" echo "Tauri version: '${tauri_version}'" echo "Architecture: '${{ matrix.dotnet_runtime }}'" + echo "PDFium version: '${pdfium_version}'" - name: Read and format metadata (Windows) if: matrix.platform == 'windows-latest' @@ -227,6 +235,12 @@ jobs: # Write the changed metadata back to the file: Set-Content -Path metadata.txt -Value $metadata + # Next line is the necessary PDFium version. + # The format is '137.0.7123.0'. 
What we need + # is the '7123' part: + $pdfium_version = $metadata[10] + $pdfium_version = $pdfium_version.Split('.')[2] + # Write the metadata to the environment: Write-Output "APP_VERSION=${app_version}" >> $env:GITHUB_ENV Write-Output "FORMATTED_APP_VERSION=${formatted_app_version}" >> $env:GITHUB_ENV @@ -237,6 +251,7 @@ jobs: Write-Output "RUST_VERSION=${rust_version}" >> $env:GITHUB_ENV Write-Output "MUD_BLAZOR_VERSION=${mud_blazor_version}" >> $env:GITHUB_ENV Write-Output "ARCHITECTURE=${{ matrix.dotnet_runtime }}" >> $env:GITHUB_ENV + Write-Output "PDFIUM_VERSION=${pdfium_version}" >> $env:GITHUB_ENV # Log the metadata: Write-Output "App version: '${formatted_app_version}'" @@ -248,6 +263,7 @@ jobs: Write-Output "MudBlazor version: '${mud_blazor_version}'" Write-Output "Tauri version: '${tauri_version}'" Write-Output "Architecture: '${{ matrix.dotnet_runtime }}'" + Write-Output "PDFium version: '${pdfium_version}'" - name: Setup .NET uses: actions/setup-dotnet@v4 @@ -255,6 +271,117 @@ jobs: dotnet-version: ${{ env.DOTNET_SDK_VERSION }} cache: true cache-dependency-path: 'app/MindWork AI Studio/packages.lock.json' + + - name: Deploy PDFium (Unix) + if: matrix.platform != 'windows-latest' + env: + PDFIUM_VERSION: ${{ env.PDFIUM_VERSION }} + DOTNET_RUNTIME: ${{ matrix.dotnet_runtime }} + run: | + set -e + + # Target directory: + TLIB_DIR="runtime/resources/libraries" + mkdir -p "$TLIB_DIR" + + case "${DOTNET_RUNTIME}" in + linux-x64) + PDFIUM_FILE="linux-x64.tgz" + LIB_SOURCE="lib/libpdfium.so" + LIB_TARGET="libpdfium.so" + ;; + linux-arm64) + PDFIUM_FILE="linux-arm64.tgz" + LIB_SOURCE="lib/libpdfium.so" + LIB_TARGET="libpdfium.so" + ;; + osx-x64) + PDFIUM_FILE="mac-x64.tgz" + LIB_SOURCE="lib/libpdfium.dylib" + LIB_TARGET="libpdfium.dylib" + ;; + osx-arm64) + PDFIUM_FILE="mac-arm64.tgz" + LIB_SOURCE="lib/libpdfium.dylib" + LIB_TARGET="libpdfium.dylib" + ;; + *) + echo "Unknown platform: ${DOTNET_RUNTIME}" + exit 1 + ;; + esac + + 
PDFIUM_URL="https://github.com/bblanchon/pdfium-binaries/releases/download/chromium%2F${PDFIUM_VERSION}/pdfium-${PDFIUM_FILE}" + + echo "Download PDFium $PDFIUM_URL ..." + TMP=$(mktemp -d) + ARCHIVE="${TMP}/pdfium.tgz" + + curl -fsSL -o "$ARCHIVE" "$PDFIUM_URL" + + echo "Extracting PDFium ..." + tar xzf "$ARCHIVE" -C "$TMP" + SRC="${TMP}/${LIB_SOURCE}" + + if [ ! -f "$SRC" ]; then + echo "Was not able to find PDFium source: $SRC" + exit 1 + fi + + echo "Copy PDFium from ${SRC} to ${TLIB_DIR}/${LIB_TARGET}" + cp -f "$SRC" "$TLIB_DIR/$LIB_TARGET" + + echo "Cleaning up ..." + rm -fr "$TMP" + + - name: Install PDFium (Windows) + if: matrix.platform == 'windows-latest' + env: + PDFIUM_VERSION: ${{ env.PDFIUM_VERSION }} + DOTNET_RUNTIME: ${{ matrix.dotnet_runtime }} + run: | + $TLIB_DIR = "runtime\resources\libraries" + New-Item -ItemType Directory -Force -Path $TLIB_DIR | Out-Null + + switch ($env:DOTNET_RUNTIME) { + "win-x64" { + $PDFIUM_FILE = "win-x64.tgz" + $LIB_SOURCE = "bin\pdfium.dll" + $LIB_TARGET = "pdfium.dll" + } + "win-arm64" { + $PDFIUM_FILE = "win-arm64.tgz" + $LIB_SOURCE = "bin\pdfium.dll" + $LIB_TARGET = "pdfium.dll" + } + default { + Write-Error "Unknown platform: $($env:DOTNET_RUNTIME)" + exit 1 + } + } + + $PDFIUM_URL = "https://github.com/bblanchon/pdfium-binaries/releases/download/chromium%2F$($env:PDFIUM_VERSION)/pdfium-$PDFIUM_FILE" + Write-Host "Download $PDFIUM_URL ..." + $TMP = (New-Item -ItemType Directory -Force -Path (Join-Path ([System.IO.Path]::GetTempPath()) ([System.Guid]::NewGuid().ToString()))).FullName # dedicated scratch directory: the final Remove-Item -Recurse must not target the shared temp folder + $ARCHIVE = Join-Path $TMP "pdfium.tgz" + + Invoke-WebRequest -Uri $PDFIUM_URL -OutFile $ARCHIVE + + Write-Host "Extracting PDFium ..." + tar -xzf $ARCHIVE -C $TMP + + $SRC = Join-Path $TMP $LIB_SOURCE + if (!(Test-Path $SRC)) { + Write-Error "Cannot find PDFium source: $SRC" + exit 1 + } + + $DEST = Join-Path $TLIB_DIR $LIB_TARGET + Copy-Item -Path $SRC -Destination $DEST -Force + + Write-Host "Cleaning up ..."
+ Remove-Item $ARCHIVE -Force + Remove-Item $TMP -Recurse -Force - name: Build .NET project run: | diff --git a/.gitignore b/.gitignore index a2cea037..020153c2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,8 @@ +# Ignore pdfium library: +libpdfium.dylib +libpdfium.so +pdfium.dll + # User-specific files *.rsuser *.suo diff --git a/app/Build/Commands/CheckRidsCommand.cs b/app/Build/Commands/CheckRidsCommand.cs index 62bf3662..74912a33 100644 --- a/app/Build/Commands/CheckRidsCommand.cs +++ b/app/Build/Commands/CheckRidsCommand.cs @@ -17,5 +17,10 @@ public sealed class CheckRidsCommand { Console.WriteLine($"- {rid}"); } + + Console.WriteLine(); + Console.WriteLine("The RID for the current OS and CPU is:"); + var currentRid = Environment.GetCurrentRid(); + Console.WriteLine($"- {currentRid}"); } } \ No newline at end of file diff --git a/app/Build/Commands/Library.cs b/app/Build/Commands/Library.cs new file mode 100644 index 00000000..354dc504 --- /dev/null +++ b/app/Build/Commands/Library.cs @@ -0,0 +1,3 @@ +namespace Build.Commands; + +public record Library(string Path, string Filename); \ No newline at end of file diff --git a/app/Build/Commands/Pdfium.cs b/app/Build/Commands/Pdfium.cs new file mode 100644 index 00000000..349e3590 --- /dev/null +++ b/app/Build/Commands/Pdfium.cs @@ -0,0 +1,107 @@ +using System.Formats.Tar; +using System.IO.Compression; + +using SharedTools; + +namespace Build.Commands; + +public static class Pdfium +{ + public static async Task InstallAsync(RID rid, string version) + { + Console.Write($"- Installing Pdfium {version} for {rid.ToUserFriendlyName()} ..."); + + var cwd = Environment.GetRustRuntimeDirectory(); + var pdfiumTmpDownloadPath = Path.GetTempFileName(); + var pdfiumTmpExtractPath = Directory.CreateTempSubdirectory(); + var pdfiumUrl = GetPdfiumDownloadUrl(rid, version); + + // + // Download the file: + // + Console.Write(" downloading ..."); + using (var client = new HttpClient()) + { + var response = await
client.GetAsync(pdfiumUrl); + if (!response.IsSuccessStatusCode) + { + Console.WriteLine($" failed to download Pdfium {version} for {rid.ToUserFriendlyName()}"); + return; + } + + await using var fileStream = File.Create(pdfiumTmpDownloadPath); + await response.Content.CopyToAsync(fileStream); + } + + // + // Extract the downloaded file: + // + Console.Write(" extracting ..."); + await using(var tgzStream = File.Open(pdfiumTmpDownloadPath, FileMode.Open, FileAccess.Read, FileShare.Read)) + { + await using var uncompressedStream = new GZipStream(tgzStream, CompressionMode.Decompress); + await TarFile.ExtractToDirectoryAsync(uncompressedStream, pdfiumTmpExtractPath.FullName, true); + } + + // + // Copy the library to the target directory: + // + Console.Write(" deploying ..."); + var library = GetLibraryPath(rid); + if (string.IsNullOrEmpty(library.Path)) + { + Console.WriteLine($" failed to find the library path for {rid.ToUserFriendlyName()}"); + return; + } + + var pdfiumLibSourcePath = Path.Join(pdfiumTmpExtractPath.FullName, library.Path); + var pdfiumLibTargetPath = Path.Join(cwd, "resources", "libraries", library.Filename); + if (!File.Exists(pdfiumLibSourcePath)) + { + Console.WriteLine($" failed to find the library file '{pdfiumLibSourcePath}'"); + return; + } + + Directory.CreateDirectory(Path.Join(cwd, "resources", "libraries")); + if (File.Exists(pdfiumLibTargetPath)) + File.Delete(pdfiumLibTargetPath); + + File.Copy(pdfiumLibSourcePath, pdfiumLibTargetPath); + + // + // Cleanup: + // + Console.Write(" cleaning up ..."); + File.Delete(pdfiumTmpDownloadPath); + Directory.Delete(pdfiumTmpExtractPath.FullName, true); + + Console.WriteLine(" done."); + } + + private static Library GetLibraryPath(RID rid) => rid switch + { + RID.LINUX_ARM64 or RID.LINUX_X64 => new(Path.Join("lib", "libpdfium.so"), "libpdfium.so"), + RID.OSX_ARM64 or RID.OSX_X64 => new(Path.Join("lib", "libpdfium.dylib"), "libpdfium.dylib"), + RID.WIN_ARM64 or RID.WIN_X64 => 
new(Path.Join("bin", "pdfium.dll"), "pdfium.dll"), + + _ => new(string.Empty, string.Empty), + }; + + private static string GetPdfiumDownloadUrl(RID rid, string version) + { + var baseUrl = $"https://github.com/bblanchon/pdfium-binaries/releases/download/chromium%2F{version}/pdfium-"; + return rid switch + { + RID.LINUX_ARM64 => $"{baseUrl}linux-arm64.tgz", + RID.LINUX_X64 => $"{baseUrl}linux-x64.tgz", + + RID.OSX_ARM64 => $"{baseUrl}mac-arm64.tgz", + RID.OSX_X64 => $"{baseUrl}mac-x64.tgz", + + RID.WIN_ARM64 => $"{baseUrl}win-arm64.tgz", + RID.WIN_X64 => $"{baseUrl}win-x64.tgz", // .tgz on every platform: InstallAsync unpacks the download with GZipStream + TarFile + + _ => string.Empty, + }; + } +} \ No newline at end of file diff --git a/app/Build/Commands/UpdateMetadataCommands.cs b/app/Build/Commands/UpdateMetadataCommands.cs index a2cc4595..b9b01357 100644 --- a/app/Build/Commands/UpdateMetadataCommands.cs +++ b/app/Build/Commands/UpdateMetadataCommands.cs @@ -105,105 +105,102 @@ public sealed partial class UpdateMetadataCommands // Build the .NET project: // var pathApp = Environment.GetAIStudioDirectory(); - var rids = Environment.GetRidsForCurrentOS(); - foreach (var rid in rids) - { - Console.WriteLine("=============================="); - await this.UpdateArchitecture(rid); - - Console.Write($"- Start .NET build for '{rid.AsMicrosoftRid()}' ..."); - await this.ReadCommandOutput(pathApp, "dotnet", $"clean --configuration release --runtime {rid.AsMicrosoftRid()}"); - var dotnetBuildOutput = await this.ReadCommandOutput(pathApp, "dotnet", $"publish --configuration release --runtime {rid.AsMicrosoftRid()} --disable-build-servers --force"); - var dotnetBuildOutputLines = dotnetBuildOutput.Split([global::System.Environment.NewLine], StringSplitOptions.RemoveEmptyEntries); - var foundIssue = false; - foreach (var buildOutputLine in dotnetBuildOutputLines) - { - if(buildOutputLine.Contains(" error ") || buildOutputLine.Contains("#warning")) - { - if(!foundIssue) - { - foundIssue = true; - Console.WriteLine(); - Console.WriteLine("- Build has
issues:"); - } - - Console.Write(" - "); - Console.WriteLine(buildOutputLine); - } - } - - if(foundIssue) - Console.WriteLine(); - else - { - Console.WriteLine(" completed successfully."); - } - - // - // Prepare the .NET artifact to be used by Tauri as sidecar: - // - var os = Environment.GetOS(); - var tauriSidecarArtifactName = rid switch - { - RID.WIN_X64 => "mindworkAIStudioServer-x86_64-pc-windows-msvc.exe", - RID.WIN_ARM64 => "mindworkAIStudioServer-aarch64-pc-windows-msvc.exe", - - RID.LINUX_X64 => "mindworkAIStudioServer-x86_64-unknown-linux-gnu", - RID.LINUX_ARM64 => "mindworkAIStudioServer-aarch64-unknown-linux-gnu", - - RID.OSX_ARM64 => "mindworkAIStudioServer-aarch64-apple-darwin", - RID.OSX_X64 => "mindworkAIStudioServer-x86_64-apple-darwin", - - _ => string.Empty, - }; - - if (string.IsNullOrWhiteSpace(tauriSidecarArtifactName)) - { - Console.WriteLine($"- Error: Unsupported rid '{rid.AsMicrosoftRid()}'."); - return; - } + var rid = Environment.GetCurrentRid(); - var dotnetArtifactPath = Path.Combine(pathApp, "bin", "dist"); - if(!Directory.Exists(dotnetArtifactPath)) - Directory.CreateDirectory(dotnetArtifactPath); - - var dotnetArtifactFilename = os switch + Console.WriteLine("=============================="); + await this.UpdateArchitecture(rid); + + var pdfiumVersion = await this.ReadPdfiumVersion(); + await Pdfium.InstallAsync(rid, pdfiumVersion); + + Console.Write($"- Start .NET build for {rid.ToUserFriendlyName()} ..."); + await this.ReadCommandOutput(pathApp, "dotnet", $"clean --configuration release --runtime {rid.AsMicrosoftRid()}"); + var dotnetBuildOutput = await this.ReadCommandOutput(pathApp, "dotnet", $"publish --configuration release --runtime {rid.AsMicrosoftRid()} --disable-build-servers --force"); + var dotnetBuildOutputLines = dotnetBuildOutput.Split([global::System.Environment.NewLine], StringSplitOptions.RemoveEmptyEntries); + var foundIssue = false; + foreach (var buildOutputLine in dotnetBuildOutputLines) + { + 
if(buildOutputLine.Contains(" error ") || buildOutputLine.Contains("#warning")) { - "windows" => "mindworkAIStudio.exe", - _ => "mindworkAIStudio", - }; - - var dotnetPublishedPath = Path.Combine(pathApp, "bin", "release", Environment.DOTNET_VERSION, rid.AsMicrosoftRid(), "publish", dotnetArtifactFilename); - var finalDestination = Path.Combine(dotnetArtifactPath, tauriSidecarArtifactName); - - if(File.Exists(dotnetPublishedPath)) - Console.WriteLine("- Published .NET artifact found."); - else - { - Console.WriteLine($"- Error: Published .NET artifact not found: '{dotnetPublishedPath}'."); - return; - } + if(!foundIssue) + { + foundIssue = true; + Console.WriteLine(); + Console.WriteLine("- Build has issues:"); + } - Console.Write($"- Move the .NET artifact to the Tauri sidecar destination ..."); - try - { - File.Move(dotnetPublishedPath, finalDestination, true); - Console.WriteLine(" done."); + Console.Write(" - "); + Console.WriteLine(buildOutputLine); } - catch (Exception e) - { - Console.WriteLine(" failed."); - Console.WriteLine($" - Error: {e.Message}"); - } - + } + + if(foundIssue) Console.WriteLine(); + else + { + Console.WriteLine(" completed successfully."); + } + + // + // Prepare the .NET artifact to be used by Tauri as sidecar: + // + var os = Environment.GetOS(); + var tauriSidecarArtifactName = rid switch + { + RID.WIN_X64 => "mindworkAIStudioServer-x86_64-pc-windows-msvc.exe", + RID.WIN_ARM64 => "mindworkAIStudioServer-aarch64-pc-windows-msvc.exe", + + RID.LINUX_X64 => "mindworkAIStudioServer-x86_64-unknown-linux-gnu", + RID.LINUX_ARM64 => "mindworkAIStudioServer-aarch64-unknown-linux-gnu", + + RID.OSX_ARM64 => "mindworkAIStudioServer-aarch64-apple-darwin", + RID.OSX_X64 => "mindworkAIStudioServer-x86_64-apple-darwin", + + _ => string.Empty, + }; + + if (string.IsNullOrWhiteSpace(tauriSidecarArtifactName)) + { + Console.WriteLine($"- Error: Unsupported rid '{rid.AsMicrosoftRid()}'."); + return; + } + + var dotnetArtifactPath = Path.Combine(pathApp, 
"bin", "dist"); + if(!Directory.Exists(dotnetArtifactPath)) + Directory.CreateDirectory(dotnetArtifactPath); + + var dotnetArtifactFilename = os switch + { + "windows" => "mindworkAIStudio.exe", + _ => "mindworkAIStudio", + }; + + var dotnetPublishedPath = Path.Combine(pathApp, "bin", "release", Environment.DOTNET_VERSION, rid.AsMicrosoftRid(), "publish", dotnetArtifactFilename); + var finalDestination = Path.Combine(dotnetArtifactPath, tauriSidecarArtifactName); + + if(File.Exists(dotnetPublishedPath)) + Console.WriteLine("- Published .NET artifact found."); + else + { + Console.WriteLine($"- Error: Published .NET artifact not found: '{dotnetPublishedPath}'."); + return; } + Console.Write($"- Move the .NET artifact to the Tauri sidecar destination ..."); + try + { + File.Move(dotnetPublishedPath, finalDestination, true); + Console.WriteLine(" done."); + } + catch (Exception e) + { + Console.WriteLine(" failed."); + Console.WriteLine($" - Error: {e.Message}"); + } + // // Build the Rust project / runtime: // - - Console.WriteLine("=============================="); Console.WriteLine("- Start building the Rust runtime ..."); var pathRuntime = Environment.GetRustRuntimeDirectory(); @@ -314,6 +311,18 @@ public sealed partial class UpdateMetadataCommands await File.WriteAllTextAsync(changelogCodePath, changelogCode, Environment.UTF8_NO_BOM); Console.WriteLine(" done."); } + + private async Task ReadPdfiumVersion() + { + const int PDFIUM_VERSION_INDEX = 10; + + var pathMetadata = Environment.GetMetadataPath(); + var lines = await File.ReadAllLinesAsync(pathMetadata, Encoding.UTF8); + var currentPdfiumVersion = lines[PDFIUM_VERSION_INDEX].Trim(); + var shortVersion = currentPdfiumVersion.Split('.')[2]; + + return shortVersion; + } private async Task UpdateArchitecture(RID rid) { @@ -321,7 +330,7 @@ public sealed partial class UpdateMetadataCommands var pathMetadata = Environment.GetMetadataPath(); var lines = await File.ReadAllLinesAsync(pathMetadata, Encoding.UTF8); - 
Console.Write("- Updating architecture ..."); + Console.Write($"- Updating architecture to {rid.ToUserFriendlyName()} ..."); lines[ARCHITECTURE_INDEX] = rid.AsMicrosoftRid(); await File.WriteAllLinesAsync(pathMetadata, lines, Environment.UTF8_NO_BOM); diff --git a/app/Build/Commands/UpdateWebAssetsCommand.cs b/app/Build/Commands/UpdateWebAssetsCommand.cs index f56dc795..d044a256 100644 --- a/app/Build/Commands/UpdateWebAssetsCommand.cs +++ b/app/Build/Commands/UpdateWebAssetsCommand.cs @@ -17,7 +17,7 @@ public sealed class UpdateWebAssetsCommand Console.WriteLine("========================="); Console.Write("- Updating web assets ..."); - var rid = Environment.GetRidsForCurrentOS().First(); + var rid = Environment.GetCurrentRid(); var cwd = Environment.GetAIStudioDirectory(); var contentPath = Path.Join(cwd, "bin", "release", Environment.DOTNET_VERSION, rid.AsMicrosoftRid(), "publish", "wwwroot", "_content"); var isMudBlazorDirectoryPresent = Directory.Exists(Path.Join(contentPath, "MudBlazor")); diff --git a/app/Build/Tools/Environment.cs b/app/Build/Tools/Environment.cs index a86000f4..f03ff354 100644 --- a/app/Build/Tools/Environment.cs +++ b/app/Build/Tools/Environment.cs @@ -76,4 +76,38 @@ public static class Environment Console.WriteLine($"Error: Unsupported OS '{RuntimeInformation.OSDescription}'"); return []; } + + public static RID GetCurrentRid() + { + var arch = RuntimeInformation.ProcessArchitecture; + if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) + return arch switch + { + Architecture.X64 => RID.WIN_X64, + Architecture.Arm64 => RID.WIN_ARM64, + + _ => RID.NONE, + }; + + if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) + return arch switch + { + Architecture.X64 => RID.OSX_X64, + Architecture.Arm64 => RID.OSX_ARM64, + + _ => RID.NONE, + }; + + if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux)) + return arch switch + { + Architecture.X64 => RID.LINUX_X64, + Architecture.Arm64 => RID.LINUX_ARM64, + + _ => RID.NONE, + }; + + 
Console.WriteLine($"Error: Unsupported OS '{RuntimeInformation.OSDescription}'"); + return RID.NONE; + } } \ No newline at end of file diff --git a/app/MindWork AI Studio.sln.DotSettings b/app/MindWork AI Studio.sln.DotSettings index 3eb9acc5..d85707aa 100644 --- a/app/MindWork AI Studio.sln.DotSettings +++ b/app/MindWork AI Studio.sln.DotSettings @@ -9,6 +9,7 @@ LM MSG OS + PDF RAG RID TB diff --git a/app/MindWork AI Studio/Assistants/I18N/allTexts.lua b/app/MindWork AI Studio/Assistants/I18N/allTexts.lua index f23e9219..eefd323f 100644 --- a/app/MindWork AI Studio/Assistants/I18N/allTexts.lua +++ b/app/MindWork AI Studio/Assistants/I18N/allTexts.lua @@ -631,6 +631,12 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::PROFILESELECTION::T918741365"] = "You can -- Provider UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::PROVIDERSELECTION::T900237532"] = "Provider" +-- Use PDF content as input +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READPDFCONTENT::T2849276709"] = "Use PDF content as input" + +-- Select PDF file +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READPDFCONTENT::T63272795"] = "Select PDF file" + -- The content is cleaned using an LLM agent: the main content is extracted, advertisements and other irrelevant things are attempted to be removed; relative links are attempted to be converted into absolute links so that they can be used. UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READWEBCONTENT::T1164201762"] = "The content is cleaned using an LLM agent: the main content is extracted, advertisements and other irrelevant things are attempted to be removed; relative links are attempted to be converted into absolute links so that they can be used." @@ -2116,6 +2122,9 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::ABOUT::T228561878"] = "In order to use any LLM -- The C# language is used for the implementation of the user interface and the backend. To implement the user interface with C#, the Blazor technology from ASP.NET Core is used. All these technologies are integrated into the .NET SDK. 
UI_TEXT_CONTENT["AISTUDIO::PAGES::ABOUT::T2329884315"] = "The C# language is used for the implementation of the user interface and the backend. To implement the user interface with C#, the Blazor technology from ASP.NET Core is used. All these technologies are integrated into the .NET SDK." +-- Used PDFium version +UI_TEXT_CONTENT["AISTUDIO::PAGES::ABOUT::T2368247719"] = "Used PDFium version" + -- This library is used to determine the language of the operating system. This is necessary to set the language of the user interface. UI_TEXT_CONTENT["AISTUDIO::PAGES::ABOUT::T2557014401"] = "This library is used to determine the language of the operating system. This is necessary to set the language of the user interface." diff --git a/app/MindWork AI Studio/Assistants/LegalCheck/AssistantLegalCheck.razor b/app/MindWork AI Studio/Assistants/LegalCheck/AssistantLegalCheck.razor index fb258aeb..01405125 100644 --- a/app/MindWork AI Studio/Assistants/LegalCheck/AssistantLegalCheck.razor +++ b/app/MindWork AI Studio/Assistants/LegalCheck/AssistantLegalCheck.razor @@ -1,4 +1,5 @@ @attribute [Route(Routes.ASSISTANT_LEGAL_CHECK)] +@using AIStudio.Settings.DataModel @inherits AssistantBaseCore @if (!this.SettingsManager.ConfigurationData.LegalCheck.HideWebContentReader) @@ -6,6 +7,11 @@ } +@if (PreviewFeatures.PRE_READ_PDF_2025.IsEnabled(this.SettingsManager)) +{ + +} + \ No newline at end of file diff --git a/app/MindWork AI Studio/Assistants/TextSummarizer/AssistantTextSummarizer.razor b/app/MindWork AI Studio/Assistants/TextSummarizer/AssistantTextSummarizer.razor index adefc266..3f49d185 100644 --- a/app/MindWork AI Studio/Assistants/TextSummarizer/AssistantTextSummarizer.razor +++ b/app/MindWork AI Studio/Assistants/TextSummarizer/AssistantTextSummarizer.razor @@ -1,4 +1,5 @@ @attribute [Route(Routes.ASSISTANT_SUMMARIZER)] +@using AIStudio.Settings.DataModel @inherits AssistantBaseCore @if (!this.SettingsManager.ConfigurationData.TextSummarizer.HideWebContentReader) @@ -6,6 
+7,11 @@ } +@if (PreviewFeatures.PRE_READ_PDF_2025.IsEnabled(this.SettingsManager)) +{ + +} + diff --git a/app/MindWork AI Studio/Assistants/Translation/AssistantTranslation.razor b/app/MindWork AI Studio/Assistants/Translation/AssistantTranslation.razor index a1e6cdcc..27b902ce 100644 --- a/app/MindWork AI Studio/Assistants/Translation/AssistantTranslation.razor +++ b/app/MindWork AI Studio/Assistants/Translation/AssistantTranslation.razor @@ -1,4 +1,5 @@ @attribute [Route(Routes.ASSISTANT_TRANSLATION)] +@using AIStudio.Settings.DataModel @inherits AssistantBaseCore @if (!this.SettingsManager.ConfigurationData.Translation.HideWebContentReader) @@ -6,6 +7,11 @@ } +@if (PreviewFeatures.PRE_READ_PDF_2025.IsEnabled(this.SettingsManager)) +{ + +} + @if (this.liveTranslation) { diff --git a/app/MindWork AI Studio/Components/ReadPDFContent.razor b/app/MindWork AI Studio/Components/ReadPDFContent.razor new file mode 100644 index 00000000..bd101740 --- /dev/null +++ b/app/MindWork AI Studio/Components/ReadPDFContent.razor @@ -0,0 +1,4 @@ +@inherits MSGComponentBase + + @T("Use PDF content as input") + \ No newline at end of file diff --git a/app/MindWork AI Studio/Components/ReadPDFContent.razor.cs b/app/MindWork AI Studio/Components/ReadPDFContent.razor.cs new file mode 100644 index 00000000..1cdefb2b --- /dev/null +++ b/app/MindWork AI Studio/Components/ReadPDFContent.razor.cs @@ -0,0 +1,33 @@ +using AIStudio.Tools.Services; + +using Microsoft.AspNetCore.Components; + +namespace AIStudio.Components; + +public partial class ReadPDFContent : MSGComponentBase +{ + [Parameter] + public string PDFContent { get; set; } = string.Empty; + + [Parameter] + public EventCallback PDFContentChanged { get; set; } + + [Inject] + private RustService RustService { get; init; } = null!; + + private async Task SelectFile() + { + var pdfFile = await this.RustService.SelectFile(T("Select PDF file")); + if (pdfFile.UserCancelled) + return; + + if (!pdfFile.SelectedFilePath.EndsWith(".pdf", 
StringComparison.OrdinalIgnoreCase)) + return; + + if(!File.Exists(pdfFile.SelectedFilePath)) + return; + + var pdfText = await this.RustService.GetPDFText(pdfFile.SelectedFilePath); + await this.PDFContentChanged.InvokeAsync(pdfText); + } +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Components/ReadWebContent.razor b/app/MindWork AI Studio/Components/ReadWebContent.razor index 8907f383..f5524a20 100644 --- a/app/MindWork AI Studio/Components/ReadWebContent.razor +++ b/app/MindWork AI Studio/Components/ReadWebContent.razor @@ -1,5 +1,5 @@ @inherits MSGComponentBase - + @if (this.showWebContentReader) { diff --git a/app/MindWork AI Studio/MindWork AI Studio.csproj b/app/MindWork AI Studio/MindWork AI Studio.csproj index c30d9ba2..d190898a 100644 --- a/app/MindWork AI Studio/MindWork AI Studio.csproj +++ b/app/MindWork AI Studio/MindWork AI Studio.csproj @@ -80,6 +80,7 @@ $([System.String]::Copy( $(Metadata) ).Split( ';' )[ 7 ]) $([System.String]::Copy( $(Metadata) ).Split( ';' )[ 8 ]) $([System.String]::Copy( $(Metadata) ).Split( ';' )[ 9 ]) + $([System.String]::Copy( $(Metadata) ).Split( ';' )[ 10 ]) true @@ -104,6 +105,9 @@ <_Parameter1>$(MetaArchitecture) + + <_Parameter1>$(MetaPdfiumVersion) + diff --git a/app/MindWork AI Studio/Pages/About.razor b/app/MindWork AI Studio/Pages/About.razor index 4e7811f5..0f5329d0 100644 --- a/app/MindWork AI Studio/Pages/About.razor +++ b/app/MindWork AI Studio/Pages/About.razor @@ -18,6 +18,7 @@ + @@ -111,6 +112,7 @@ + diff --git a/app/MindWork AI Studio/Pages/About.razor.cs b/app/MindWork AI Studio/Pages/About.razor.cs index 4445a290..66657077 100644 --- a/app/MindWork AI Studio/Pages/About.razor.cs +++ b/app/MindWork AI Studio/Pages/About.razor.cs @@ -21,7 +21,8 @@ public partial class About : MSGComponentBase private static readonly Assembly ASSEMBLY = Assembly.GetExecutingAssembly(); private static readonly MetaDataAttribute META_DATA = ASSEMBLY.GetCustomAttribute()!; - private static readonly 
MetaDataArchitecture META_DATA_ARCH = ASSEMBLY.GetCustomAttribute()!; + private static readonly MetaDataArchitectureAttribute META_DATA_ARCH = ASSEMBLY.GetCustomAttribute()!; + private static readonly MetaDataLibrariesAttribute META_DATA_LIBRARIES = ASSEMBLY.GetCustomAttribute()!; private string osLanguage = string.Empty; @@ -40,6 +41,8 @@ public partial class About : MSGComponentBase private string VersionDotnetSdk => $"{T("Used .NET SDK")}: v{META_DATA.DotnetSdkVersion}"; private string BuildTime => $"{T("Build time")}: {META_DATA.BuildTime}"; + + private string VersionPdfium => $"{T("Used PDFium version")}: v{META_DATA_LIBRARIES.PdfiumVersion}"; private GetLogPathsResponse logPaths; diff --git a/app/MindWork AI Studio/Plugins/languages/de-de-43065dbc-78d0-45b7-92be-f14c2926e2dc/plugin.lua b/app/MindWork AI Studio/Plugins/languages/de-de-43065dbc-78d0-45b7-92be-f14c2926e2dc/plugin.lua index 651c5e60..6b7fa4af 100644 --- a/app/MindWork AI Studio/Plugins/languages/de-de-43065dbc-78d0-45b7-92be-f14c2926e2dc/plugin.lua +++ b/app/MindWork AI Studio/Plugins/languages/de-de-43065dbc-78d0-45b7-92be-f14c2926e2dc/plugin.lua @@ -633,6 +633,12 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::PROFILESELECTION::T918741365"] = "Hier ka -- Provider UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::PROVIDERSELECTION::T900237532"] = "Anbieter" +-- Use PDF content as input +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READPDFCONTENT::T2849276709"] = "PDF-Inhalt als Eingabe verwenden" + +-- Select PDF file +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READPDFCONTENT::T63272795"] = "PDF-Datei auswählen" + -- The content is cleaned using an LLM agent: the main content is extracted, advertisements and other irrelevant things are attempted to be removed; relative links are attempted to be converted into absolute links so that they can be used. 
UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READWEBCONTENT::T1164201762"] = "Der Inhalt wird mithilfe eines LLM-Agents bereinigt: Der Hauptinhalt wird extrahiert, Werbung und andere irrelevante Elemente werden nach Möglichkeit entfernt. Relative Links werden nach Möglichkeit in absolute Links umgewandelt, damit sie verwendet werden können." @@ -2118,6 +2124,9 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::ABOUT::T228561878"] = "Um ein beliebiges LLM n -- The C# language is used for the implementation of the user interface and the backend. To implement the user interface with C#, the Blazor technology from ASP.NET Core is used. All these technologies are integrated into the .NET SDK. UI_TEXT_CONTENT["AISTUDIO::PAGES::ABOUT::T2329884315"] = "Die Programmiersprache C# wird für die Umsetzung der Benutzeroberfläche und des Backends verwendet. Für die Entwicklung der Benutzeroberfläche mit C# kommt die Blazor-Technologie aus ASP.NET Core zum Einsatz. Alle diese Technologien sind im .NET SDK integriert." +-- Used PDFium version +UI_TEXT_CONTENT["AISTUDIO::PAGES::ABOUT::T2368247719"] = "Verwendete PDFium-Version" + -- This library is used to determine the language of the operating system. This is necessary to set the language of the user interface. UI_TEXT_CONTENT["AISTUDIO::PAGES::ABOUT::T2557014401"] = "Diese Bibliothek wird verwendet, um die Sprache des Betriebssystems zu erkennen. Dies ist notwendig, um die Sprache der Benutzeroberfläche einzustellen." 
diff --git a/app/MindWork AI Studio/Plugins/languages/en-us-97dfb1ba-50c4-4440-8dfa-6575daf543c8/plugin.lua b/app/MindWork AI Studio/Plugins/languages/en-us-97dfb1ba-50c4-4440-8dfa-6575daf543c8/plugin.lua index 1df7ee7b..12abae43 100644 --- a/app/MindWork AI Studio/Plugins/languages/en-us-97dfb1ba-50c4-4440-8dfa-6575daf543c8/plugin.lua +++ b/app/MindWork AI Studio/Plugins/languages/en-us-97dfb1ba-50c4-4440-8dfa-6575daf543c8/plugin.lua @@ -633,6 +633,12 @@ UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::PROFILESELECTION::T918741365"] = "You can -- Provider UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::PROVIDERSELECTION::T900237532"] = "Provider" +-- Use PDF content as input +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READPDFCONTENT::T2849276709"] = "Use PDF content as input" + +-- Select PDF file +UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READPDFCONTENT::T63272795"] = "Select PDF file" + -- The content is cleaned using an LLM agent: the main content is extracted, advertisements and other irrelevant things are attempted to be removed; relative links are attempted to be converted into absolute links so that they can be used. UI_TEXT_CONTENT["AISTUDIO::COMPONENTS::READWEBCONTENT::T1164201762"] = "The content is cleaned using an LLM agent: the main content is extracted, advertisements and other irrelevant things are attempted to be removed; relative links are attempted to be converted into absolute links so that they can be used." @@ -2118,6 +2124,9 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::ABOUT::T228561878"] = "In order to use any LLM -- The C# language is used for the implementation of the user interface and the backend. To implement the user interface with C#, the Blazor technology from ASP.NET Core is used. All these technologies are integrated into the .NET SDK. UI_TEXT_CONTENT["AISTUDIO::PAGES::ABOUT::T2329884315"] = "The C# language is used for the implementation of the user interface and the backend. To implement the user interface with C#, the Blazor technology from ASP.NET Core is used. 
All these technologies are integrated into the .NET SDK." +-- Used PDFium version +UI_TEXT_CONTENT["AISTUDIO::PAGES::ABOUT::T2368247719"] = "Used PDFium version" + -- This library is used to determine the language of the operating system. This is necessary to set the language of the user interface. UI_TEXT_CONTENT["AISTUDIO::PAGES::ABOUT::T2557014401"] = "This library is used to determine the language of the operating system. This is necessary to set the language of the user interface." @@ -2519,3 +2528,4 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::WRITER::T3948127789"] = "Suggestion" -- Your stage directions UI_TEXT_CONTENT["AISTUDIO::PAGES::WRITER::T779923726"] = "Your stage directions" + diff --git a/app/MindWork AI Studio/Settings/DataModel/PreviewFeatures.cs b/app/MindWork AI Studio/Settings/DataModel/PreviewFeatures.cs index ff642a0a..85acedec 100644 --- a/app/MindWork AI Studio/Settings/DataModel/PreviewFeatures.cs +++ b/app/MindWork AI Studio/Settings/DataModel/PreviewFeatures.cs @@ -10,4 +10,5 @@ public enum PreviewFeatures PRE_RAG_2024, PRE_PLUGINS_2025, + PRE_READ_PDF_2025, } \ No newline at end of file diff --git a/app/MindWork AI Studio/Settings/DataModel/PreviewFeaturesExtensions.cs b/app/MindWork AI Studio/Settings/DataModel/PreviewFeaturesExtensions.cs index 2eb25587..dee08282 100644 --- a/app/MindWork AI Studio/Settings/DataModel/PreviewFeaturesExtensions.cs +++ b/app/MindWork AI Studio/Settings/DataModel/PreviewFeaturesExtensions.cs @@ -6,7 +6,9 @@ public static class PreviewFeaturesExtensions { PreviewFeatures.PRE_WRITER_MODE_2024 => "Writer Mode: Experiments about how to write long texts using AI", PreviewFeatures.PRE_RAG_2024 => "RAG: Preview of our RAG implementation where you can refer your files or integrate enterprise data within your company", + PreviewFeatures.PRE_PLUGINS_2025 => "Plugins: Preview of our plugin system where you can extend the functionality of the app", + PreviewFeatures.PRE_READ_PDF_2025 => "Read PDF: Preview of our PDF reading 
system where you can read and extract text from PDF files", _ => "Unknown preview feature" }; diff --git a/app/MindWork AI Studio/Settings/DataModel/PreviewVisibilityExtensions.cs b/app/MindWork AI Studio/Settings/DataModel/PreviewVisibilityExtensions.cs index b0f07716..6905fc0f 100644 --- a/app/MindWork AI Studio/Settings/DataModel/PreviewVisibilityExtensions.cs +++ b/app/MindWork AI Studio/Settings/DataModel/PreviewVisibilityExtensions.cs @@ -19,6 +19,7 @@ public static class PreviewVisibilityExtensions if (visibility >= PreviewVisibility.PROTOTYPE) { + features.Add(PreviewFeatures.PRE_READ_PDF_2025); features.Add(PreviewFeatures.PRE_RAG_2024); } diff --git a/app/MindWork AI Studio/Tools/Metadata/MetaDataArchitecture.cs b/app/MindWork AI Studio/Tools/Metadata/MetaDataArchitecture.cs index c5341c54..e37675d9 100644 --- a/app/MindWork AI Studio/Tools/Metadata/MetaDataArchitecture.cs +++ b/app/MindWork AI Studio/Tools/Metadata/MetaDataArchitecture.cs @@ -2,7 +2,7 @@ namespace AIStudio.Tools.Metadata; [AttributeUsage(AttributeTargets.Assembly)] -public class MetaDataArchitecture(string architecture) : Attribute +public class MetaDataArchitectureAttribute(string architecture) : Attribute { public string Architecture => architecture; } \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/Metadata/MetaDataLibrariesAttribute.cs b/app/MindWork AI Studio/Tools/Metadata/MetaDataLibrariesAttribute.cs new file mode 100644 index 00000000..d8e70240 --- /dev/null +++ b/app/MindWork AI Studio/Tools/Metadata/MetaDataLibrariesAttribute.cs @@ -0,0 +1,6 @@ +namespace AIStudio.Tools.Metadata; + +public class MetaDataLibrariesAttribute(string pdfiumVersion) : Attribute +{ + public string PdfiumVersion => pdfiumVersion; +} \ No newline at end of file diff --git a/app/MindWork AI Studio/Tools/Services/RustService.Retrieval.cs b/app/MindWork AI Studio/Tools/Services/RustService.Retrieval.cs new file mode 100644 index 00000000..4a201564 --- /dev/null +++ b/app/MindWork AI 
Studio/Tools/Services/RustService.Retrieval.cs @@ -0,0 +1,16 @@ +namespace AIStudio.Tools.Services; + +public sealed partial class RustService +{ + public async Task GetPDFText(string filePath) + { + var response = await this.http.GetAsync($"/retrieval/fs/read/pdf?file_path={filePath}"); + if (!response.IsSuccessStatusCode) + { + this.logger!.LogError($"Failed to read the PDF file due to an network error: '{response.StatusCode}'"); + return string.Empty; + } + + return await response.Content.ReadAsStringAsync(); + } +} \ No newline at end of file diff --git a/app/MindWork AI Studio/wwwroot/changelog/v0.9.42.md b/app/MindWork AI Studio/wwwroot/changelog/v0.9.42.md index 9a4783ae..d69edced 100644 --- a/app/MindWork AI Studio/wwwroot/changelog/v0.9.42.md +++ b/app/MindWork AI Studio/wwwroot/changelog/v0.9.42.md @@ -1,8 +1,9 @@ # v0.9.42, build 217 (2025-05-xx xx:xx UTC) - Added the writing style "Changelog" to the rewrite & improve text assistant. This helps to create changelogs for your projects. +- Added an option to load PDF files directly into the translation, text summarization, and legal check assistants as a preview prototype for testing before release. - Improved the model selection for OpenAI by removing all `o1-pro` models. These models cannot be used right now, since OpenAI introduced a new API, which is not yet supported by MindWork AI Studio. - Improved the internal plugin maintenance so that removed resources are now removed from the file system. - Improved the app settings to apply the chosen language immediately. - Fixed an issue where empty lines in source code were being ignored by the Markdown renderer. Thanks My Nihongo for fixing this bug in the `MudBlazor.Markdown` repository. - Fixed the localization assistant not being able to load the localization file when used in the release app. -- Upgraded .NET dependencies. \ No newline at end of file +- Upgraded Rust & .NET dependencies. 
\ No newline at end of file diff --git a/metadata.txt b/metadata.txt index 091aae9e..cb6b4f14 100644 --- a/metadata.txt +++ b/metadata.txt @@ -7,4 +7,5 @@ 8.5.1 1.8.1 47b6a896851, release -osx-arm64 \ No newline at end of file +osx-arm64 +137.0.7123.0 \ No newline at end of file diff --git a/runtime/Cargo.lock b/runtime/Cargo.lock index 5a820f1a..7b127a2e 100644 --- a/runtime/Cargo.lock +++ b/runtime/Cargo.lock @@ -188,6 +188,12 @@ dependencies = [ "system-deps 6.2.2", ] +[[package]] +name = "atoi_simd" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4790f9e8961209112beb783d85449b508673cf4a6a419c8449b210743ac4dbe9" + [[package]] name = "atomic" version = "0.5.3" @@ -430,15 +436,17 @@ dependencies = [ [[package]] name = "calamine" -version = "0.26.1" +version = "0.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "138646b9af2c5d7f1804ea4bf93afc597737d2bd4f7341d67c48b03316976eb1" +checksum = "6d80f81ba5c68206b9027e62346d49dc26fb32ffc4fe6ef7022a8ae21d348ccb" dependencies = [ + "atoi_simd", "byteorder", "codepage", "encoding_rs", + "fast-float2", "log", - "quick-xml 0.31.0", + "quick-xml 0.37.5", "serde", "zip 2.5.0", ] @@ -1064,6 +1072,12 @@ dependencies = [ "zune-inflate", ] +[[package]] +name = "fast-float2" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8eb564c5c7423d25c886fb561d1e4ee69f72354d16918afa32c08811f6b6a55" + [[package]] name = "fastrand" version = "2.1.0" @@ -3200,9 +3214,9 @@ dependencies = [ [[package]] name = "pdfium-render" -version = "0.8.30" +version = "0.8.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2773a939ec2c736640f5f2e62a325c2e1a997d694961c50f17cadfb4c8682e84" +checksum = "a473e1f4c2630f7240b127eb9efebd1d9e55053015b624bfe9dbeae0299947e1" dependencies = [ "bitflags 2.6.0", "bytemuck", @@ -3563,20 +3577,20 @@ checksum = 
"a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" [[package]] name = "quick-xml" -version = "0.31.0" +version = "0.32.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1004a344b30a54e2ee58d66a71b32d2db2feb0a31f9a2d302bf0536f15de2a33" +checksum = "1d3a6e5838b60e0e8fa7a43f22ade549a37d61f8bdbe636d0d7816191de969c2" dependencies = [ - "encoding_rs", "memchr", ] [[package]] name = "quick-xml" -version = "0.32.0" +version = "0.37.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d3a6e5838b60e0e8fa7a43f22ade549a37d61f8bdbe636d0d7816191de969c2" +checksum = "331e97a1af0bf59823e6eadffe373d7b27f485be8748f71471c662c1f269b7fb" dependencies = [ + "encoding_rs", "memchr", ] diff --git a/runtime/Cargo.toml b/runtime/Cargo.toml index aec7af9b..d2bf5445 100644 --- a/runtime/Cargo.toml +++ b/runtime/Cargo.toml @@ -34,8 +34,8 @@ hmac = "0.12.1" sha2 = "0.10.8" rcgen = { version = "0.13.2", features = ["pem"] } file-format = "0.26.0" -calamine = "0.26.1" -pdfium-render = "0.8.30" +calamine = "0.27.0" +pdfium-render = "0.8.31" sys-locale = "0.3.2" # Fixes security vulnerability downstream, where the upstream is not fixed yet: diff --git a/runtime/src/app_window.rs b/runtime/src/app_window.rs index a3b41510..874ed531 100644 --- a/runtime/src/app_window.rs +++ b/runtime/src/app_window.rs @@ -7,13 +7,14 @@ use rocket::serde::json::Json; use rocket::serde::Serialize; use serde::Deserialize; use tauri::updater::UpdateResponse; -use tauri::{Manager, Window}; +use tauri::{Manager, PathResolver, Window}; use tauri::api::dialog::blocking::FileDialogBuilder; use tokio::time; use crate::api_token::APIToken; use crate::dotnet::stop_dotnet_server; use crate::environment::{is_prod, CONFIG_DIRECTORY, DATA_DIRECTORY}; use crate::log::switch_to_file_logging; +use crate::metadata::META_DATA; /// The Tauri main window. 
static MAIN_WINDOW: Lazy>> = Lazy::new(|| Mutex::new(None)); @@ -39,6 +40,7 @@ pub fn start_tauri() { info!(Source = "Bootloader Tauri"; "Reconfigure the file logger to use the app data directory {data_path:?}"); switch_to_file_logging(data_path).map_err(|e| error!("Failed to switch logging to file: {e}")).unwrap(); + deploy_pdfium(app.path_resolver()); Ok(()) }) .plugin(tauri_plugin_window_state::Builder::default().build()) @@ -319,4 +321,50 @@ pub struct PreviousFile { pub struct FileSelectionResponse { user_cancelled: bool, selected_file_path: String, +} + +fn deploy_pdfium(path_resolver: PathResolver) { + info!(Source = "Bootloader Tauri"; "Deploy PDFium from the resources..."); + let working_directory = std::env::current_dir().unwrap(); + let pdfium_target_path = working_directory; + let metadata = &META_DATA; + let metadata = metadata.lock().unwrap(); + let arch = metadata.clone().unwrap().architecture; + let pdfium_filename = match arch.as_str() { + "linux-x64" => Some("libpdfium.so"), + "linux-arm64" => Some("libpdfium.so"), + + "win-x64" => Some("pdfium.dll"), + "win-arm64" => Some("pdfium.dll"), + + "osx-x64" => Some("libpdfium.dylib"), + "osx-arm64" => Some("libpdfium.dylib"), + + _ => None, + }; + + if pdfium_filename.is_none() { + error!(Source = "Bootloader Tauri"; "Failed to find the PDFium library for the current platform."); + return; + } + + let pdfium_filename = pdfium_filename.unwrap(); + let pdfium_relative_source_path = String::from("resources/libraries/") + pdfium_filename; + let pdfium_source_path = path_resolver.resolve_resource(pdfium_relative_source_path); + if pdfium_source_path.is_none() { + error!(Source = "Bootloader Tauri"; "Failed to find the PDFium library for the current platform."); + return; + } + + let pdfium_source_path = pdfium_source_path.unwrap(); + let pdfium_target_path = pdfium_target_path.join(pdfium_filename); + + info!(Source = "Bootloader Tauri"; "Detected platform: {arch:?}, expected PDFium filename: 
{pdfium_filename:?}, source path: {pdfium_source_path:?}, target path: {pdfium_target_path:?}"); + + if let Err(e) = std::fs::copy(pdfium_source_path, pdfium_target_path) { + error!(Source = "Bootloader Tauri"; "Failed to copy the PDFium library for the current platform: {e}"); + return; + } + + info!(Source = "Bootloader Tauri"; "Successfully deployed PDFium."); } \ No newline at end of file diff --git a/runtime/src/file_data.rs b/runtime/src/file_data.rs index 29bc477a..d5349f71 100644 --- a/runtime/src/file_data.rs +++ b/runtime/src/file_data.rs @@ -15,6 +15,7 @@ use rocket::response::stream::{EventStream, Event}; use rocket::tokio::select; use rocket::serde::Serialize; use rocket::get; +use crate::api_token::APIToken; #[derive(Debug, Serialize)] pub struct Chunk { @@ -39,7 +40,7 @@ type Result = std::result::Result type ChunkStream = Pin> + Send>>; #[get("/retrieval/fs/extract?")] -pub async fn extract_data(path: String, mut end: Shutdown) -> EventStream![] { +pub async fn extract_data(_token: APIToken, path: String, mut end: Shutdown) -> EventStream![] { EventStream! { let stream_result = stream_data(&path).await; match stream_result { @@ -78,7 +79,7 @@ async fn stream_data(file_path: &str) -> Result { FileFormat::from_file(&file_path_clone) }).await??; - let ext = file_path.split('.').last().unwrap_or(""); + let ext = file_path.split('.').next_back().unwrap_or(""); let stream = match ext { DOCX | ODT => { let from = if ext == DOCX { "docx" } else { "odt" }; @@ -91,7 +92,7 @@ async fn stream_data(file_path: &str) -> Result { _ => match fmt.kind() { Kind::Document => match fmt { - FileFormat::PortableDocumentFormat => read_pdf(file_path).await?, + FileFormat::PortableDocumentFormat => stream_pdf(file_path).await?, FileFormat::MicrosoftWordDocument => { convert_with_pandoc(file_path, "docx", TO_MARKDOWN).await? } @@ -133,7 +134,7 @@ async fn stream_text_file(file_path: &str) -> Result { let mut line_number = 0; let stream = stream! 
{ - while let Ok(Some(line)) = lines.next_line().await { // Korrektur hier + while let Ok(Some(line)) = lines.next_line().await { line_number += 1; yield Ok(Chunk { content: line, @@ -145,23 +146,47 @@ async fn stream_text_file(file_path: &str) -> Result { Ok(Box::pin(stream)) } -async fn read_pdf(file_path: &str) -> Result { +#[get("/retrieval/fs/read/pdf?")] +pub fn read_pdf(_token: APIToken, file_path: String) -> String { + let pdfium = Pdfium::default(); + let doc = match pdfium.load_pdf_from_file(&file_path, None) { + Ok(document) => document, + Err(e) => return e.to_string(), + }; + + let mut pdf_content = String::new(); + for page in doc.pages().iter() { + let content = match page.text().map(|text_content| text_content.all()) { + Ok(content) => content, + Err(_) => { + continue + } + }; + + pdf_content.push_str(&content); + pdf_content.push_str("\n\n"); + } + + pdf_content +} + +async fn stream_pdf(file_path: &str) -> Result { let path = file_path.to_owned(); let (tx, rx) = mpsc::channel(10); tokio::task::spawn_blocking(move || { let pdfium = Pdfium::default(); let doc = match pdfium.load_pdf_from_file(&path, None) { - Ok(d) => d, + Ok(document) => document, Err(e) => { let _ = tx.blocking_send(Err(e.into())); return; } }; - for (i, page) in doc.pages().iter().enumerate() { + for (num_page, page) in doc.pages().iter().enumerate() { let content = match page.text().map(|t| t.all()) { - Ok(c) => c, + Ok(text_content) => text_content, Err(e) => { let _ = tx.blocking_send(Err(e.into())); continue; @@ -170,7 +195,7 @@ async fn read_pdf(file_path: &str) -> Result { if tx.blocking_send(Ok(Chunk { content, - metadata: Metadata::Pdf { page_number: i + 1 }, + metadata: Metadata::Pdf { page_number: num_page + 1 }, })).is_err() { break; } diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index a97347d3..8a2243ba 100644 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -9,4 +9,5 @@ pub mod secret; pub mod clipboard; pub mod runtime_api; pub mod certificate; -pub mod 
file_data; \ No newline at end of file +pub mod file_data; +pub mod metadata; \ No newline at end of file diff --git a/runtime/src/main.rs b/runtime/src/main.rs index be7f5ea1..1aa71fcd 100644 --- a/runtime/src/main.rs +++ b/runtime/src/main.rs @@ -10,6 +10,7 @@ use mindwork_ai_studio::certificate::{generate_certificate}; use mindwork_ai_studio::dotnet::start_dotnet_server; use mindwork_ai_studio::environment::is_dev; use mindwork_ai_studio::log::init_logging; +use mindwork_ai_studio::metadata::{MetaData, META_DATA}; use mindwork_ai_studio::runtime_api::start_runtime_api; #[tokio::main] @@ -26,6 +27,22 @@ async fn main() { let tauri_version = metadata_lines.next().unwrap(); let app_commit_hash = metadata_lines.next().unwrap(); let architecture = metadata_lines.next().unwrap(); + let pdfium_version = metadata_lines.next().unwrap(); + + let metadata = MetaData { + architecture: architecture.to_string(), + app_commit_hash: app_commit_hash.to_string(), + app_version: app_version.to_string(), + build_number: build_number.to_string(), + build_time: build_time.to_string(), + dotnet_sdk_version: dotnet_sdk_version.to_string(), + dotnet_version: dotnet_version.to_string(), + mud_blazor_version: mud_blazor_version.to_string(), + rust_version: rust_version.to_string(), + tauri_version: tauri_version.to_string(), + }; + + *META_DATA.lock().unwrap() = Some(metadata); init_logging(); info!("Starting MindWork AI Studio:"); @@ -40,6 +57,7 @@ async fn main() { info!(".. Rust: v{rust_version}"); info!(".. MudBlazor: v{mud_blazor_version}"); info!(".. Tauri: v{tauri_version}"); + info!(".. 
PDFium: v{pdfium_version}"); if is_dev() { warn!("Running in development mode."); diff --git a/runtime/src/metadata.rs b/runtime/src/metadata.rs new file mode 100644 index 00000000..29e38adc --- /dev/null +++ b/runtime/src/metadata.rs @@ -0,0 +1,18 @@ +use std::sync::Mutex; +use once_cell::sync::Lazy; + +pub static META_DATA: Lazy>> = Lazy::new(|| Mutex::new(None)); + +#[derive(Clone)] +pub struct MetaData { + pub app_version: String, + pub build_time: String, + pub build_number: String, + pub dotnet_sdk_version: String, + pub dotnet_version: String, + pub rust_version: String, + pub mud_blazor_version: String, + pub tauri_version: String, + pub app_commit_hash: String, + pub architecture: String, +} \ No newline at end of file diff --git a/runtime/src/runtime_api.rs b/runtime/src/runtime_api.rs index bf3fa249..459fc936 100644 --- a/runtime/src/runtime_api.rs +++ b/runtime/src/runtime_api.rs @@ -79,6 +79,7 @@ pub fn start_runtime_api() { crate::environment::get_config_directory, crate::environment::read_user_language, crate::file_data::extract_data, + crate::file_data::read_pdf, crate::log::get_log_paths, ]) .ignite().await.unwrap() diff --git a/runtime/tauri.conf.json b/runtime/tauri.conf.json index 2a89fd03..5f7d46d9 100644 --- a/runtime/tauri.conf.json +++ b/runtime/tauri.conf.json @@ -29,6 +29,9 @@ "scope": [ "http://localhost" ] + }, + "fs": { + "scope": ["$RESOURCE/resources/*"] } }, "windows": [ @@ -57,6 +60,9 @@ "externalBin": [ "../app/MindWork AI Studio/bin/dist/mindworkAIStudioServer" ], + "resources": [ + "resources/*" + ], "macOS": { "exceptionDomain": "localhost" },