2025-05-29 12:01:56 +00:00
using System.Diagnostics ;
using System.IO.Compression ;
2025-05-29 12:56:53 +00:00
using System.Reflection ;
2025-05-29 12:01:56 +00:00
using System.Text.RegularExpressions ;
2025-05-29 12:56:53 +00:00
using AIStudio.Tools.Metadata ;
2025-05-29 12:01:56 +00:00
using AIStudio.Tools.Services ;
2025-05-29 12:56:53 +00:00
using SharedTools ;
2025-05-29 12:01:56 +00:00
namespace AIStudio.Tools ;
public static partial class Pandoc
{
2025-05-29 12:56:53 +00:00
/// <summary>The currently executing assembly; used to read the build metadata attributes.</summary>
private static readonly Assembly ASSEMBLY = Assembly.GetExecutingAssembly();

/// <summary>The architecture metadata attribute attached to <see cref="ASSEMBLY"/>.</summary>
private static readonly MetaDataArchitectureAttribute META_DATA_ARCH = ASSEMBLY.GetCustomAttribute<MetaDataArchitectureAttribute>()!;

/// <summary>The runtime identifier derived from <see cref="META_DATA_ARCH"/>.</summary>
private static readonly RID CPU_ARCHITECTURE = META_DATA_ARCH.Architecture.ToRID();

/// <summary>Base URL under which the versioned pandoc release assets are addressed.</summary>
private const string DOWNLOAD_URL = "https://github.com/jgm/pandoc/releases/download";

/// <summary>URL of the page that is scanned for the latest pandoc version number.</summary>
private const string LATEST_URL = "https://github.com/jgm/pandoc/releases/latest";

/// <summary>Logger used by all members of this class.</summary>
private static readonly ILogger LOG = Program.LOGGER_FACTORY.CreateLogger("Pandoc");

/// <summary>The lowest pandoc version that is accepted by <c>CheckAvailabilityAsync</c>.</summary>
private static readonly Version MINIMUM_REQUIRED_VERSION = new Version(3, 7);

/// <summary>The version that is used when the latest version cannot be determined.</summary>
private static readonly Version FALLBACK_VERSION = new Version(3, 7, 0, 2);
2025-05-29 12:01:56 +00:00
2025-05-29 13:50:51 +00:00
/// <summary>
/// Prepares a ProcessStartInfo for running pandoc with the given parameters.
/// </summary>
/// <remarks>
/// Any local installation of pandoc will be preferred over the system-wide installation.
/// The input and output paths are wrapped in double quotes (unless they are empty or
/// already quoted), so that a path containing whitespace reaches pandoc as one argument.
/// </remarks>
/// <param name="rustService">The global rust service to access file system and data dir.</param>
/// <param name="inputFile">The input file to convert.</param>
/// <param name="outputFile">The output file to write the converted content to.</param>
/// <param name="inputFormat">The format of the input file (e.g., markdown, html, etc.).</param>
/// <param name="outputFormat">The format of the output file (e.g., pdf, docx, etc.).</param>
/// <param name="additionalArgs">Additional arguments to pass to the pandoc command (optional). Inserted verbatim.</param>
/// <returns>The prepared process: the configured ProcessStartInfo plus the flag telling whether the local installation is used.</returns>
public static async Task<PandocPreparedProcess> PreparePandocProcess(RustService rustService, string inputFile, string outputFile, string inputFormat, string outputFormat, string? additionalArgs = null)
{
    var pandocExecutable = await PandocExecutablePath(rustService);
    var arguments = $"{QuotePath(inputFile)} -f {inputFormat} -t {outputFormat} {additionalArgs ?? string.Empty} -o {QuotePath(outputFile)}";

    return new(new ProcessStartInfo
    {
        FileName = pandocExecutable.Executable,
        Arguments = arguments,
        RedirectStandardOutput = true,
        RedirectStandardError = true,
        UseShellExecute = false,
        CreateNoWindow = true
    }, pandocExecutable.IsLocalInstallation);

    // Wraps a path in double quotes so that whitespace does not split it into several arguments.
    static string QuotePath(string path)
    {
        // Empty values stay empty: callers such as the availability check pass
        // placeholders and overwrite the arguments afterwards.
        if (string.IsNullOrEmpty(path))
            return path;

        // Respect callers that already quoted the path themselves.
        if (path.Length >= 2 && path[0] == '"' && path[^1] == '"')
            return path;

        return $"\"{path.Replace("\"", "\\\"")}\"";
    }
}
2025-05-29 13:50:51 +00:00
2025-05-29 12:01:56 +00:00
/// <summary>
/// Checks if pandoc is available on the system and can be started as a process or is present in AI Studio's data dir.
/// </summary>
/// <remarks>
/// Runs <c>pandoc --version</c>, parses the version number from its standard output and
/// compares it with the minimum required version. Any exception (e.g., the executable
/// cannot be found) is caught, logged and reported as "not installed".
/// </remarks>
/// <param name="rustService">Global rust service to access file system and data dir.</param>
/// <param name="showMessages">Controls if snackbars are shown to the user.</param>
/// <returns>
/// A <see cref="PandocInstallation"/> describing the outcome of the check, including the
/// detected version string (empty when it could not be determined).
/// </returns>
public static async Task<PandocInstallation> CheckAvailabilityAsync(RustService rustService, bool showMessages = true)
{
    try
    {
        // The file and format arguments are placeholders; only the executable lookup
        // and the redirect settings of the prepared process are reused here.
        var preparedProcess = await PreparePandocProcess(rustService, string.Empty, string.Empty, string.Empty, string.Empty);
        var startInfo = preparedProcess.StartInfo;
        startInfo.Arguments = "--version";

        using var process = Process.Start(startInfo);
        if (process is null)
        {
            if (showMessages)
                await MessageBus.INSTANCE.SendError(new(Icons.Material.Filled.Help, "The pandoc process could not be started."));

            LOG.LogInformation("The pandoc process was not started, it was null");
            return new(false, "Was not able to start the pandoc process.", false, string.Empty, preparedProcess.IsLocal);
        }

        var output = await process.StandardOutput.ReadToEndAsync();
        await process.WaitForExitAsync();
        if (process.ExitCode != 0)
        {
            if (showMessages)
                await MessageBus.INSTANCE.SendError(new(Icons.Material.Filled.Error, "The pandoc process exited unexpectedly."));

            LOG.LogError("The pandoc process was exited with code {ProcessExitCode}", process.ExitCode);
            return new(false, "Pandoc is not available on the system or the process exited unexpectedly.", false, string.Empty, preparedProcess.IsLocal);
        }

        var versionMatch = PandocCmdRegex().Match(output);
        if (!versionMatch.Success)
        {
            if (showMessages)
                await MessageBus.INSTANCE.SendError(new(Icons.Material.Filled.Terminal, "pandoc --version returned an invalid format."));

            LOG.LogError("pandoc --version returned an invalid format:\n {Output}", output);
            return new(false, "Pandoc is not available on the system or the version could not be parsed.", false, string.Empty, preparedProcess.IsLocal);
        }

        var versions = versionMatch.Groups[1].Value;
        var installedVersion = Version.Parse(versions);
        var installedVersionString = installedVersion.ToString();

        if (installedVersion >= MINIMUM_REQUIRED_VERSION)
        {
            if (showMessages)
                await MessageBus.INSTANCE.SendSuccess(new(Icons.Material.Filled.CheckCircle, $"Pandoc {installedVersionString} is installed."));

            return new(true, string.Empty, true, installedVersionString, preparedProcess.IsLocal);
        }

        // Pandoc is present, but too old.
        if (showMessages)
            await MessageBus.INSTANCE.SendError(new(Icons.Material.Filled.Build, $"Pandoc {installedVersionString} is installed, but it doesn't match the required version ({MINIMUM_REQUIRED_VERSION.ToString()})."));

        LOG.LogInformation("Pandoc {Installed} is installed, but it does not match the required version ({Requirement})", installedVersionString, MINIMUM_REQUIRED_VERSION.ToString());
        return new(true, $"Pandoc {installedVersionString} is installed, but it does not match the required version ({MINIMUM_REQUIRED_VERSION.ToString()}).", false, installedVersionString, preparedProcess.IsLocal);
    }
    catch (Exception e)
    {
        if (showMessages)
            await MessageBus.INSTANCE.SendError(new(Icons.Material.Filled.AppsOutage, "Pandoc is not installed."));

        // Pass the exception itself, so that its type and stack trace end up in the log as well.
        LOG.LogError(e, "Pandoc is not installed and threw an exception: {Message}", e.Message);
        return new(false, "Pandoc is not installed or could not be started.", false, string.Empty, false);
    }
}
/// <summary>
/// Downloads the latest pandoc release archive and extracts it into AI Studio's data directory.
/// </summary>
/// <remarks>
/// The pandoc folder inside the data directory is cleared before the installation starts.
/// Zip archives are extracted with <see cref="ZipArchive"/>; gzip-compressed tarballs are
/// decompressed with <see cref="GZipStream"/> and unpacked by <c>System.Formats.Tar.TarFile</c>.
/// The archive is processed in memory, hence no temporary file is left behind on failure.
/// Failures are logged; exceptions do not propagate to the caller.
/// </remarks>
/// <param name="rustService">Global rust service to access file system and data dir.</param>
/// <returns>A task that completes when the installation attempt has finished.</returns>
public static async Task InstallAsync(RustService rustService)
{
    var installDir = await GetPandocDataFolder(rustService);
    ClearFolder(installDir);

    try
    {
        // CreateDirectory is a no-op when the directory already exists.
        Directory.CreateDirectory(installDir);

        var uri = await GenerateArchiveUriAsync();

        // An empty URI means that no archive is known for the current architecture.
        if (string.IsNullOrWhiteSpace(uri))
        {
            await MessageBus.INSTANCE.SendError(new(Icons.Material.Filled.Error, "Pandoc was not installed successfully, because no archive is available for this platform."));
            LOG.LogError("Pandoc was not installed, no release archive is known for architecture '{Architecture}'", CPU_ARCHITECTURE);
            return;
        }

        using var client = new HttpClient();
        using var response = await client.GetAsync(uri);
        if (!response.IsSuccessStatusCode)
        {
            await MessageBus.INSTANCE.SendError(new(Icons.Material.Filled.Error, "Pandoc was not installed successfully, because the archive was not found."));
            LOG.LogError("Pandoc was not installed, the release archive was not found (status code {StatusCode}): url='{Uri}', message='{Message}'", response.StatusCode, uri, response.RequestMessage);
            return;
        }

        var fileBytes = await response.Content.ReadAsByteArrayAsync();
        if (uri.EndsWith(".zip", StringComparison.OrdinalIgnoreCase))
        {
            using var zipStream = new MemoryStream(fileBytes, writable: false);
            using var zipArchive = new ZipArchive(zipStream, ZipArchiveMode.Read);
            zipArchive.ExtractToDirectory(installDir);
        }
        else if (uri.EndsWith(".tar.gz", StringComparison.OrdinalIgnoreCase))
        {
            // A .tar.gz file is not a zip archive: decompress the gzip layer first, then unpack the tarball.
            await using var compressedStream = new MemoryStream(fileBytes, writable: false);
            await using var tarStream = new GZipStream(compressedStream, CompressionMode.Decompress);
            await System.Formats.Tar.TarFile.ExtractToDirectoryAsync(tarStream, installDir, overwriteFiles: true);
        }
        else
        {
            await MessageBus.INSTANCE.SendError(new(Icons.Material.Filled.Error, "Pandoc was not installed successfully, because the archive type is unknown."));
            LOG.LogError("Pandoc was not installed, the archive is unknown: url='{Uri}'", uri);
            return;
        }

        await MessageBus.INSTANCE.SendSuccess(new(Icons.Material.Filled.CheckCircle, $"Pandoc {await FetchLatestVersionAsync()} was installed successfully."));
    }
    catch (Exception ex)
    {
        LOG.LogError(ex, "An error occurred while installing Pandoc.");
    }
}
/// <summary>
/// Removes the given directory including all of its content, when it exists.
/// </summary>
/// <remarks>
/// Errors while deleting are logged and not propagated to the caller.
/// </remarks>
/// <param name="path">The directory to remove.</param>
private static void ClearFolder(string path)
{
    if (Directory.Exists(path))
    {
        try
        {
            Directory.Delete(path, recursive: true);
        }
        catch (Exception deletionError)
        {
            LOG.LogError(deletionError, "Error clearing pandoc installation directory.");
        }
    }
}
/// <summary>
/// Asynchronously fetch the content from Pandoc's latest release page and extract the latest version number.
/// </summary>
/// <remarks>
/// Version numbers can have the following formats: x.x, x.x.x or x.x.x.x.
/// When the page cannot be fetched successfully or contains no version number, a warning is
/// sent to the user and the fallback version is returned instead. Exceptions thrown by the
/// HTTP request itself are not caught here.
/// </remarks>
/// <returns>Latest Pandoc version number, or the fallback version.</returns>
public static async Task<string> FetchLatestVersionAsync()
{
    using var client = new HttpClient();
    using var response = await client.GetAsync(LATEST_URL);
    if (!response.IsSuccessStatusCode)
    {
        LOG.LogError("Code {StatusCode}: Could not fetch Pandoc's latest page: {Response}", response.StatusCode, response.RequestMessage);
        return await UseFallbackVersionAsync();
    }

    var htmlContent = await response.Content.ReadAsStringAsync();
    var versionMatch = LatestVersionRegex().Match(htmlContent);
    if (!versionMatch.Success)
    {
        // Log where we looked and how much content we got; the group collection of a
        // failed match carries no useful information.
        LOG.LogError("The latest version regex found no version number on {Url} (content length: {Length})", LATEST_URL, htmlContent.Length);
        return await UseFallbackVersionAsync();
    }

    return versionMatch.Groups[1].Value;

    // Informs the user about the fallback and yields the fallback version string.
    static async Task<string> UseFallbackVersionAsync()
    {
        var fallbackVersion = FALLBACK_VERSION.ToString();
        await MessageBus.INSTANCE.SendWarning(new(Icons.Material.Filled.Warning, $"The latest pandoc version was not found, installing version {fallbackVersion} instead."));
        return fallbackVersion;
    }
}
/// <summary>
/// Builds the download URI of the pandoc release archive matching the current architecture.
/// </summary>
/// <returns>
/// Full URI to the matching archive in Pandoc's repository, or an empty string when
/// no archive is known for the current architecture.
/// </returns>
public static async Task<string> GenerateArchiveUriAsync()
{
    var latestVersion = await FetchLatestVersionAsync();
    var uriPrefix = $"{DOWNLOAD_URL}/{latestVersion}/pandoc-{latestVersion}-";

    string archiveName;
    switch (CPU_ARCHITECTURE)
    {
        //
        // Unfortunately, pandoc is not yet available for ARM64 Windows systems,
        // so we have to use the x86_64 version for now. ARM Windows contains
        // an x86_64 emulation layer, so it should work fine for now.
        //
        // Pandoc would be available for ARM64 Windows, but the Haskell compiler
        // does not support ARM64 Windows yet. Here are the related issues:
        //
        // - Haskell compiler: https://gitlab.haskell.org/ghc/ghc/-/issues/24603
        // - Haskell ARM MR: https://gitlab.haskell.org/ghc/ghc/-/merge_requests/13856
        // - Pandoc ARM64: https://github.com/jgm/pandoc/issues/10095
        //
        case RID.WIN_X64:
        case RID.WIN_ARM64:
            archiveName = "windows-x86_64.zip";
            break;

        case RID.OSX_X64:
            archiveName = "x86_64-macOS.zip";
            break;

        case RID.OSX_ARM64:
            archiveName = "arm64-macOS.zip";
            break;

        case RID.LINUX_X64:
            archiveName = "linux-amd64.tar.gz";
            break;

        case RID.LINUX_ARM64:
            archiveName = "linux-arm64.tar.gz";
            break;

        default:
            return string.Empty;
    }

    return uriPrefix + archiveName;
}
/// <summary>
/// Builds the download URI of the pandoc installer matching the current architecture.
/// </summary>
/// <returns>
/// Full URI to the matching installer in Pandoc's repo. When no installer is known for the
/// current architecture, an error message is sent and an empty string is returned.
/// </returns>
public static async Task<string> GenerateInstallerUriAsync()
{
    var latestVersion = await FetchLatestVersionAsync();
    var uriPrefix = $"{DOWNLOAD_URL}/{latestVersion}/pandoc-{latestVersion}-";

    string? installerName = CPU_ARCHITECTURE switch
    {
        //
        // Unfortunately, pandoc is not yet available for ARM64 Windows systems,
        // so we have to use the x86_64 version for now. ARM Windows contains
        // an x86_64 emulation layer, so it should work fine for now.
        //
        // Pandoc would be available for ARM64 Windows, but the Haskell compiler
        // does not support ARM64 Windows yet. Here are the related issues:
        //
        // - Haskell compiler: https://gitlab.haskell.org/ghc/ghc/-/issues/24603
        // - Haskell ARM MR: https://gitlab.haskell.org/ghc/ghc/-/merge_requests/13856
        // - Pandoc ARM64: https://github.com/jgm/pandoc/issues/10095
        //
        RID.WIN_X64 or RID.WIN_ARM64 => "windows-x86_64.msi",
        RID.OSX_X64 => "x86_64-macOS.pkg",
        RID.OSX_ARM64 => "arm64-macOS.pkg",
        _ => null,
    };

    if (installerName is not null)
        return uriPrefix + installerName;

    await MessageBus.INSTANCE.SendError(new(Icons.Material.Filled.Terminal, $"Installers are not available on {CPU_ARCHITECTURE} systems."));
    return string.Empty;
}
/// <summary>
/// The file name of the pandoc executable: "pandoc.exe" for the Windows
/// runtime identifiers, "pandoc" for every other one.
/// </summary>
private static string PandocExecutableName => CPU_ARCHITECTURE switch
{
    RID.WIN_X64 or RID.WIN_ARM64 => "pandoc.exe",
    _ => "pandoc",
};
2025-05-29 13:50:51 +00:00
/// <summary>
/// Returns the path to the pandoc executable.
/// </summary>
/// <remarks>
/// Any local installation of pandoc will be preferred over the system-wide installation.
/// When a local installation is found, its absolute path will be returned. In case no local
/// installation is found, the name of the pandoc executable will be returned.
/// The lookup first checks the top level of every subdirectory of the pandoc data folder,
/// and afterwards searches these subdirectories recursively.
/// </remarks>
/// <param name="rustService">Global rust service to access file system and data dir.</param>
/// <returns>Path to the pandoc executable, together with a flag telling whether it is a local installation.</returns>
private static async Task<PandocExecutable> PandocExecutablePath(RustService rustService)
{
    //
    // First, we try to find the pandoc executable in the data directory.
    // Any local installation should be preferred over the system-wide installation.
    //
    var localInstallationRootDirectory = await GetPandocDataFolder(rustService);
    var executableName = PandocExecutableName;

    try
    {
        // A missing data folder is the normal case when pandoc was never installed locally.
        if (Directory.Exists(localInstallationRootDirectory))
        {
            var subdirectories = Directory.GetDirectories(localInstallationRootDirectory);

            // Pass 1: the executable resides directly inside an extracted archive folder.
            foreach (var subdirectory in subdirectories)
            {
                var pandocPath = Path.Combine(subdirectory, executableName);
                if (File.Exists(pandocPath))
                    return new(pandocPath, true);
            }

            // Pass 2: the executable resides deeper inside an extracted archive folder.
            // NOTE(review): some release archives appear to ship the binary in a nested
            // folder such as "bin" -- confirm against the actual pandoc release assets.
            foreach (var subdirectory in subdirectories)
            {
                foreach (var nestedPath in Directory.EnumerateFiles(subdirectory, executableName, SearchOption.AllDirectories))
                    return new(nestedPath, true);
            }
        }
    }
    catch (Exception ex)
    {
        // Best effort: fall back to the system-wide installation, but leave a trace in the log.
        LOG.LogWarning(ex, "Was not able to search for a local pandoc installation in '{Directory}'.", localInstallationRootDirectory);
    }

    //
    // When no local installation was found, we assume that the pandoc executable is in the system PATH.
    //
    return new(executableName, false);
}
2025-05-29 12:01:56 +00:00
/// <summary>
/// Determines the folder that holds the local pandoc installation: the "pandoc"
/// folder inside the data directory reported by the rust service.
/// </summary>
/// <param name="rustService">Global rust service to access file system and data dir.</param>
/// <returns>The path of the pandoc folder inside the data directory.</returns>
private static async Task<string> GetPandocDataFolder(RustService rustService)
{
    var dataDirectory = await rustService.GetDataDirectory();
    return Path.Join(dataDirectory, "pandoc");
}
/// <summary>
/// Matches "pandoc" (optionally followed by ".exe") and captures the version number
/// that follows in group 1. Accepted version formats: x.x, x.x.x and x.x.x.x.
/// </summary>
[GeneratedRegex(@"pandoc(?:\.exe)?\s*([0-9]+\.[0-9]+(?:\.[0-9]+)?(?:\.[0-9]+)?)")]
private static partial Regex PandocCmdRegex();
/// <summary>
/// Matches "pandoc" (optionally followed by ".exe") inside the fetched release page and
/// captures the version number that follows in group 1. Accepted version formats:
/// x.x, x.x.x and x.x.x.x.
/// </summary>
[GeneratedRegex(@"pandoc(?:\.exe)?\s*([0-9]+\.[0-9]+(?:\.[0-9]+)?(?:\.[0-9]+)?)")]
private static partial Regex LatestVersionRegex();
}