website/lib/Pandoc.rakumod

#| Interaction with pandoc
unit module Pandoc;

use JSON::Fast;

#| Invoke the pandoc executable with the supplied arguments and return
#| everything it wrote to standard output; dies if pandoc reports failure
sub pandoc(*@args --> Str:D) {
    # '--no-highlight' is always passed ahead of the caller's arguments, and
    # both output streams are captured so they can be reported on failure
    my $proc = run 'pandoc', '--no-highlight', @args, :out, :err;

    # Drain and close standard output first, then standard error
    my $stdout = $proc.out.slurp(:close);
    my $errors = $proc.err.slurp(:close);

    # The process object is false in boolean context when pandoc failed
    unless $proc {
        die "Pandoc exited with {$proc.exitcode}\nout: $stdout\nerr: $errors";
    }

    return $stdout;
}

#| Determine the title of a markdown document
#|
#| The title is the text of the document's single top level header; dies when
#| the document has no top level header or has more than one
sub markdown-title(IO::Path:D $file --> Str:D) is export {
    # Have pandoc emit the document as JSON and decode it. Pandoc's output is
    # trusted, so the decoded structure itself is not validated.
    my %document = from-json pandoc('-f', 'gfm', '-t', 'JSON', $file);

    # The document is user input, so the number of top level headers has to be
    # checked rather than assumed. A block counts when it is a Header whose
    # first content element (the level) is 1.
    my @top-level = %document<blocks>.grep: -> $block {
        $block ~~ Associative && $block<t> ~~ "Header" && $block<c>[0] == 1
    };
    if @top-level > 1 {
        die "More than one top level header in $file";
    }
    unless @top-level {
        die "No top level headers in $file";
    }

    # The third content element of the header holds its components; collect
    # the text of each supported component and refuse anything else
    my @fragments = gather for @top-level[0]<c>[2].flat -> $inline {
        next unless $inline ~~ Associative;
        given $inline<t> {
            when "Str"   { take $inline<c> }
            when "Space" { take " " }
            default      { die "Invalid component type: $_" }
        }
    }

    return @fragments.join;
}

#| Use pandoc to extract the first paragraph of a markdown document
#|
#| Returns the plain text of the first top level 'Para' block. Supported
#| inline elements are Str, Space, Code, SoftBreak and Link (whose link text
#| is rendered recursively with the same rules); dies if the document has no
#| paragraph or if an unsupported inline element is encountered
sub markdown-first-paragraph(IO::Path:D $file --> Str:D) is export {
    my $output = pandoc <-f gfm -t JSON>, $file;
    my %parsed = from-json $output;

    # Extract a list of paragraphs from the pandoc output
    my sub is-para($v) {
        $v ~~ Associative && $v<t> ~~ 'Para'
    }
    my @paras = %parsed<blocks>.grep(&is-para);
    die "No paragraphs in markdown" if @paras.elems == 0;

    # Flatten a list of inline elements into plain text. Written as a
    # recursive helper so that the text of a link, which is itself a list of
    # inline elements, goes through exactly the same rules as the paragraph.
    my sub inlines-to-text(@inlines --> Str:D) {
        my $text = "";
        for @inlines -> $component {
            next unless $component ~~ Associative;
            given $component<t> {
                when "Str" {
                    $text ~= $component<c>;
                }
                when "Space" {
                    $text ~= ' ';
                }
                when "Code" {
                    # The code text is the last element of the content
                    $text ~= $component<c>[*-1];
                }
                when "SoftBreak" {
                    $text ~= "\n";
                }
                when "Link" {
                    # Element 1 of a link's content is the link text; render
                    # all of it rather than only its first component
                    $text ~= inlines-to-text($component<c>[1]);
                }
                default {
                    die "Invalid component type: $_";
                }
            }
        }
        $text
    }

    # Process the first paragraph into a string
    inlines-to-text(@paras[0]<c>)
}

#| Use pandoc to render a markdown document to html
#|
#| Every '<h1 ...>...</h1>' element is stripped from the rendered output
#| before it is returned
sub markdown-to-html(IO::Path:D $file --> Str:D) is export {
    my $output = pandoc <-f gfm>, $file;

    # Remove the header, we'll regenerate it later. The quantifier has to be
    # frugal ('.*?'): '.' also matches newlines, so a greedy '.*' would run
    # from the first '<h1' to the last '</h1>' and delete all content lying
    # between two top level headers.
    $output ~~ s:g/'<h1' .*? '</h1>'//;
    $output
}
Basic pandoc module 2025-01-20 22:22:33 -05:00			`#\| Interaction with pandoc`
			`unit module Pandoc;`

			`use JSON::Fast;`

Description generation 2025-02-04 16:51:37 -05:00			`#\| Run pandoc with the given arguments, dieing on failure`
			`sub pandoc(*@args --> Str:D) {`
Basic pandoc module 2025-01-20 22:22:33 -05:00			`# Call into pandoc`
Functional syntax highlighting for rust 2025-02-07 05:48:05 -05:00			`my $pandoc = run 'pandoc', '--no-highlight', @args, :out, :err;`
pandoc generate html method 2025-02-03 20:53:32 -05:00
			`# Collect the output`
Basic pandoc module 2025-01-20 22:22:33 -05:00			`my $output = $pandoc.out.slurp: :close;`
			`my $stderr = $pandoc.err.slurp: :close;`
pandoc generate html method 2025-02-03 20:53:32 -05:00			`die "Pandoc exited with {$pandoc.exitcode}\nout: $output\nerr: $stderr"`
			`unless $pandoc;`
Basic pandoc module 2025-01-20 22:22:33 -05:00
Description generation 2025-02-04 16:51:37 -05:00			`$output`
			`}`

			`#\| Extract the title from a markdown document`
			`#\|`
			`#\| The title is the only top level header, will throw an error if there are`
			`#\| multiple top level headers or are none`
			`sub markdown-title(IO::Path:D $file --> Str:D) is export {`
			`# Collect the output`
			`my $output = pandoc <-f gfm -t JSON>, $file;`

Basic pandoc module 2025-01-20 22:22:33 -05:00			`# Parse out output from pandoc, we are making an executive decision to trust`
			`# pandoc here, so we won't do any error handling for pandoc's output`
Working markdown post title 2025-01-21 01:31:33 -05:00			`my %parsed = from-json $output;`
Basic pandoc module 2025-01-20 22:22:33 -05:00			`# Extract a list of top level headers from the pandoc output, this should`
			`# only have one element in it, but as this is user input, its untrusted and`
			`# we need to do some error handling`
Working markdown post title 2025-01-21 01:31:33 -05:00			`my sub is-header($v) {`
			`$v ~~ Associative && $v<t> ~~ "Header"`
			`}`
			`my @headers = %parsed<blocks>.grep(&is-header).grep(*<c>[0] == 1);`
Description generation 2025-02-04 16:51:37 -05:00			`die "More than one top level header in $file" if @headers.elems > 1;`
			`die "No top level headers in $file" if @headers.elems == 0;`
Basic pandoc module 2025-01-20 22:22:33 -05:00
			`# Extract the header and process it into a string`
Working markdown post title 2025-01-21 01:31:33 -05:00			`my @header = @headers[0]<c>[2].flat;`
Basic pandoc module 2025-01-20 22:22:33 -05:00			`my $title = "";`
			`for @header -> $component {`
Working markdown post title 2025-01-21 01:31:33 -05:00			`next unless $component ~~ Associative;`
			`given $component<t> {`
Basic pandoc module 2025-01-20 22:22:33 -05:00			`when "Str" {`
Working markdown post title 2025-01-21 01:31:33 -05:00			`$title = $title ~ $component<c>;`
Basic pandoc module 2025-01-20 22:22:33 -05:00			`}`
			`when "Space" {`
			`$title = $title ~ " ";`
			`}`
			`default {`
			`die "Invalid component type: $_";`
			`}`
			`}`
			`}`

			`return $title;`
			`}`
pandoc generate html method 2025-02-03 20:53:32 -05:00
Description generation 2025-02-04 16:51:37 -05:00			`#\| Use pandoc to extract the first paragraph of a markdown document`
			`sub markdown-first-paragraph(IO::Path:D $file --> Str:D) is export {`
			`my $output = pandoc <-f gfm -t JSON>, $file;`
			`my %parsed = from-json $output;`
			`# Extract a list of paragraphs from the pandoc output`
			`my sub is-para($v) {`
			`$v ~~ Associative && $v<t> ~~ 'Para'`
			`}`
			`my @paras = %parsed<blocks>.grep(&is-para);`
			`die "No paragraphs in markdown" if @paras.elems == 0;`
			`my @para = @paras[0][0]<c>.flat;`
			`# Proces it into a string`
			`my $para = "";`
			`for @para -> $component {`
			`next unless $component ~~ Associative;`
			`given $component<t> {`
			`when "Str" {`
			`$para ~= $component<c>;`
			`}`
			`when "Space" {`
First post 2025-02-05 05:59:34 -05:00			`$para ~= ' ';`
			`}`
			`when "Code" {`
			`$para ~= $component<c>[*-1];`
			`}`
			`when "SoftBreak" {`
			`$para ~= "\n";`
			`}`
			`when "Link" {`
Advent of bugs 2025-02-07 06:50:13 -05:00			`# TODO: Properly descend into links`
First post 2025-02-05 05:59:34 -05:00			`$para ~= $component<c>[1][0]<c>;`
Description generation 2025-02-04 16:51:37 -05:00			`}`
			`default {`
			`die "Invalid component type: $_";`
			`}`
			`}`
			`}`

			`$para`
			`}`

pandoc generate html method 2025-02-03 20:53:32 -05:00			`#\| Use pandoc to render a markdown document to html`
			`sub markdown-to-html(IO::Path:D $file --> Str:D) is export {`
Seperate out title into seperate visual block 2025-02-04 22:24:37 -05:00			`# Remove the header, we'll regenerate it later`
			`my $output = pandoc <-f gfm>, $file;`
			`$output ~~ s:g/'<h1' .* '</h1>'//;`
			`$output`
pandoc generate html method 2025-02-03 20:53:32 -05:00			`}`