website/lib/Pandoc.rakumod

#| Interaction with pandoc
unit module Pandoc;

use JSON::Fast;

#| Invoke pandoc with the supplied arguments and return whatever it wrote to
#| its standard output, dying with a diagnostic message if pandoc fails
sub pandoc(*@args --> Str:D) {
    # Spawn pandoc with syntax highlighting turned off, capturing both of its
    # output streams
    my $proc = run 'pandoc', '--no-highlight', @args, :out, :err;

    # Drain stdout first and stderr second, closing each handle once read
    my $stdout = $proc.out.slurp(:close);
    my $stderr = $proc.err.slurp(:close);

    # The Proc object is only truthy when the process exited successfully
    unless $proc {
        die "Pandoc exited with {$proc.exitcode}\nout: $stdout\nerr: $stderr";
    }

    $stdout
}

#| Determine the title of a markdown document
#|
#| The title is the text of the document's single top level header; dies if
#| the document contains more than one top level header, or none at all
sub markdown-title(IO::Path:D $file --> Str:D) is export {
    # Ask pandoc for the document as JSON and decode it. Pandoc's own output
    # is trusted, so the decoded structure is not validated.
    my %ast = from-json pandoc(<-f gfm -t JSON>, $file);

    # The document itself is untrusted input, so pick out every level one
    # header block and insist that there is exactly one of them
    my @top-level = %ast<blocks>.grep: -> $block {
        $block ~~ Associative && $block<t> ~~ "Header" && $block<c>[0] == 1
    };
    die "More than one top level header in $file" if @top-level > 1;
    die "No top level headers in $file" unless @top-level;

    # The third entry of the header's contents holds its inline components;
    # turn each one into text and glue the pieces together
    my @inlines = @top-level[0]<c>[2].flat;
    my $title = @inlines.grep(Associative).map(-> $inline {
        given $inline<t> {
            when "Str"   { $inline<c> }
            when "Space" { " " }
            default      { die "Invalid component type: $_" }
        }
    }).join;

    return $title;
}

#| Use pandoc to extract the first paragraph of a markdown document
#|
#| Returns the paragraph as plain text: words, spaces, inline code and link
#| text are kept, soft line breaks become newlines. Dies if the document has
#| no paragraphs, or if the paragraph (or the text of a link inside it)
#| contains an inline component type that is not handled here.
sub markdown-first-paragraph(IO::Path:D $file --> Str:D) is export {
    my $output = pandoc <-f gfm -t JSON>, $file;
    my %parsed = from-json $output;

    # Extract a list of paragraphs from the pandoc output
    my sub is-para($v) {
        $v ~~ Associative && $v<t> ~~ 'Para'
    }
    my @paras = %parsed<blocks>.grep(&is-para);
    die "No paragraphs in markdown" if @paras.elems == 0;

    # Flatten a list of inline components into a plain string, descending
    # into link text so that multi-word links are rendered in full
    my sub render-inlines(@inlines --> Str:D) {
        my $text = "";
        for @inlines -> $component {
            next unless $component ~~ Associative;
            given $component<t> {
                when "Str" {
                    $text ~= $component<c>;
                }
                when "Space" {
                    $text ~= ' ';
                }
                when "Code" {
                    # The code text is the last entry of the contents
                    $text ~= $component<c>[*-1];
                }
                when "SoftBreak" {
                    $text ~= "\n";
                }
                when "Link" {
                    # The second entry of a link's contents is the list of
                    # inline components making up the link text
                    $text ~= render-inlines($component<c>[1]);
                }
                default {
                    die "Invalid component type: $_";
                }
            }
        }
        $text
    }

    # Process the first paragraph's inline components into a string
    render-inlines(@paras[0]<c>)
}

#| Use pandoc to render a markdown document to html
#|
#| Every h1 element is stripped from the rendered html, as the header is
#| regenerated later on
sub markdown-to-html(IO::Path:D $file --> Str:D) is export {
    my $output = pandoc <-f gfm>, $file;

    # Remove the header, we'll regenerate it later. The match is frugal so
    # that each h1 element is removed on its own; a greedy match would run
    # from the first '<h1' to the last '</h1>' and take any content sitting
    # between two headers with it.
    $output ~~ s:g/'<h1' .*? '</h1>'//;

    $output
}