website/lib/Pandoc.rakumod

110 lines
3.4 KiB
Raku
Raw Permalink Normal View History

2025-01-20 22:22:33 -05:00
#| Interaction with pandoc
unit module Pandoc;
use JSON::Fast;
2025-02-04 16:51:37 -05:00
#| Run pandoc with the given arguments, dying on failure
#|
#| `--no-highlight` is always passed ahead of the caller's arguments. Returns
#| everything pandoc wrote to standard output. If the process is not
#| successful, dies with a message carrying the exit code and both captured
#| output streams.
sub pandoc(*@args --> Str:D) {
    # Spawn pandoc with pipes attached to both of its output streams
    my $proc = run 'pandoc', '--no-highlight', @args, :out, :err;

    # Drain standard output first, then standard error, closing each pipe
    my $output = $proc.out.slurp(:close);
    my $stderr = $proc.err.slurp(:close);

    # In boolean context the Proc reports whether the command succeeded
    unless $proc {
        die "Pandoc exited with {$proc.exitcode}\nout: $output\nerr: $stderr";
    }

    return $output;
}
#| Find the title of a markdown document
#|
#| The title is the text of the document's single top level header. Dies if
#| the document has no top level header, if it has more than one, or if the
#| header contains any component other than plain strings and spaces.
sub markdown-title(IO::Path:D $file --> Str:D) is export {
    # Have pandoc convert the document to its JSON representation. Pandoc's
    # own output is trusted, so it is parsed without any validation.
    my $json = pandoc(<-f gfm -t JSON>, $file);
    my %document = from-json $json;

    # The document itself is untrusted user input, so verify how many top
    # level headers it really contains. The code relies on a header's <c>
    # holding its level at index 0 and its inline components at index 2.
    my @top-level = %document<blocks>.grep: -> $block {
        $block ~~ Associative && $block<t> ~~ "Header" && $block<c>[0] == 1
    };
    die "More than one top level header in $file" if @top-level.elems > 1;
    die "No top level headers in $file" if @top-level.elems == 0;

    # Build the title string out of the header's inline components
    my $title = "";
    for @top-level[0]<c>[2].flat -> $inline {
        # Anything that is not a hash-like node carries no text for us
        next unless $inline ~~ Associative;

        my $type = $inline<t>;
        if $type ~~ "Str" {
            $title ~= $inline<c>;
        }
        elsif $type ~~ "Space" {
            $title ~= " ";
        }
        else {
            die "Invalid component type: $type";
        }
    }

    $title
}
2025-02-03 20:53:32 -05:00
2025-02-04 16:51:37 -05:00
#| Use pandoc to extract the first paragraph of a markdown document
#|
#| Returns the plain text of the first top level paragraph. Strings, spaces,
#| inline code, soft line breaks (rendered as newlines) and links (rendered
#| as their link text) are supported. Dies if the document has no paragraphs
#| or if the paragraph, or the text of a link inside it, contains any other
#| kind of component.
sub markdown-first-paragraph(IO::Path:D $file --> Str:D) is export {
    my $output = pandoc <-f gfm -t JSON>, $file;
    my %parsed = from-json $output;

    # Extract a list of paragraphs from the pandoc output
    my sub is-para($v) {
        $v ~~ Associative && $v<t> ~~ 'Para'
    }
    my @paras = %parsed<blocks>.grep(&is-para);
    die "No paragraphs in markdown" if @paras.elems == 0;

    # Flatten a list of inline components into a string. This is recursive
    # so that the whole text of a link is kept, rather than only its first
    # component.
    my sub inlines-to-text(@inlines --> Str:D) {
        my $text = "";
        for @inlines -> $component {
            next unless $component ~~ Associative;
            given $component<t> {
                when "Str" {
                    $text ~= $component<c>;
                }
                when "Space" {
                    $text ~= ' ';
                }
                when "Code" {
                    # The code text is the last element of the payload
                    $text ~= $component<c>[*-1];
                }
                when "SoftBreak" {
                    $text ~= "\n";
                }
                when "Link" {
                    # Index 1 of a link's payload is the list of inline
                    # components making up the link text
                    $text ~= inlines-to-text($component<c>[1] // []);
                }
                default {
                    die "Invalid component type: $_";
                }
            }
        }
        $text
    }

    # Process the first paragraph into a string
    inlines-to-text(@paras[0]<c>)
}
2025-02-03 20:53:32 -05:00
#| Use pandoc to render a markdown document to html
#|
#| Returns the HTML pandoc produces for the document, with every `<h1>`
#| element removed.
sub markdown-to-html(IO::Path:D $file --> Str:D) is export {
    my $output = pandoc <-f gfm>, $file;

    # Remove the header, we'll regenerate it later. The match is frugal
    # because `.` also matches newlines: a greedy `.*` would run from the
    # first `<h1` to the last `</h1>` and take everything in between with it
    # whenever the document has more than one top level header.
    $output ~~ s:g/'<h1' .*? '</h1>'//;

    $output
}