ssg/src/SSG/Parser/Djot.idr
2025-02-21 03:24:23 -05:00

338 lines
7.6 KiB
Idris

module SSG.Parser.Djot
import SSG.Parser.Core
import SSG.Parser.Util
import Data.List1
import Data.Vect
import Data.Nat
import Data.String
import Control.Eff
import Derive.Prelude
-- For iutils unit tests
import System
%language ElabReflection
--*****************************************
--* Data Types *
--*****************************************
public export
data Inline : Type where
HardLineBreak : Inline
SoftLineBreak : Inline
NonBreakingSpace : Inline
Text : (c : Char) -> Inline
%runElab derive "Inline" [Show, Eq]
public export
data Block : Type where
Paragraph : (contents : List1 Inline) -> Block
%runElab derive "Block" [Show, Eq]
--*****************************************
--* Character Classes and String Escaping *
--*****************************************
-----------------------
-- Character classes --
-----------------------
-- Class contents
punctuationChars : List Char
punctuationChars =
[
'!', '"', '#', '$', '%', '&', '\''
, '(' , ')' , '*' , '+' , ',' , '-'
, '.' , '/' , ':' , ';' , '<' , '='
, '>' , '?' , '@' , '[' , ']' , '^'
, '_' , '`' , '{' , '|' , '}' , '~'
]
horizontalWhitespaceChars : List Char
horizontalWhitespaceChars =
[' ', '\t']
verticalWhitespaceChars : List Char
verticalWhitespaceChars =
['\n', '\r']
-- Class parsers
punctuation : PS Char
punctuation = theseChars punctuationChars
--------------
-- Escaping --
--------------
escapedChar : PS Char
escapedChar = do
_ <- thisString "\\"
oneOfE "Expected an escapable code"
[ punctuation
, exactReplace "n" '\n'
, exactReplace "t" '\t'
, exactReplace "r" '\r'
]
------------------------------------
-- Line Qualifying And Whitespace --
------------------------------------
space : PS Char
space = theseChars horizontalWhitespaceChars
spaces : PS Nat
spaces = do
xs <- many space
pure $ length xs
nonTerminal : PS Char
nonTerminal = notTheseChars verticalWhitespaceChars
lineEnding : PS String
lineEnding = theseStrings ["\r\n", "\n", "\r"]
terminal : PS ()
terminal = do
Nothing <- tryMaybe lineEnding
| _ => pure ()
test <- parseEoF
case test of
False => throw "Expected line terminal"
True => pure ()
line : PS (List Char)
line = do
cs <- many nonTerminal
_ <- lineEnding
pure cs
isHorizontalWhitespace : Char -> Bool
isHorizontalWhitespace c = any (== c) horizontalWhitespaceChars
blankLine : PS (List Char)
blankLine = do
cs <- line
case all isHorizontalWhitespace cs of
False => throw "nonblank line"
True => pure cs
blankLineOrEnd : PS ()
blankLineOrEnd = do
Nothing <- tryMaybe blankLine
| Just _ => pure ()
eof <- parseEoF
case eof of
False => throw "Expected newline or end of document"
True => pure ()
blankLines : PS ()
blankLines = do
xs <- many blankLine
if length xs > 0
then pure ()
else blankLineOrEnd
--*****************************************
--* Inline syntax *
--*****************************************
------------------------
-- Escaped Whitespace --
------------------------
hardLineBreak : PS Inline
hardLineBreak = do
_ <- thisString "\\"
_ <- spaces
_ <- lineEnding
pure $ HardLineBreak
nbsp : PS Inline
nbsp = do
_ <- thisString "\\ "
pure $ NonBreakingSpace
softLineBreak : PS Inline
softLineBreak = do
_ <- lineEnding
-- Check to see if the next line is empty, if it is, we are at the end of the inline
-- content, go ahead and bail
state <- save
Nothing <- tryMaybe blankLine
| Just _ => throw "End of inline"
load state
pure $ SoftLineBreak
---------------------
-- Emphasis/Strong --
---------------------
----------
-- Text --
----------
-- Process escape codes before processing as text
escapedText : PS Inline
escapedText = do
c <- escapedChar
pure $ Text c
-- Any non-line-ending character can be part of regular text
text : PS Inline
text = do
c <- nonTerminal
pure $ Text c
---------------------------
-- Overall Inline Parser --
---------------------------
inlineElementsNoNewlines : PS Inline
inlineElementsNoNewlines = oneOfE "" [
nbsp
, escapedText
-- Text is last so that anything can superseed it
, text
]
inlineElement : PS Inline
inlineElement = oneOfE "" [
hardLineBreak
, softLineBreak
, inlineElementsNoNewlines
]
inline : PS (List1 Inline)
inline = atLeastOne "Expected Inline Content" inlineElement
--*****************************************
--* Block Syntax *
--*****************************************
---------------
-- Paragraph --
---------------
paragraph : PS Block
paragraph = do
inlines <- inline
blankLineOrEnd
pure $ Paragraph inlines
--------------------------
-- Overall Block Parser --
--------------------------
block : PS Block
block = do
-- eat up any blank lines
_ <- many blankLine
oneOfE "" [
paragraph
]
blocks : PS (List Block)
blocks = many block
--*****************************************
--* Unit Tests *
--*****************************************
-------------------------------
-- Testing Utility Functions --
-------------------------------
-- Test a parser against a golden value with the supplied input
golden : Show a => Eq a =>
(input : String) -> (reference : a) -> (parser : PS a) -> IO Bool
golden input ref parser = do
putStrLn "Input: \{input}"
putStrLn "Expected: \{show ref}"
let got = runPS parser input
case got of
Left err => do
putStrLn "Failed with error: \{err}"
pure False
Right x => do
putStrLn "Output: \{show x}"
pure $ x == ref
inlineFromString : String -> List (Inline)
inlineFromString str with (asList str)
inlineFromString "" | [] = []
inlineFromString (strCons c str) | (c :: x) =
Text c :: inlineFromString str | x
inlineFromString' : String -> List1 (Inline)
inlineFromString' str =
case inlineFromString str of
[] => assert_total $ idris_crash "Bad unit test fromString"
(x :: xs) => x ::: xs
-------------------------
-- Inline Syntax Tests --
-------------------------
-- @@test Plain Text Hello World
inlineTextSmoke : IO Bool
inlineTextSmoke =
let input = "Hello World!" in
golden input (map Text . unpack $ input) (map forget inline)
-- @@test Escaped Text
inlineEscapedSmoke : IO Bool
inlineEscapedSmoke =
let input = "Hello\\n\\*World"
ref = inlineFromString "Hello" ++ [Text '\n', Text '*'] ++ inlineFromString "World"
in golden input ref (map forget inline)
-- @@test Hard Line Break
inlineHardBreakSmoke : IO Bool
inlineHardBreakSmoke =
let input = "Hello\\\nWorld"
ref = inlineFromString "Hello" ++ [HardLineBreak] ++ inlineFromString "World"
in golden input ref (map forget inline)
-- @@test Soft Line Break
inlineSoftBreakSmoke : IO Bool
inlineSoftBreakSmoke =
let input = "Hello\nWorld"
ref = inlineFromString "Hello" ++ [SoftLineBreak] ++ inlineFromString "World"
in golden input ref (map forget inline)
-- @@test Nonbreaking Space
inlineNbspSmoke : IO Bool
inlineNbspSmoke =
let input = "Hello\\ World"
ref = inlineFromString "Hello" ++ [NonBreakingSpace] ++ inlineFromString "World"
in golden input ref (map forget inline)
------------------------
-- Block Syntax Tests --
------------------------
-- @@test Paragraph
blockParagraphSmoke : IO Bool
blockParagraphSmoke =
let input = "Hello World"
ref = [Paragraph (inlineFromString' "Hello World")]
in golden input ref blocks
-- @@test Two Paragraph
blockTwoParagraphSmoke : IO Bool
blockTwoParagraphSmoke =
let input = "Hello World\n\nHello Again"
ref = [
Paragraph (inlineFromString' "Hello World")
, Paragraph (inlineFromString' "Hello Again")
]
in golden input ref blocks