module SSG.Parser.Djot import SSG.Parser.Core import SSG.Parser.Util import Data.List1 import Data.Vect import Data.Nat import Data.String import Control.Eff import Derive.Prelude -- For iutils unit tests import System %language ElabReflection --***************************************** --* Data Types * --***************************************** public export data Inline : Type where HardLineBreak : Inline SoftLineBreak : Inline NonBreakingSpace : Inline Text : (c : Char) -> Inline %runElab derive "Inline" [Show, Eq] public export data Block : Type where Paragraph : (contents : List1 Inline) -> Block %runElab derive "Block" [Show, Eq] --***************************************** --* Character Classes and String Escaping * --***************************************** ----------------------- -- Character classes -- ----------------------- -- Class contents punctuationChars : List Char punctuationChars = [ '!', '"', '#', '$', '%', '&', '\'' , '(' , ')' , '*' , '+' , ',' , '-' , '.' , '/' , ':' , ';' , '<' , '=' , '>' , '?' , '@' , '[' , ']' , '^' , '_' , '`' , '{' , '|' , '}' , '~' ] horizontalWhitespaceChars : List Char horizontalWhitespaceChars = [' ', '\t'] verticalWhitespaceChars : List Char verticalWhitespaceChars = ['\n', '\r'] -- Class parsers punctuation : PS Char punctuation = theseChars punctuationChars -------------- -- Escaping -- -------------- escapedChar : PS Char escapedChar = do _ <- thisString "\\" oneOfE "Expected an escapable code" [ punctuation , exactReplace "n" '\n' , exactReplace "t" '\t' , exactReplace "r" '\r' ] ------------------------------------ -- Line Qualifying And Whitespace -- ------------------------------------ space : PS Char space = theseChars horizontalWhitespaceChars spaces : PS Nat spaces = do xs <- many space pure $ length xs nonTerminal : PS Char nonTerminal = notTheseChars verticalWhitespaceChars lineEnding : PS String lineEnding = theseStrings ["\r\n", "\n", "\r"] terminal : PS () terminal = do Nothing <- tryMaybe lineEnding | _ => pure () test <- parseEoF case test of False => throw "Expected line terminal" True => pure () line : PS (List Char) line = do cs <- many nonTerminal _ <- lineEnding pure cs isHorizontalWhitespace : Char -> Bool isHorizontalWhitespace c = any (== c) horizontalWhitespaceChars blankLine : PS (List Char) blankLine = do cs <- line case all isHorizontalWhitespace cs of False => throw "nonblank line" True => pure cs blankLineOrEnd : PS () blankLineOrEnd = do Nothing <- tryMaybe blankLine | Just _ => pure () eof <- parseEoF case eof of False => throw "Expected newline or end of document" True => pure () blankLines : PS () blankLines = do xs <- many blankLine if length xs > 0 then pure () else blankLineOrEnd --***************************************** --* Inline syntax * --***************************************** ------------------------ -- Escaped Whitespace -- ------------------------ hardLineBreak : PS Inline hardLineBreak = do _ <- thisString "\\" _ <- spaces _ <- lineEnding pure $ HardLineBreak nbsp : PS Inline nbsp = do _ <- thisString "\\ " pure $ NonBreakingSpace softLineBreak : PS Inline softLineBreak = do _ <- lineEnding -- Check to see if the next line is empty, if it is, we are at the end of the inline -- content, go ahead and bail state <- save Nothing <- tryMaybe blankLine | Just _ => throw "End of inline" load state pure $ SoftLineBreak --------------------- -- Emphasis/Strong -- --------------------- ---------- -- Text -- ---------- -- Process escape codes before processing as text escapedText : PS Inline escapedText = do c <- escapedChar pure $ Text c -- Any non-line-ending character can be part of regular text text : PS Inline text = do c <- nonTerminal pure $ Text c --------------------------- -- Overall Inline Parser -- --------------------------- inlineElementsNoNewlines : PS Inline inlineElementsNoNewlines = oneOfE "" [ nbsp , escapedText -- Text is last so that anything can superseed it , text ] inlineElement : PS Inline inlineElement = oneOfE "" [ hardLineBreak , softLineBreak , inlineElementsNoNewlines ] inline : PS (List1 Inline) inline = atLeastOne "Expected Inline Content" inlineElement --***************************************** --* Block Syntax * --***************************************** --------------- -- Paragraph -- --------------- paragraph : PS Block paragraph = do inlines <- inline blankLineOrEnd pure $ Paragraph inlines -------------------------- -- Overall Block Parser -- -------------------------- block : PS Block block = do -- eat up any blank lines _ <- many blankLine oneOfE "" [ paragraph ] blocks : PS (List Block) blocks = many block --***************************************** --* Unit Tests * --***************************************** ------------------------------- -- Testing Utility Functions -- ------------------------------- -- Test a parser against a golden value with the supplied input golden : Show a => Eq a => (input : String) -> (reference : a) -> (parser : PS a) -> IO Bool golden input ref parser = do putStrLn "Input: \{input}" putStrLn "Expected: \{show ref}" let got = runPS parser input case got of Left err => do putStrLn "Failed with error: \{err}" pure False Right x => do putStrLn "Output: \{show x}" pure $ x == ref inlineFromString : String -> List (Inline) inlineFromString str with (asList str) inlineFromString "" | [] = [] inlineFromString (strCons c str) | (c :: x) = Text c :: inlineFromString str | x inlineFromString' : String -> List1 (Inline) inlineFromString' str = case inlineFromString str of [] => assert_total $ idris_crash "Bad unit test fromString" (x :: xs) => x ::: xs ------------------------- -- Inline Syntax Tests -- ------------------------- -- @@test Plain Text Hello World inlineTextSmoke : IO Bool inlineTextSmoke = let input = "Hello World!" in golden input (map Text . unpack $ input) (map forget inline) -- @@test Escaped Text inlineEscapedSmoke : IO Bool inlineEscapedSmoke = let input = "Hello\\n\\*World" ref = inlineFromString "Hello" ++ [Text '\n', Text '*'] ++ inlineFromString "World" in golden input ref (map forget inline) -- @@test Hard Line Break inlineHardBreakSmoke : IO Bool inlineHardBreakSmoke = let input = "Hello\\\nWorld" ref = inlineFromString "Hello" ++ [HardLineBreak] ++ inlineFromString "World" in golden input ref (map forget inline) -- @@test Soft Line Break inlineSoftBreakSmoke : IO Bool inlineSoftBreakSmoke = let input = "Hello\nWorld" ref = inlineFromString "Hello" ++ [SoftLineBreak] ++ inlineFromString "World" in golden input ref (map forget inline) -- @@test Nonbreaking Space inlineNbspSmoke : IO Bool inlineNbspSmoke = let input = "Hello\\ World" ref = inlineFromString "Hello" ++ [NonBreakingSpace] ++ inlineFromString "World" in golden input ref (map forget inline) ------------------------ -- Block Syntax Tests -- ------------------------ -- @@test Paragraph blockParagraphSmoke : IO Bool blockParagraphSmoke = let input = "Hello World" ref = [Paragraph (inlineFromString' "Hello World")] in golden input ref blocks -- @@test Two Paragraph blockTwoParagraphSmoke : IO Bool blockTwoParagraphSmoke = let input = "Hello World\n\nHello Again" ref = [ Paragraph (inlineFromString' "Hello World") , Paragraph (inlineFromString' "Hello Again") ] in golden input ref blocks