338 lines
7.6 KiB
Idris
338 lines
7.6 KiB
Idris
module SSG.Parser.Djot
|
|
|
|
import SSG.Parser.Core
|
|
import SSG.Parser.Util
|
|
|
|
import Data.List1
|
|
import Data.Vect
|
|
import Data.Nat
|
|
import Data.String
|
|
|
|
import Control.Eff
|
|
import Derive.Prelude
|
|
|
|
-- For iutils unit tests
|
|
import System
|
|
|
|
%language ElabReflection
|
|
|
|
--*****************************************
|
|
--* Data Types *
|
|
--*****************************************
|
|
|
|
public export
|
|
data Inline : Type where
|
|
HardLineBreak : Inline
|
|
SoftLineBreak : Inline
|
|
NonBreakingSpace : Inline
|
|
Text : (c : Char) -> Inline
|
|
|
|
%runElab derive "Inline" [Show, Eq]
|
|
|
|
public export
|
|
data Block : Type where
|
|
Paragraph : (contents : List1 Inline) -> Block
|
|
|
|
%runElab derive "Block" [Show, Eq]
|
|
|
|
--*****************************************
|
|
--* Character Classes and String Escaping *
|
|
--*****************************************
|
|
|
|
-----------------------
|
|
-- Character classes --
|
|
-----------------------
|
|
|
|
-- Class contents
|
|
|
|
punctuationChars : List Char
|
|
punctuationChars =
|
|
[
|
|
'!', '"', '#', '$', '%', '&', '\''
|
|
, '(' , ')' , '*' , '+' , ',' , '-'
|
|
, '.' , '/' , ':' , ';' , '<' , '='
|
|
, '>' , '?' , '@' , '[' , ']' , '^'
|
|
, '_' , '`' , '{' , '|' , '}' , '~'
|
|
]
|
|
|
|
horizontalWhitespaceChars : List Char
|
|
horizontalWhitespaceChars =
|
|
[' ', '\t']
|
|
|
|
verticalWhitespaceChars : List Char
|
|
verticalWhitespaceChars =
|
|
['\n', '\r']
|
|
|
|
-- Class parsers
|
|
|
|
punctuation : PS Char
|
|
punctuation = theseChars punctuationChars
|
|
|
|
--------------
|
|
-- Escaping --
|
|
--------------
|
|
|
|
escapedChar : PS Char
|
|
escapedChar = do
|
|
_ <- thisString "\\"
|
|
oneOfE "Expected an escapable code"
|
|
[ punctuation
|
|
, exactReplace "n" '\n'
|
|
, exactReplace "t" '\t'
|
|
, exactReplace "r" '\r'
|
|
]
|
|
|
|
------------------------------------
|
|
-- Line Qualifying And Whitespace --
|
|
------------------------------------
|
|
|
|
space : PS Char
|
|
space = theseChars horizontalWhitespaceChars
|
|
|
|
spaces : PS Nat
|
|
spaces = do
|
|
xs <- many space
|
|
pure $ length xs
|
|
|
|
nonTerminal : PS Char
|
|
nonTerminal = notTheseChars verticalWhitespaceChars
|
|
|
|
lineEnding : PS String
|
|
lineEnding = theseStrings ["\r\n", "\n", "\r"]
|
|
|
|
terminal : PS ()
|
|
terminal = do
|
|
Nothing <- tryMaybe lineEnding
|
|
| _ => pure ()
|
|
test <- parseEoF
|
|
case test of
|
|
False => throw "Expected line terminal"
|
|
True => pure ()
|
|
|
|
line : PS (List Char)
|
|
line = do
|
|
cs <- many nonTerminal
|
|
_ <- lineEnding
|
|
pure cs
|
|
|
|
isHorizontalWhitespace : Char -> Bool
|
|
isHorizontalWhitespace c = any (== c) horizontalWhitespaceChars
|
|
|
|
blankLine : PS (List Char)
|
|
blankLine = do
|
|
cs <- line
|
|
case all isHorizontalWhitespace cs of
|
|
False => throw "nonblank line"
|
|
True => pure cs
|
|
|
|
blankLineOrEnd : PS ()
|
|
blankLineOrEnd = do
|
|
Nothing <- tryMaybe blankLine
|
|
| Just _ => pure ()
|
|
eof <- parseEoF
|
|
case eof of
|
|
False => throw "Expected newline or end of document"
|
|
True => pure ()
|
|
|
|
blankLines : PS ()
|
|
blankLines = do
|
|
xs <- many blankLine
|
|
if length xs > 0
|
|
then pure ()
|
|
else blankLineOrEnd
|
|
|
|
--*****************************************
|
|
--* Inline syntax *
|
|
--*****************************************
|
|
|
|
------------------------
|
|
-- Escaped Whitespace --
|
|
------------------------
|
|
|
|
hardLineBreak : PS Inline
|
|
hardLineBreak = do
|
|
_ <- thisString "\\"
|
|
_ <- spaces
|
|
_ <- lineEnding
|
|
pure $ HardLineBreak
|
|
|
|
nbsp : PS Inline
|
|
nbsp = do
|
|
_ <- thisString "\\ "
|
|
pure $ NonBreakingSpace
|
|
|
|
softLineBreak : PS Inline
|
|
softLineBreak = do
|
|
_ <- lineEnding
|
|
-- Check to see if the next line is empty, if it is, we are at the end of the inline
|
|
-- content, go ahead and bail
|
|
state <- save
|
|
Nothing <- tryMaybe blankLine
|
|
| Just _ => throw "End of inline"
|
|
load state
|
|
pure $ SoftLineBreak
|
|
|
|
---------------------
|
|
-- Emphasis/Strong --
|
|
---------------------
|
|
|
|
----------
|
|
-- Text --
|
|
----------
|
|
|
|
-- Process escape codes before processing as text
|
|
escapedText : PS Inline
|
|
escapedText = do
|
|
c <- escapedChar
|
|
pure $ Text c
|
|
|
|
-- Any non-line-ending character can be part of regular text
|
|
text : PS Inline
|
|
text = do
|
|
c <- nonTerminal
|
|
pure $ Text c
|
|
|
|
---------------------------
|
|
-- Overall Inline Parser --
|
|
---------------------------
|
|
|
|
inlineElementsNoNewlines : PS Inline
|
|
inlineElementsNoNewlines = oneOfE "" [
|
|
nbsp
|
|
, escapedText
|
|
-- Text is last so that anything can superseed it
|
|
, text
|
|
]
|
|
|
|
inlineElement : PS Inline
|
|
inlineElement = oneOfE "" [
|
|
hardLineBreak
|
|
, softLineBreak
|
|
, inlineElementsNoNewlines
|
|
]
|
|
|
|
inline : PS (List1 Inline)
|
|
inline = atLeastOne "Expected Inline Content" inlineElement
|
|
|
|
--*****************************************
|
|
--* Block Syntax *
|
|
--*****************************************
|
|
|
|
---------------
|
|
-- Paragraph --
|
|
---------------
|
|
|
|
paragraph : PS Block
|
|
paragraph = do
|
|
inlines <- inline
|
|
blankLineOrEnd
|
|
pure $ Paragraph inlines
|
|
|
|
--------------------------
|
|
-- Overall Block Parser --
|
|
--------------------------
|
|
|
|
block : PS Block
|
|
block = do
|
|
-- eat up any blank lines
|
|
_ <- many blankLine
|
|
oneOfE "" [
|
|
paragraph
|
|
]
|
|
|
|
blocks : PS (List Block)
|
|
blocks = many block
|
|
|
|
--*****************************************
|
|
--* Unit Tests *
|
|
--*****************************************
|
|
|
|
-------------------------------
|
|
-- Testing Utility Functions --
|
|
-------------------------------
|
|
|
|
-- Test a parser against a golden value with the supplied input
|
|
golden : Show a => Eq a =>
|
|
(input : String) -> (reference : a) -> (parser : PS a) -> IO Bool
|
|
golden input ref parser = do
|
|
putStrLn "Input: \{input}"
|
|
putStrLn "Expected: \{show ref}"
|
|
let got = runPS parser input
|
|
case got of
|
|
Left err => do
|
|
putStrLn "Failed with error: \{err}"
|
|
pure False
|
|
Right x => do
|
|
putStrLn "Output: \{show x}"
|
|
pure $ x == ref
|
|
|
|
inlineFromString : String -> List (Inline)
|
|
inlineFromString str with (asList str)
|
|
inlineFromString "" | [] = []
|
|
inlineFromString (strCons c str) | (c :: x) =
|
|
Text c :: inlineFromString str | x
|
|
|
|
inlineFromString' : String -> List1 (Inline)
|
|
inlineFromString' str =
|
|
case inlineFromString str of
|
|
[] => assert_total $ idris_crash "Bad unit test fromString"
|
|
(x :: xs) => x ::: xs
|
|
|
|
-------------------------
|
|
-- Inline Syntax Tests --
|
|
-------------------------
|
|
|
|
-- @@test Plain Text Hello World
|
|
inlineTextSmoke : IO Bool
|
|
inlineTextSmoke =
|
|
let input = "Hello World!" in
|
|
golden input (map Text . unpack $ input) (map forget inline)
|
|
|
|
-- @@test Escaped Text
|
|
inlineEscapedSmoke : IO Bool
|
|
inlineEscapedSmoke =
|
|
let input = "Hello\\n\\*World"
|
|
ref = inlineFromString "Hello" ++ [Text '\n', Text '*'] ++ inlineFromString "World"
|
|
in golden input ref (map forget inline)
|
|
|
|
-- @@test Hard Line Break
|
|
inlineHardBreakSmoke : IO Bool
|
|
inlineHardBreakSmoke =
|
|
let input = "Hello\\\nWorld"
|
|
ref = inlineFromString "Hello" ++ [HardLineBreak] ++ inlineFromString "World"
|
|
in golden input ref (map forget inline)
|
|
|
|
-- @@test Soft Line Break
|
|
inlineSoftBreakSmoke : IO Bool
|
|
inlineSoftBreakSmoke =
|
|
let input = "Hello\nWorld"
|
|
ref = inlineFromString "Hello" ++ [SoftLineBreak] ++ inlineFromString "World"
|
|
in golden input ref (map forget inline)
|
|
|
|
-- @@test Nonbreaking Space
|
|
inlineNbspSmoke : IO Bool
|
|
inlineNbspSmoke =
|
|
let input = "Hello\\ World"
|
|
ref = inlineFromString "Hello" ++ [NonBreakingSpace] ++ inlineFromString "World"
|
|
in golden input ref (map forget inline)
|
|
|
|
------------------------
|
|
-- Block Syntax Tests --
|
|
------------------------
|
|
|
|
-- @@test Paragraph
|
|
blockParagraphSmoke : IO Bool
|
|
blockParagraphSmoke =
|
|
let input = "Hello World"
|
|
ref = [Paragraph (inlineFromString' "Hello World")]
|
|
in golden input ref blocks
|
|
|
|
-- @@test Two Paragraph
|
|
blockTwoParagraphSmoke : IO Bool
|
|
blockTwoParagraphSmoke =
|
|
let input = "Hello World\n\nHello Again"
|
|
ref = [
|
|
Paragraph (inlineFromString' "Hello World")
|
|
, Paragraph (inlineFromString' "Hello Again")
|
|
]
|
|
in golden input ref blocks
|