diff --git a/SSG.ipkg b/SSG.ipkg index 5775c67..5189c9b 100644 --- a/SSG.ipkg +++ b/SSG.ipkg @@ -16,11 +16,12 @@ authors = "Nathan McCarty" depends = structures , eff , refined + , elab-util -- modules to install modules = SSG.Parser.Core , SSG.Parser.Util - , SSG.Parser.Markdown + , SSG.Parser.Djot , SSG.HTML , SSG.HTML.ElementTypes diff --git a/src/SSG/Parser/Core.idr b/src/SSG/Parser/Core.idr index ab8cc92..9cea39d 100644 --- a/src/SSG/Parser/Core.idr +++ b/src/SSG/Parser/Core.idr @@ -4,6 +4,9 @@ import Data.Vect import Data.String import Control.Eff +import Derive.Prelude + +%language ElabReflection --------------------------- -- Internal parser state -- @@ -15,12 +18,14 @@ data ParserLocation : Type where AtPoint : (length : Int) -> (location : Int) -> ParserLocation EndOfInput : (length : Int) -> ParserLocation +%runElab derive "ParserLocation" [Show] + namespace ParserLocation export fromString : String -> ParserLocation fromString str = if strLength str > 0 - then AtPoint 0 (strLength str) + then AtPoint (strLength str) 0 else EndOfInput 0 ||| The state of a parser @@ -30,6 +35,8 @@ record ParserState where input : String location : ParserLocation +%runElab derive "ParserState" [Show] + namespace ParserState export fromString : String -> ParserState @@ -69,6 +76,15 @@ data Parser : Type -> Type where -- Detect the end of input ParseEoF : Parser Bool +Show (Parser a) where + show Save = "save" + show (Load x) = "load: \{show x}" + show (Peek n) = "peek: \{show n}]" + show (ParseChar predicate) = "ParseChar" + show (ParseExact str) = "ParseExact: \{str}" + show (SkipChars predicate) = "SkipChars" + show ParseEoF = "ParseEof" + ----------------------- -- Effectful Actions -- ----------------------- @@ -136,7 +152,7 @@ unParser s@(MkPS input (AtPoint length location)) (ParseChar predicate) = let c = assert_total $ strIndex input location in if predicate c then if location + 1 < length - then (Got c, MkPS input (AtPoint length location)) + then (Got c, MkPS input (AtPoint length (location + 1))) else (Got c, MkPS input (EndOfInput length)) else (Failed c, s) unParser s@(MkPS input (EndOfInput length)) (ParseChar predicate) = diff --git a/src/SSG/Parser/Djot.idr b/src/SSG/Parser/Djot.idr new file mode 100644 index 0000000..7d19cee --- /dev/null +++ b/src/SSG/Parser/Djot.idr @@ -0,0 +1,267 @@ +module SSG.Parser.Djot + +import SSG.Parser.Core +import SSG.Parser.Util + +import Data.List1 +import Data.Vect +import Data.Nat +import Data.String + +import Control.Eff +import Derive.Prelude + +-- For iutils unit tests +import System + +%language ElabReflection + +--***************************************** +--* Data Types * +--***************************************** + +public export +data Inline : Type where + HardLineBreak : Inline + SoftLineBreak : Inline + NonBreakingSpace : Inline + Text : (c : Char) -> Inline + +%runElab derive "Inline" [Show, Eq] + +public export +data Block : Type where + +--***************************************** +--* Character Classes and String Escaping * +--***************************************** + +----------------------- +-- Character classes -- +----------------------- + +-- Class contents + +punctuationChars : List Char +punctuationChars = + [ + '!', '"', '#', '$', '%', '&', '\'' + , '(' , ')' , '*' , '+' , ',' , '-' + , '.' , '/' , ':' , ';' , '<' , '=' + , '>' , '?' , '@' , '[' , ']' , '^' + , '_' , '`' , '{' , '|' , '}' , '~' + ] + +horizontalWhitespaceChars : List Char +horizontalWhitespaceChars = + [' ', '\t'] + +verticalWhitespaceChars : List Char +verticalWhitespaceChars = + ['\n', '\r'] + +-- Class parsers + +punctuation : PS Char +punctuation = theseChars punctuationChars + +-------------- +-- Escaping -- +-------------- + +escapedChar : PS Char +escapedChar = do + _ <- thisString "\\" + oneOfE "Expected an escapable code" + [ punctuation + , exactReplace "n" '\n' + , exactReplace "t" '\t' + , exactReplace "r" '\r' + ] + +------------------------------------ +-- Line Qualifying And Whitespace -- +------------------------------------ + +spaces : PS Nat +spaces = do + xs <- many $ theseChars horizontalWhitespaceChars + pure $ length xs + +nonTerminal : PS Char +nonTerminal = notTheseChars verticalWhitespaceChars + +lineEnding : PS String +lineEnding = theseStrings ["\r\n", "\n", "\r"] + +terminal : PS () +terminal = do + Nothing <- tryMaybe lineEnding + | _ => pure () + test <- parseEoF + case test of + False => throw "Expected line terminal" + True => pure () + +line : PS (List Char) +line = do + cs <- many nonTerminal + terminal + pure cs + +isHorizontalWhitespace : Char -> Bool +isHorizontalWhitespace c = any (== c) horizontalWhitespaceChars + +blankLine : PS (List Char) +blankLine = do + cs <- line + case all isHorizontalWhitespace cs of + False => throw "nonblank line" + True => pure cs + +--***************************************** +--* Inline syntax * +--***************************************** + +inlineElement : PS Inline + +------------------------ +-- Escaped Whitespace -- +------------------------ + +hardLineBreak : PS Inline +hardLineBreak = do + _ <- thisString "\\" + _ <- spaces + _ <- lineEnding + pure $ HardLineBreak + +nbsp : PS Inline +nbsp = do + _ <- thisString "\\ " + pure $ NonBreakingSpace + +softLineBreak : PS Inline +softLineBreak = do + _ <- lineEnding + -- Check to see if the next line is empty, if it is, we are at the end of the inline + -- content, go ahead and bail + state <- save + Nothing <- tryMaybe blankLine + | Just _ => throw "End of inline" + load state + pure $ SoftLineBreak + +--------------------- +-- Emphasis/Strong -- +--------------------- + +---------- +-- Text -- +---------- + +-- Process escape codes before processing as text +escapedText : PS Inline +escapedText = do + c <- escapedChar + pure $ Text c + +-- Any non-line-ending character can be part of regular text +text : PS Inline +text = do + c <- nonTerminal + pure $ Text c + +--------------------------- +-- Overall Inline Parser -- +--------------------------- + +inlineElement = oneOfE "" [ + hardLineBreak + , nbsp + , escapedText + , softLineBreak + -- Text is last so that anything can superseed it + , text + ] + +inline : PS (List1 Inline) +inline = atLeastOne "Expected Inline Content" inlineElement + + +--***************************************** +--* Utility Functions * +--***************************************** + +------------------ +-- Constructors -- +------------------ + +namespace Inline + export + fromString : String -> List (Inline) + fromString str with (asList str) + fromString "" | [] = [] + fromString (strCons c str) | (c :: x) = + Text c :: fromString str | x + +--***************************************** +--* Unit Tests * +--***************************************** + +------------------------------- +-- Testing Utility Functions -- +------------------------------- + +-- Test a parser against a golden value with the supplied input +golden : Show a => Eq a => + (input : String) -> (reference : a) -> (parser : PS a) -> IO Bool +golden input ref parser = do + putStrLn "Input: \{input}" + putStrLn "Expected: \{show ref}" + let got = runPS parser input + case got of + Left err => do + putStrLn "Failed with error: \{err}" + pure False + Right x => do + putStrLn "Output: \{show x}" + pure $ x == ref + +------------------------- +-- Inline Syntax Tests -- +------------------------- + +-- @@test Plain Text Hello World +inlineTextSmoke : IO Bool +inlineTextSmoke = + let input = "Hello World!" in + golden input (map Text . unpack $ input) (map forget inline) + +-- @@test Escaped Text +inlineEscapedSmoke : IO Bool +inlineEscapedSmoke = + let input = "Hello\\n\\*World" + ref = fromString "Hello" ++ [Text '\n', Text '*'] ++ fromString "World" + in golden input ref (map forget inline) + +-- @@test Hard Line Break +inlineHardBreakSmoke : IO Bool +inlineHardBreakSmoke = + let input = "Hello\\\nWorld" + ref = fromString "Hello" ++ [HardLineBreak] ++ fromString "World" + in golden input ref (map forget inline) + +-- @@test Soft Line Break +inlineSoftBreakSmoke : IO Bool +inlineSoftBreakSmoke = + let input = "Hello\nWorld" + ref = fromString "Hello" ++ [SoftLineBreak] ++ fromString "World" + in golden input ref (map forget inline) + +-- @@test Nonbreaking Space +inlineNbspSmoke : IO Bool +inlineNbspSmoke = + let input = "Hello\\ World" + ref = fromString "Hello" ++ [NonBreakingSpace] ++ fromString "World" + in golden input ref (map forget inline) diff --git a/src/SSG/Parser/Markdown.idr b/src/SSG/Parser/Markdown.idr deleted file mode 100644 index 4f4ade3..0000000 --- a/src/SSG/Parser/Markdown.idr +++ /dev/null @@ -1,10 +0,0 @@ -module SSG.Parser.Markdown - -import SSG.Parser.Core -import SSG.Parser.Util - -import Data.List1 -import Data.Vect -import Data.Nat -import Control.Eff - diff --git a/src/SSG/Parser/Util.idr b/src/SSG/Parser/Util.idr index 22aeeeb..a195236 100644 --- a/src/SSG/Parser/Util.idr +++ b/src/SSG/Parser/Util.idr @@ -16,6 +16,12 @@ public export PS : Type -> Type PS a = Eff [Parser, Except ParseError] a +||| Run a parser and extact the parsed value +export +runPS : PS a -> (str : String) -> Either String a +runPS x str = + fst . extract . runParser str . runExcept $ x + ||| Attempt to run a fallible parser, backtracking on failure export try : (f : PS a) -> (err : ParseError -> PS a) -> PS a @@ -67,6 +73,16 @@ atLeastOne err f = do tail <- many f pure $ head ::: tail +||| Attempt to parse an exact string +export +thisString : String -> PS String +thisString str = do + res <- parseExact str + case res of + Got x => pure x + Failed x => throw "Expected \{show str} Got \{show x}" + EoF => throw "Unexpected EoF" + ||| Attempt to parse one of a list of strings export theseStrings : List String -> PS String @@ -87,7 +103,9 @@ theseChars cs = do Just [next] <- peek 1 | _ => throw "Unexpected EoF" if any (== next) cs - then pure next + then do + _ <- parseChar (const True) + pure next else throw "Expected one of \{show cs}, got \{show next}" ||| Attempt to parse any char not in the list @@ -98,4 +116,23 @@ notTheseChars cs = do | _ => throw "Unexpected EoF" if any (== next) cs then throw "Expected not one of \{show cs}, got \{show next}" - else pure next + else do + _ <- parseChar (const True) + pure next + +||| Parse an exact match, but 'replace' it with the provided value +export +exactReplace : (str : String) -> (replacement : a) -> PS a +exactReplace str replacement = do + _ <- parseExact str + pure replacement + +||| Any character +export +any : PS Char +any = do + x <- parseChar (const True) + case x of + Got y => pure y + (Failed y) => throw "What?" + EoF => throw "Unexpected EoF" diff --git a/todo.org b/todo.org index 1a2d61b..0ac9266 100644 --- a/todo.org +++ b/todo.org @@ -7,3 +7,50 @@ Decided to rename =Tag= to =Html=, and =Raw= to =Text=, which makes this make se * Parser Core ** TODO Refine =location= in =ParserLocation= ** TODO Error messages +** TODO Combinators for predictive parsing +* Djot [2/40] +:PROPERTIES: +:COOKIE_DATA: recursive +:END: +** Inline Syntax [2/18] +*** DONE Ordinary Text +*** TODO Link +*** TODO Image +*** TODO Autolink +*** TODO Verbatim +*** TODO Emphasis/strong +*** TODO Highlighted +*** TODO Super/subscript +*** TODO Insert/delete +*** TODO Smart punctuation +*** TODO Math +*** TODO Footnote reference +*** DONE Linebreak +*** TODO Comment +*** TODO Symbols +*** TODO Raw inline +*** TODO Span +*** TODO Inline attributes +** Block Syntax [0/15] +*** TODO Paragraph +*** TODO Heading +*** TODO Block quote +*** TODO List item +*** TODO List +*** TODO Code block +*** TODO Thematic break +*** TODO Raw block +*** TODO Div +*** TODO Pipe table +*** TODO Reference link +*** TODO definition +*** TODO Footnote +*** TODO Block attributes +*** TODO Links to headings +** TODO Predictive parsing +** TODO Support all types of whitespace +*** TODO Escaping +*** TODO Whitespace class +** TODO hex escapes +** TODO GFM style alerts +** TODO Group adjacent =Text=s into a =String=