From fe239e287bf6ed224ca19f4a3f10d4f862c38397 Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Fri, 21 Feb 2025 20:36:04 -0500 Subject: [PATCH] Handle Multiline headings properly --- src/SSG/Djot/Block.idr | 84 ++++++++++++++++++++++--- src/SSG/Djot/Common.idr | 9 +++ src/SSG/Djot/Inline.idr | 2 +- src/SSG/Parser/Util.idr | 10 +++ test/djot-to-html/003-headings/expected | 8 +++ test/djot-to-html/003-headings/test.dj | 24 +++++++ todo.org | 10 +-- 7 files changed, 131 insertions(+), 16 deletions(-) diff --git a/src/SSG/Djot/Block.idr b/src/SSG/Djot/Block.idr index 067feb0..273f481 100644 --- a/src/SSG/Djot/Block.idr +++ b/src/SSG/Djot/Block.idr @@ -20,9 +20,9 @@ import System %language ElabReflection ---************** ---* Data Types * ---************** +--***************** +--* Data Types * +--***************** public export data HeaderLevel : Type where @@ -67,9 +67,27 @@ data Block : Type where %runElab derive "List1" [Pretty] %runElab derive "Block" [Show, Eq, Pretty] ---************** ---* Syntax * ---************** +--***************** +--* Parsing Utils * +--***************** + +-- Parse a prefixed line, stripping the prefix +prefixedLine : (pfx : PS b) -> PS String +prefixedLine pfx = do + _ <- pfx + cs <- nonBlankLine + pure $ pack cs + +-- Parses a block prefixed by the given character string with the given parser +parsePrefixedBlock : (pfx : PS b) -> (parser : PS a) -> PS a +parsePrefixedBlock pfx parser = do + lines <- atLeastOne "No lines in block" $ prefixedLine pfx + let block = joinBy "\n" . forget $ lines + runPS' parser block + +--***************** +--* Syntax * +--***************** --------------- -- Paragraph -- @@ -85,14 +103,42 @@ paragraph = do -- Heading -- ------------- +headingPrefix : (level : Nat) -> PS String +headingPrefix level = oneOfE "" [notPrefixP, prefixP] + where + notPrefixed : PS () + notPrefixed = do + -- peek the first character to check and see if its a heading marker + Just [first] <- peek 1 + | _ => throw "End of File" + case first of + '#' => throw "Header prefix" + _ => pure () + notPrefixP : PS String + notPrefixP = do + notPrefixed + map pack $ many space + prefixP : PS String + prefixP = do + levelMarker <- atLeastOne "Expected Header Marker" $ thisString "#" + ws <- atLeastOne "Expected Whitespace after header marker" $ space + case length levelMarker == level of + False => throw "Mismatched levels" + True => pure $ (concat . forget $ levelMarker) ++ (pack . forget $ ws) + heading : PS Block heading = do + -- peek the level marker, then parse as a prefixed block + state <- save levelMarker <- atLeastOne "Expected Header Marker" $ thisString "#" ws <- atLeastOne "Expected Whitespace after header marker" $ space case integerToHeader (natToInteger $ length levelMarker) of Nothing => throw "Invalid header length: \{show $ length levelMarker}" Just level => do - inlines <- inline + load state + inlines <- parsePrefixedBlock (headingPrefix (length levelMarker)) inline + -- djot demands a blank line or the end of file after a heading + _ <- blankLineOrEnd pure $ Heading level inlines -------------------------- @@ -113,9 +159,9 @@ export blocks : PS (List Block) blocks = many block ---************** ---* Unit Tests * ---************** +--***************** +--* Unit Tests * +--***************** ------------------------ -- Block Syntax Tests -- @@ -155,3 +201,21 @@ blockLevelTwoHeadingSmoke = Heading 2 (inlineFromString' "Level 2") ] in golden input ref blocks + +-- @@test Multiline prefixed headings +blockMultilinePrefixed : IO Bool +blockMultilinePrefixed = + let input = "# Level 1\n# More text" + ref = [ + Heading 1 (Text "Level 1" ::: [SoftLineBreak, Text "More text"]) + ] + in golden input ref blocks + +-- @@test Multiline unprefixed headings +blockMultilineUnprefixed : IO Bool +blockMultilineUnprefixed = + let input = "# Level 1\nMore text" + ref = [ + Heading 1 (Text "Level 1" ::: [SoftLineBreak, Text "More text"]) + ] + in golden input ref blocks diff --git a/src/SSG/Djot/Common.idr b/src/SSG/Djot/Common.idr index addd20e..8f8b71b 100644 --- a/src/SSG/Djot/Common.idr +++ b/src/SSG/Djot/Common.idr @@ -108,6 +108,15 @@ blankLine = do False => throw "nonblank line" True => pure cs +export +nonBlankLine : PS (List Char) +nonBlankLine = do + cs <- many nonTerminal + _ <- terminal + case all isHorizontalWhitespace cs of + True => throw "blank line" + False => pure cs + export blankLineOrEnd : PS () blankLineOrEnd = do diff --git a/src/SSG/Djot/Inline.idr b/src/SSG/Djot/Inline.idr index f5db77f..66a0ee9 100644 --- a/src/SSG/Djot/Inline.idr +++ b/src/SSG/Djot/Inline.idr @@ -201,7 +201,7 @@ inlineTextSmoke = inlineEscapedSmoke : IO Bool inlineEscapedSmoke = let input = "Hello\\n\\*World" - ref = inlineFromString "Hello\n*World" + ref = [Text "Hello", SoftLineBreak, Text "*World"] in golden input ref (map forget inline) -- @@test Hard Line Break diff --git a/src/SSG/Parser/Util.idr b/src/SSG/Parser/Util.idr index a195236..4777f7b 100644 --- a/src/SSG/Parser/Util.idr +++ b/src/SSG/Parser/Util.idr @@ -4,6 +4,7 @@ import SSG.Parser.Core import Data.List1 import Data.Vect +import Data.String import Control.Eff ||| Type alias for parsing errors @@ -22,6 +23,15 @@ runPS : PS a -> (str : String) -> Either String a runPS x str = fst . extract . runParser str . runExcept $ x +||| Run a parser, extracting the parsed value, and throwing an error if the parse was not +||| complete or if it failed +export +runPS' : PS a -> (str : String) -> PS a +runPS' x str = do + (res, rest) <- lift . runParser str . runExcept $ x + unless (null rest) $ throw "Incomplete parse" + rethrow res + ||| Attempt to run a fallible parser, backtracking on failure export try : (f : PS a) -> (err : ParseError -> PS a) -> PS a diff --git a/test/djot-to-html/003-headings/expected b/test/djot-to-html/003-headings/expected index f281872..91f7ee8 100644 --- a/test/djot-to-html/003-headings/expected +++ b/test/djot-to-html/003-headings/expected @@ -15,5 +15,13 @@

Even more content

Back up

Some final content

+

A Multiline Heading

+

A Level 2 Multiline Heading

+

## Heading ### Not a heading

+

### Heading ## Not a heading

+

A Multiline Heading

+

A Level 2 Multiline Heading

+

A Multiline Heading

+

A Level 2 Multiline Heading

\ No newline at end of file diff --git a/test/djot-to-html/003-headings/test.dj b/test/djot-to-html/003-headings/test.dj index e43c08b..d6eb8ec 100644 --- a/test/djot-to-html/003-headings/test.dj +++ b/test/djot-to-html/003-headings/test.dj @@ -26,3 +26,27 @@ Even more content # Back up Some final content + +# A Multiline +# Heading + +## A Level 2 +## Multiline Heading + +## Heading +### Not a heading + +### Heading +## Not a heading + +# A Multiline +Heading + +## A Level 2 +Multiline Heading + +# A Multiline + Heading + +## A Level 2 + Multiline Heading diff --git a/todo.org b/todo.org index bc61011..d263555 100644 --- a/todo.org +++ b/todo.org @@ -9,7 +9,7 @@ Decided to rename =Tag= to =Html=, and =Raw= to =Text=, which makes this make se ** TODO Refine =location= in =ParserLocation= ** TODO Error messages ** TODO Combinators for predictive parsing -* Djot [5/42] +* Djot [7/43] :PROPERTIES: :COOKIE_DATA: recursive :END: @@ -33,10 +33,7 @@ Decided to rename =Tag= to =Html=, and =Raw= to =Text=, which makes this make se **** TODO Inline attributes **** DONE Ordinary Text **** DONE Linebreak -*** Block Syntax [2/16] -**** Heading -***** TODO Multiline without leading count -***** DONE Basic +*** Block Syntax [4/17] **** TODO Block quote **** TODO List item **** TODO List @@ -51,6 +48,9 @@ Decided to rename =Tag= to =Html=, and =Raw= to =Text=, which makes this make se **** TODO Block attributes **** TODO Links to headings **** DONE Paragraph +**** DONE Heading +***** DONE Multiline without leading count +***** DONE Basic ** TODO Predictive parsing ** TODO Support all types of whitespace *** TODO Escaping