Handle Multiline headings properly

This commit is contained in:
Nathan McCarty 2025-02-21 20:36:04 -05:00
parent f54df1d2a4
commit fe239e287b
7 changed files with 131 additions and 16 deletions

View file

@ -20,9 +20,9 @@ import System
%language ElabReflection
--**************
--* Data Types *
--**************
--*****************
--* Data Types *
--*****************
public export
data HeaderLevel : Type where
@ -67,9 +67,27 @@ data Block : Type where
%runElab derive "List1" [Pretty]
%runElab derive "Block" [Show, Eq, Pretty]
--**************
--* Syntax *
--**************
--*****************
--* Parsing Utils *
--*****************
-- Parse a prefixed line, stripping the prefix
prefixedLine : (pfx : PS b) -> PS String
prefixedLine pfx = do
_ <- pfx
cs <- nonBlankLine
pure $ pack cs
-- Parses a block prefixed by the given character string with the given parser
parsePrefixedBlock : (pfx : PS b) -> (parser : PS a) -> PS a
parsePrefixedBlock pfx parser = do
lines <- atLeastOne "No lines in block" $ prefixedLine pfx
let block = joinBy "\n" . forget $ lines
runPS' parser block
--*****************
--* Syntax *
--*****************
---------------
-- Paragraph --
@ -85,14 +103,42 @@ paragraph = do
-- Heading --
-------------
headingPrefix : (level : Nat) -> PS String
headingPrefix level = oneOfE "" [notPrefixP, prefixP]
where
notPrefixed : PS ()
notPrefixed = do
-- peek the first character to check and see if its a heading marker
Just [first] <- peek 1
| _ => throw "End of File"
case first of
'#' => throw "Header prefix"
_ => pure ()
notPrefixP : PS String
notPrefixP = do
notPrefixed
map pack $ many space
prefixP : PS String
prefixP = do
levelMarker <- atLeastOne "Expected Header Marker" $ thisString "#"
ws <- atLeastOne "Expected Whitespace after header marker" $ space
case length levelMarker == level of
False => throw "Mismatched levels"
True => pure $ (concat . forget $ levelMarker) ++ (pack . forget $ ws)
heading : PS Block
heading = do
-- peek the level marker, then parse as a prefixed block
state <- save
levelMarker <- atLeastOne "Expected Header Marker" $ thisString "#"
ws <- atLeastOne "Expected Whitespace after header marker" $ space
case integerToHeader (natToInteger $ length levelMarker) of
Nothing => throw "Invalid header length: \{show $ length levelMarker}"
Just level => do
inlines <- inline
load state
inlines <- parsePrefixedBlock (headingPrefix (length levelMarker)) inline
-- djot demands a blank line or the end of file after a heading
_ <- blankLineOrEnd
pure $ Heading level inlines
--------------------------
@ -113,9 +159,9 @@ export
blocks : PS (List Block)
blocks = many block
--**************
--* Unit Tests *
--**************
--*****************
--* Unit Tests *
--*****************
------------------------
-- Block Syntax Tests --
@ -155,3 +201,21 @@ blockLevelTwoHeadingSmoke =
Heading 2 (inlineFromString' "Level 2")
]
in golden input ref blocks
-- @@test Multiline prefixed headings
blockMultilinePrefixed : IO Bool
blockMultilinePrefixed =
let input = "# Level 1\n# More text"
ref = [
Heading 1 (Text "Level 1" ::: [SoftLineBreak, Text "More text"])
]
in golden input ref blocks
-- @@test Multiline unprefixed headings
blockMultilineUnprefixed : IO Bool
blockMultilineUnprefixed =
let input = "# Level 1\nMore text"
ref = [
Heading 1 (Text "Level 1" ::: [SoftLineBreak, Text "More text"])
]
in golden input ref blocks

View file

@ -108,6 +108,15 @@ blankLine = do
False => throw "nonblank line"
True => pure cs
export
nonBlankLine : PS (List Char)
nonBlankLine = do
cs <- many nonTerminal
_ <- terminal
case all isHorizontalWhitespace cs of
True => throw "blank line"
False => pure cs
export
blankLineOrEnd : PS ()
blankLineOrEnd = do

View file

@ -201,7 +201,7 @@ inlineTextSmoke =
inlineEscapedSmoke : IO Bool
inlineEscapedSmoke =
let input = "Hello\\n\\*World"
ref = inlineFromString "Hello\n*World"
ref = [Text "Hello", SoftLineBreak, Text "*World"]
in golden input ref (map forget inline)
-- @@test Hard Line Break

View file

@ -4,6 +4,7 @@ import SSG.Parser.Core
import Data.List1
import Data.Vect
import Data.String
import Control.Eff
||| Type alias for parsing errors
@ -22,6 +23,15 @@ runPS : PS a -> (str : String) -> Either String a
runPS x str =
fst . extract . runParser str . runExcept $ x
||| Run a parser, extracting the parsed value, and throwing an error if the parse was not
||| complete or if it failed
export
runPS' : PS a -> (str : String) -> PS a
runPS' x str = do
(res, rest) <- lift . runParser str . runExcept $ x
unless (null rest) $ throw "Incomplete parse"
rethrow res
||| Attempt to run a fallible parser, backtracking on failure
export
try : (f : PS a) -> (err : ParseError -> PS a) -> PS a

View file

@ -15,5 +15,13 @@
<p>Even more content</p>
<h1>Back up</h1>
<p>Some final content</p>
<h1>A Multiline Heading</h1>
<h2>A Level 2 Multiline Heading</h2>
<p>## Heading ### Not a heading</p>
<p>### Heading ## Not a heading</p>
<h1>A Multiline Heading</h1>
<h2>A Level 2 Multiline Heading</h2>
<h1>A Multiline Heading</h1>
<h2>A Level 2 Multiline Heading</h2>
</body>
</html>

View file

@ -26,3 +26,27 @@ Even more content
# Back up
Some final content
# A Multiline
# Heading
## A Level 2
## Multiline Heading
## Heading
### Not a heading
### Heading
## Not a heading
# A Multiline
Heading
## A Level 2
Multiline Heading
# A Multiline
Heading
## A Level 2
Multiline Heading

View file

@ -9,7 +9,7 @@ Decided to rename =Tag= to =Html=, and =Raw= to =Text=, which makes this make se
** TODO Refine =location= in =ParserLocation=
** TODO Error messages
** TODO Combinators for predictive parsing
* Djot [5/42]
* Djot [7/43]
:PROPERTIES:
:COOKIE_DATA: recursive
:END:
@ -33,10 +33,7 @@ Decided to rename =Tag= to =Html=, and =Raw= to =Text=, which makes this make se
**** TODO Inline attributes
**** DONE Ordinary Text
**** DONE Linebreak
*** Block Syntax [2/16]
**** Heading
***** TODO Multiline without leading count
***** DONE Basic
*** Block Syntax [4/17]
**** TODO Block quote
**** TODO List item
**** TODO List
@ -51,6 +48,9 @@ Decided to rename =Tag= to =Html=, and =Raw= to =Text=, which makes this make se
**** TODO Block attributes
**** TODO Links to headings
**** DONE Paragraph
**** DONE Heading
***** DONE Multiline without leading count
***** DONE Basic
** TODO Predictive parsing
** TODO Support all types of whitespace
*** TODO Escaping