Paragraph Support

This commit is contained in:
Nathan McCarty 2025-02-21 03:20:46 -05:00
parent 98ed90a9bc
commit f60e209073
2 changed files with 136 additions and 60 deletions

View file

@ -31,6 +31,9 @@ data Inline : Type where
||| Block-level elements of a parsed document.
public export
data Block : Type where
-- A paragraph, holding a non-empty (`List1`) sequence of inline elements.
Paragraph : (contents : List1 Inline) -> Block
-- Derive the `Show` and `Eq` implementations for `Block`.
%runElab derive "Block" [Show, Eq]
--*****************************************
--* Character Classes and String Escaping *
@ -83,9 +86,12 @@ escapedChar = do
-- Line Qualifying And Whitespace --
------------------------------------
||| Parse one horizontal-whitespace character by handing
||| `horizontalWhitespaceChars` to `theseChars`.
||| NOTE(review): presumably fails without consuming input when the next
||| character is not in that set -- confirm against `theseChars`.
space : PS Char
space = theseChars horizontalWhitespaceChars
spaces : PS Nat
spaces = do
xs <- many $ theseChars horizontalWhitespaceChars
xs <- many space
pure $ length xs
nonTerminal : PS Char
@ -106,7 +112,7 @@ terminal = do
line : PS (List Char)
line = do
cs <- many nonTerminal
terminal
_ <- lineEnding
pure cs
isHorizontalWhitespace : Char -> Bool
@ -119,12 +125,26 @@ blankLine = do
False => throw "nonblank line"
True => pure cs
||| Succeed when the parser is positioned at either a blank line or the end
||| of the input.
|||
||| A blank line is attempted first (through `tryMaybe`); only when that
||| attempt yields `Nothing` is `parseEoF` consulted. If neither holds, the
||| parser throws.
blankLineOrEnd : PS ()
blankLineOrEnd = do
  attempt <- tryMaybe blankLine
  case attempt of
    Just _ => pure ()
    Nothing => do
      atEnd <- parseEoF
      if atEnd
        then pure ()
        else throw "Expected newline or end of document"
||| Consume as many blank lines as are available.
|||
||| If at least one blank line was consumed the parser succeeds immediately;
||| if none were, it falls back to `blankLineOrEnd`.
blankLines : PS ()
blankLines = do
  consumed <- many blankLine
  case consumed of
    [] => blankLineOrEnd
    (_ :: _) => pure ()
--*****************************************
--* Inline syntax *
--*****************************************
inlineElement : PS Inline
------------------------
-- Escaped Whitespace --
------------------------
@ -176,34 +196,52 @@ text = do
-- Overall Inline Parser --
---------------------------
inlineElement = oneOfE "" [
hardLineBreak
, nbsp
inlineElementsNoNewlines : PS Inline
inlineElementsNoNewlines = oneOfE "" [
nbsp
, escapedText
, softLineBreak
-- Text is last so that anything can supersede it
, text
]
inlineElement : PS Inline
inlineElement = oneOfE "" [
hardLineBreak
, softLineBreak
, inlineElementsNoNewlines
]
inline : PS (List1 Inline)
inline = atLeastOne "Expected Inline Content" inlineElement
--*****************************************
--* Utility Functions *
--* Block Syntax *
--*****************************************
------------------
-- Constructors --
------------------
---------------
-- Paragraph --
---------------
namespace Inline
export
fromString : String -> List (Inline)
fromString str with (asList str)
fromString "" | [] = []
fromString (strCons c str) | (c :: x) =
Text c :: fromString str | x
||| Parse a paragraph: a non-empty run of inline content that must be
||| followed by a blank line or the end of the input (`blankLineOrEnd`).
||| The parsed inlines are wrapped in the `Paragraph` constructor.
paragraph : PS Block
paragraph = do
  contents <- inline
  -- A paragraph is only accepted when it is properly terminated.
  blankLineOrEnd
  pure (Paragraph contents)
--------------------------
-- Overall Block Parser --
--------------------------
||| Parse a single block: skip any leading blank lines, then try the block
||| parsers listed in `oneOfE` (currently only `paragraph`).
||| NOTE(review): the blank lines are consumed before `paragraph` is tried;
||| whether that input is restored when `paragraph` fails depends on how
||| `many`/`oneOfE` backtrack -- confirm for input that ends in blank lines.
block : PS Block
block = do
-- eat up any blank lines
_ <- many blankLine
oneOfE "" [
paragraph
]
||| Parse a sequence of blocks by repeating `block` with `many`, collecting
||| the results in a list (which may be empty).
blocks : PS (List Block)
blocks = many block
--*****************************************
--* Unit Tests *
@ -228,6 +266,18 @@ golden input ref parser = do
putStrLn "Output: \{show x}"
pure $ x == ref
||| Test helper: turn a string into a list of inlines, producing one `Text`
||| element per character, in order. The empty string gives the empty list.
inlineFromString : String -> List (Inline)
-- Recursion goes through the `asList` view of the string, peeling off one
-- character (`strCons c str`) per step.
inlineFromString str with (asList str)
inlineFromString "" | [] = []
inlineFromString (strCons c str) | (c :: x) =
Text c :: inlineFromString str | x
||| Test helper: non-empty (`List1`) variant of `inlineFromString`.
||| Crashes via `idris_crash` when the string produces no inlines, so it must
||| only be called with a non-empty string.
inlineFromString' : String -> List1 (Inline)
inlineFromString' str =
case inlineFromString str of
-- An empty result means the unit test itself was written incorrectly.
[] => assert_total $ idris_crash "Bad unit test fromString"
(x :: xs) => x ::: xs
-------------------------
-- Inline Syntax Tests --
-------------------------
@ -242,26 +292,47 @@ inlineTextSmoke =
inlineEscapedSmoke : IO Bool
inlineEscapedSmoke =
let input = "Hello\\n\\*World"
ref = fromString "Hello" ++ [Text '\n', Text '*'] ++ fromString "World"
ref = inlineFromString "Hello" ++ [Text '\n', Text '*'] ++ inlineFromString "World"
in golden input ref (map forget inline)
-- @@test Hard Line Break
inlineHardBreakSmoke : IO Bool
inlineHardBreakSmoke =
let input = "Hello\\\nWorld"
ref = fromString "Hello" ++ [HardLineBreak] ++ fromString "World"
ref = inlineFromString "Hello" ++ [HardLineBreak] ++ inlineFromString "World"
in golden input ref (map forget inline)
-- @@test Soft Line Break
inlineSoftBreakSmoke : IO Bool
inlineSoftBreakSmoke =
let input = "Hello\nWorld"
ref = fromString "Hello" ++ [SoftLineBreak] ++ fromString "World"
ref = inlineFromString "Hello" ++ [SoftLineBreak] ++ inlineFromString "World"
in golden input ref (map forget inline)
-- @@test Nonbreaking Space
inlineNbspSmoke : IO Bool
inlineNbspSmoke =
let input = "Hello\\ World"
ref = fromString "Hello" ++ [NonBreakingSpace] ++ fromString "World"
ref = inlineFromString "Hello" ++ [NonBreakingSpace] ++ inlineFromString "World"
in golden input ref (map forget inline)
------------------------
-- Block Syntax Tests --
------------------------
-- Smoke test: passes the input "Hello World", a one-paragraph reference
-- value, and the `blocks` parser to `golden`; the reference is a single
-- `Paragraph` whose contents are one `Text` per character of the input.
-- @@test Paragraph
blockParagraphSmoke : IO Bool
blockParagraphSmoke =
let input = "Hello World"
ref = [Paragraph (inlineFromString' "Hello World")]
in golden input ref blocks
-- Smoke test: passes two runs of text separated by an empty line ("\n\n"),
-- a two-paragraph reference value, and the `blocks` parser to `golden`.
-- @@test Two Paragraph
blockTwoParagraphSmoke : IO Bool
blockTwoParagraphSmoke =
let input = "Hello World\n\nHello Again"
ref = [
Paragraph (inlineFromString' "Hello World")
, Paragraph (inlineFromString' "Hello Again")
]
in golden input ref blocks

View file

@ -8,45 +8,50 @@ Decided to rename =Tag= to =Html=, and =Raw= to =Text=, which makes this make se
** TODO Refine =location= in =ParserLocation=
** TODO Error messages
** TODO Combinators for predictive parsing
* Djot [2/40]
* Djot [3/42]
:PROPERTIES:
:COOKIE_DATA: recursive
:END:
** Inline Syntax [2/18]
*** DONE Ordinary Text
*** TODO Link
*** TODO Image
*** TODO Autolink
*** TODO Verbatim
*** TODO Emphasis/strong
*** TODO Highlighted
*** TODO Super/subscript
*** TODO Insert/delete
*** TODO Smart punctuation
*** TODO Math
*** TODO Footnote reference
*** DONE Linebreak
*** TODO Comment
*** TODO Symbols
*** TODO Raw inline
*** TODO Span
*** TODO Inline attributes
** Block Syntax [0/15]
*** TODO Paragraph
*** TODO Heading
*** TODO Block quote
*** TODO List item
*** TODO List
*** TODO Code block
*** TODO Thematic break
*** TODO Raw block
*** TODO Div
*** TODO Pipe table
*** TODO Reference link
*** TODO definition
*** TODO Footnote
*** TODO Block attributes
*** TODO Links to headings
** Parsing
*** Inline Syntax [2/18]
**** DONE Ordinary Text
**** TODO Link
**** TODO Image
**** TODO Autolink
**** TODO Verbatim
**** TODO Emphasis/strong
**** TODO Highlighted
**** TODO Super/subscript
**** TODO Insert/delete
**** TODO Smart punctuation
**** TODO Math
**** TODO Footnote reference
**** DONE Linebreak
**** TODO Comment
**** TODO Symbols
**** TODO Raw inline
**** TODO Span
**** TODO Inline attributes
*** Block Syntax [1/16]
**** DONE Paragraph
**** Heading
***** TODO Multiline without leading count
***** TODO Basic
**** TODO Block quote
**** TODO List item
**** TODO List
**** TODO Code block
**** TODO Thematic break
**** TODO Raw block
**** TODO Div
**** TODO Pipe table
**** TODO Reference link
**** TODO definition
**** TODO Footnote
**** TODO Block attributes
**** TODO Links to headings
** TODO Rendering
** Bugs [0/0]
** TODO Predictive parsing
** TODO Support all types of whitespace
*** TODO Escaping