Paragraph parsing

This commit is contained in:
Nathan McCarty 2025-02-27 03:37:10 -05:00
parent 626245e629
commit a09f3b60df
4 changed files with 138 additions and 6 deletions

View file

@ -19,6 +19,7 @@ depends = contrib
, eff
, elab-util
, elab-pretty
, sop
, prettier
-- modules to install

View file

@ -1 +1,130 @@
||| DJot Block formatting parser
module SSG.Djot.Block
import SSG.Djot.Inline
import SSG.Djot.Lines
import Control.Eff
import Data.Maybe
import Data.String
import Derive.Prelude
import Derive.Pretty
import Generics.Derive
import Structures.Dependent.DList
-- Just for iutils unit tests
import System
%language ElabReflection
%hide Generics.SOP.values
%hide Generics.Derive.Show
%hide Generics.Derive.Eq
--* Data Structures *
||| Types of block level element
public export
data BlockType : Type where
TParagraph : BlockType
%runElab derive "BlockType" [Generic, Show, Eq, DecEq]
||| A block element
public export
data Block : BlockType -> Type where
Paragraph : (content : List (Inline)) -> Block TParagraph
%runElab derive "Block" [Show, Eq]
--* Parsing Utilities *
||| Type alias for block parsing
BParser : Type -> Type
BParser t = Eff [Lines, Fail] t
-- FIXME: Proper whitespace handling
||| Detect if a line is blank
blankLine : String -> Bool
blankLine = null . trim
||| Select and apply the first matching parser from a list, with a fallback parser
selectParser :
{fallback_type : BlockType} -> {values : _}
-> (fallback : BParser (Block fallback_type))
-> DList BlockType (\t => (String -> Bool, BParser (Block t))) values
-> BParser (t : BlockType ** Block t)
selectParser fallback [] = do
x <- fallback
pure (_ ** x)
selectParser fallback ((pred, parser) :: rest) = do
Just line <- peek
| _ => fail
if pred line
then do
x <- parser
pure (_ ** x)
else selectParser fallback rest
||| Repeat a parser until failure or if we are out of lines
repeat : BParser t -> BParser (List t)
repeat x = do
Just _ <- peek
| _ => pure []
Just res <- lift . runFail $ x
| _ => pure []
map (res ::) (repeat x)
||| Run a parser
runBParser : (input : String) -> BParser t -> Maybe t
runBParser input x =
map fst . extract . runFail . runLines input $ x
--* Syntax *
-- Forward declare for mutual recursion
||| Top level block parser
pBlock : BParser (t : BlockType ** Block t)
paragraph : BParser (Block TParagraph)
paragraph = do
contents <- map (inline . joinBy "\n") $ slurp blankLine
pure $ Paragraph contents
-- Definition for top level block parser
pBlock = selectParser paragraph []
--******************************
--* Top Level Parsing Function *
--******************************
||| Parse a string as a sequences of blocks
export
block : (input : String) -> (values ** DList BlockType Block values)
block input =
case runBParser input (repeat pBlock) of
Nothing => (_ ** [])
Just x => fromList x
--* Unit Tests *
[myshow] Show (t : BlockType ** Block t) where
show (_ ** block) = show block
{values : _} -> Show (DList BlockType Block values) where
show xs =
let xs = map (show @{myshow}) $ toList xs
in "[\{joinBy ", " xs}]"
golden : {values : _}
-> (input : String) -> (ref : DList BlockType Block values) -> IO Bool
golden input ref = do
putStrLn "Input: \{show input}"
putStrLn "Reference:\n \{show ref}"
let (types ** output) = block input
putStrLn "Output:\n \{show output}"
pure $ ref $== output
-- @@test Paragraph Smoke
smokeParagraph : IO Bool
smokeParagraph = golden "Hello World!" [Paragraph [Text "Hello World!"]]

View file

@ -36,7 +36,9 @@ take = send Take
--************************
||| Take lines until a line matching the given predicate is encountered, dropping the
||| matching line
||| all of the matching lines until the first non matching one
|||
||| Intended to be used to slurp up to the next blank line, discarding the blanks
export
slurp : Has Lines fs => (predicate : String -> Bool) -> Eff fs (List String)
slurp predicate = do
@ -45,7 +47,7 @@ slurp predicate = do
if predicate line
then do
_ <- take
pure []
slurp predicate
else do
Just x <- take
| _ => pure []

View file

@ -9,13 +9,12 @@ Decided to rename =Tag= to =Html=, and =Raw= to =Text=, which makes this make se
** TODO Refine =location= in =ParserLocation=
** TODO Error messages
** TODO Combinators for predictive parsing
* Djot [1/38]
* Djot [2/38]
:PROPERTIES:
:COOKIE_DATA: recursive
:END:
** Parsing [1/33]
** Parsing [2/33]
*** Block Level
**** TODO Paragraph
**** TODO Heading
**** TODO Block Quote
**** TODO List Item
@ -29,8 +28,8 @@ Decided to rename =Tag= to =Html=, and =Raw= to =Text=, which makes this make se
**** TODO Footnote
**** TODO Block Attributes
**** TODO Heading Links
**** DONE Paragraph
*** Inline
**** DONE Ordinary Text
**** TODO Escaped Text
**** TODO Link
**** TODO Image
@ -48,6 +47,7 @@ Decided to rename =Tag= to =Html=, and =Raw= to =Text=, which makes this make se
**** TODO Symbols
**** TODO Raw Inline
**** TODO Inline Attributes
**** DONE Ordinary Text
*** Lines effect
**** TODO =IO= Backed implementation
*** Known Inaccuracies