Parse inline text

This commit is contained in:
Nathan McCarty 2025-02-27 00:47:28 -05:00
parent bd94410c01
commit 77890ba549
4 changed files with 145 additions and 2 deletions

View file

@ -26,6 +26,8 @@ modules = SSG.HTML
, SSG.HTML.ElementTypes
, SSG.Djot
, SSG.Djot.Lines
, SSG.Djot.Block
, SSG.Djot.Inline
-- main file (i.e. file to load at REPL)
main = Main

1
src/SSG/Djot/Block.idr Normal file
View file

@ -0,0 +1 @@
module SSG.Djot.Block

139
src/SSG/Djot/Inline.idr Normal file
View file

@ -0,0 +1,139 @@
||| Djot inline formatting parser
module SSG.Djot.Inline
import Control.Eff
import Data.List.Lazy
import Data.Maybe
import Data.String
import Derive.Prelude
import Derive.Pretty
-- Just for iutils unit tests
import System
%language ElabReflection
--******************************
--* Data Structures *
--******************************
||| Types of inline styling
public export
data Inline : Type where
Text : (text : String) -> Inline
%runElab derive "Inline" [Eq, Show, Pretty]
--******************************
--* Parsing Utilities *
--******************************
||| Type alias for inline parsing
IParser : Type -> Type
IParser t = Eff [State (List Char), Choose] t
||| Get the next char, modifiying the state
popChar : IParser Char
popChar = do
x :: xs <- get
| [] => empty
put xs
pure x
||| Get the next char, without modifying the state
peekChar : IParser Char
peekChar = do
x :: xs <- get
| [] => empty
pure x
||| Attempt to parse something without propagating the failure
try : IParser t -> IParser (Maybe t)
try x = do
state <- get
x <- lift . runChoose {f = Maybe} $ x
case x of
Nothing => do
put state
pure Nothing
Just y => pure $ Just y
||| Choose a parser
oneOf : List (IParser t) -> IParser t
oneOf [] = empty
oneOf (x :: xs) = x `alt` oneOf xs
||| Parse 0+ of something
many : IParser t -> IParser (List t)
many x = do
Just y <- try x
| _ => pure []
map (y ::) $ many x
||| Run a parser
runIParser : (str : String) -> IParser t -> Maybe t
runIParser str x =
fst . extract . runState (unpack str) . runChoose {f = Maybe} $ x
--******************************
--* Syntax *
--******************************
-- Forward declare so we can get mutually recursive
||| Top level parser function for Inline Content
pInline : IParser Inline
||| Parse a character as plain text
text : IParser Inline
text = do
c <- popChar
pure $ Text (singleton c)
-- Definition of pInline
pInline = oneOf
[ text
]
--******************************
--* Post Processing *
--******************************
||| Combine adjacent Text entries
combineTexts : List Inline -> List Inline
combineTexts (Text a :: (Text b :: xs)) =
combineTexts (Text (a ++ b) :: xs)
combineTexts xs = xs
||| Top level post processor
postProcess : List Inline -> List Inline
postProcess xs = combineTexts xs
--******************************
--* Top Level Parsing Function *
--******************************
||| Parse a string as inline content
export
inline : (input : String) -> List Inline
inline input =
postProcess . fromMaybe [] . runIParser input $ many pInline
--******************************
--* Unit Tests *
--******************************
golden : (input : String) -> (reference : List Inline) -> IO Bool
golden input reference = do
putStrLn "Input: \{show input}"
let opts = Opts 78
let ref_pretty = Doc.render opts $ pretty reference
putStrLn "Reference:\n\{unlines . map (" " ++) . lines $ ref_pretty}"
let output = inline input
let out_pretty = Doc.render opts $ pretty output
putStrLn "Output:\n\{unlines . map (" " ++) . lines $ out_pretty}"
pure $ reference == output
-- @@test Just text parses as text
testTextAsText : IO Bool
testTextAsText =
golden "Hello World!" [Text "Hello World!"]

View file

@ -9,7 +9,7 @@ Decided to rename =Tag= to =Html=, and =Raw= to =Text=, which makes this make se
** TODO Refine =location= in =ParserLocation=
** TODO Error messages
** TODO Combinators for predictive parsing
* Djot [0/31]
* Djot [1/33]
:PROPERTIES:
:COOKIE_DATA: recursive
:END:
@ -30,7 +30,8 @@ Decided to rename =Tag= to =Html=, and =Raw= to =Text=, which makes this make se
**** TODO Block Attributes
**** TODO Heading Links
*** Inline
**** TODO Ordinary Text
**** DONE Ordinary Text
**** TODO Escaped Text
**** TODO Link
**** TODO Image
**** TODO Autolink