Inline text parsing
This commit is contained in:
parent
2033a48c3e
commit
98ed90a9bc
6 changed files with 373 additions and 15 deletions
3
SSG.ipkg
3
SSG.ipkg
|
@ -16,11 +16,12 @@ authors = "Nathan McCarty"
|
|||
depends = structures
|
||||
, eff
|
||||
, refined
|
||||
, elab-util
|
||||
|
||||
-- modules to install
|
||||
modules = SSG.Parser.Core
|
||||
, SSG.Parser.Util
|
||||
, SSG.Parser.Markdown
|
||||
, SSG.Parser.Djot
|
||||
, SSG.HTML
|
||||
, SSG.HTML.ElementTypes
|
||||
|
||||
|
|
|
@ -4,6 +4,9 @@ import Data.Vect
|
|||
import Data.String
|
||||
|
||||
import Control.Eff
|
||||
import Derive.Prelude
|
||||
|
||||
%language ElabReflection
|
||||
|
||||
---------------------------
|
||||
-- Internal parser state --
|
||||
|
@ -15,12 +18,14 @@ data ParserLocation : Type where
|
|||
AtPoint : (length : Int) -> (location : Int) -> ParserLocation
|
||||
EndOfInput : (length : Int) -> ParserLocation
|
||||
|
||||
%runElab derive "ParserLocation" [Show]
|
||||
|
||||
namespace ParserLocation
|
||||
export
|
||||
fromString : String -> ParserLocation
|
||||
fromString str =
|
||||
if strLength str > 0
|
||||
then AtPoint 0 (strLength str)
|
||||
then AtPoint (strLength str) 0
|
||||
else EndOfInput 0
|
||||
|
||||
||| The state of a parser
|
||||
|
@ -30,6 +35,8 @@ record ParserState where
|
|||
input : String
|
||||
location : ParserLocation
|
||||
|
||||
%runElab derive "ParserState" [Show]
|
||||
|
||||
namespace ParserState
|
||||
export
|
||||
fromString : String -> ParserState
|
||||
|
@ -69,6 +76,15 @@ data Parser : Type -> Type where
|
|||
-- Detect the end of input
|
||||
ParseEoF : Parser Bool
|
||||
|
||||
Show (Parser a) where
|
||||
show Save = "save"
|
||||
show (Load x) = "load: \{show x}"
|
||||
show (Peek n) = "peek: \{show n}]"
|
||||
show (ParseChar predicate) = "ParseChar"
|
||||
show (ParseExact str) = "ParseExact: \{str}"
|
||||
show (SkipChars predicate) = "SkipChars"
|
||||
show ParseEoF = "ParseEof"
|
||||
|
||||
-----------------------
|
||||
-- Effectful Actions --
|
||||
-----------------------
|
||||
|
@ -136,7 +152,7 @@ unParser s@(MkPS input (AtPoint length location)) (ParseChar predicate) =
|
|||
let c = assert_total $ strIndex input location
|
||||
in if predicate c
|
||||
then if location + 1 < length
|
||||
then (Got c, MkPS input (AtPoint length location))
|
||||
then (Got c, MkPS input (AtPoint length (location + 1)))
|
||||
else (Got c, MkPS input (EndOfInput length))
|
||||
else (Failed c, s)
|
||||
unParser s@(MkPS input (EndOfInput length)) (ParseChar predicate) =
|
||||
|
|
267
src/SSG/Parser/Djot.idr
Normal file
267
src/SSG/Parser/Djot.idr
Normal file
|
@ -0,0 +1,267 @@
|
|||
module SSG.Parser.Djot
|
||||
|
||||
import SSG.Parser.Core
|
||||
import SSG.Parser.Util
|
||||
|
||||
import Data.List1
|
||||
import Data.Vect
|
||||
import Data.Nat
|
||||
import Data.String
|
||||
|
||||
import Control.Eff
|
||||
import Derive.Prelude
|
||||
|
||||
-- For iutils unit tests
|
||||
import System
|
||||
|
||||
%language ElabReflection
|
||||
|
||||
--*****************************************
|
||||
--* Data Types *
|
||||
--*****************************************
|
||||
|
||||
public export
|
||||
data Inline : Type where
|
||||
HardLineBreak : Inline
|
||||
SoftLineBreak : Inline
|
||||
NonBreakingSpace : Inline
|
||||
Text : (c : Char) -> Inline
|
||||
|
||||
%runElab derive "Inline" [Show, Eq]
|
||||
|
||||
public export
|
||||
data Block : Type where
|
||||
|
||||
--*****************************************
|
||||
--* Character Classes and String Escaping *
|
||||
--*****************************************
|
||||
|
||||
-----------------------
|
||||
-- Character classes --
|
||||
-----------------------
|
||||
|
||||
-- Class contents
|
||||
|
||||
punctuationChars : List Char
|
||||
punctuationChars =
|
||||
[
|
||||
'!', '"', '#', '$', '%', '&', '\''
|
||||
, '(' , ')' , '*' , '+' , ',' , '-'
|
||||
, '.' , '/' , ':' , ';' , '<' , '='
|
||||
, '>' , '?' , '@' , '[' , ']' , '^'
|
||||
, '_' , '`' , '{' , '|' , '}' , '~'
|
||||
]
|
||||
|
||||
horizontalWhitespaceChars : List Char
|
||||
horizontalWhitespaceChars =
|
||||
[' ', '\t']
|
||||
|
||||
verticalWhitespaceChars : List Char
|
||||
verticalWhitespaceChars =
|
||||
['\n', '\r']
|
||||
|
||||
-- Class parsers
|
||||
|
||||
punctuation : PS Char
|
||||
punctuation = theseChars punctuationChars
|
||||
|
||||
--------------
|
||||
-- Escaping --
|
||||
--------------
|
||||
|
||||
escapedChar : PS Char
|
||||
escapedChar = do
|
||||
_ <- thisString "\\"
|
||||
oneOfE "Expected an escapable code"
|
||||
[ punctuation
|
||||
, exactReplace "n" '\n'
|
||||
, exactReplace "t" '\t'
|
||||
, exactReplace "r" '\r'
|
||||
]
|
||||
|
||||
------------------------------------
|
||||
-- Line Qualifying And Whitespace --
|
||||
------------------------------------
|
||||
|
||||
spaces : PS Nat
|
||||
spaces = do
|
||||
xs <- many $ theseChars horizontalWhitespaceChars
|
||||
pure $ length xs
|
||||
|
||||
nonTerminal : PS Char
|
||||
nonTerminal = notTheseChars verticalWhitespaceChars
|
||||
|
||||
lineEnding : PS String
|
||||
lineEnding = theseStrings ["\r\n", "\n", "\r"]
|
||||
|
||||
terminal : PS ()
|
||||
terminal = do
|
||||
Nothing <- tryMaybe lineEnding
|
||||
| _ => pure ()
|
||||
test <- parseEoF
|
||||
case test of
|
||||
False => throw "Expected line terminal"
|
||||
True => pure ()
|
||||
|
||||
line : PS (List Char)
|
||||
line = do
|
||||
cs <- many nonTerminal
|
||||
terminal
|
||||
pure cs
|
||||
|
||||
isHorizontalWhitespace : Char -> Bool
|
||||
isHorizontalWhitespace c = any (== c) horizontalWhitespaceChars
|
||||
|
||||
blankLine : PS (List Char)
|
||||
blankLine = do
|
||||
cs <- line
|
||||
case all isHorizontalWhitespace cs of
|
||||
False => throw "nonblank line"
|
||||
True => pure cs
|
||||
|
||||
--*****************************************
|
||||
--* Inline syntax *
|
||||
--*****************************************
|
||||
|
||||
inlineElement : PS Inline
|
||||
|
||||
------------------------
|
||||
-- Escaped Whitespace --
|
||||
------------------------
|
||||
|
||||
hardLineBreak : PS Inline
|
||||
hardLineBreak = do
|
||||
_ <- thisString "\\"
|
||||
_ <- spaces
|
||||
_ <- lineEnding
|
||||
pure $ HardLineBreak
|
||||
|
||||
nbsp : PS Inline
|
||||
nbsp = do
|
||||
_ <- thisString "\\ "
|
||||
pure $ NonBreakingSpace
|
||||
|
||||
softLineBreak : PS Inline
|
||||
softLineBreak = do
|
||||
_ <- lineEnding
|
||||
-- Check to see if the next line is empty, if it is, we are at the end of the inline
|
||||
-- content, go ahead and bail
|
||||
state <- save
|
||||
Nothing <- tryMaybe blankLine
|
||||
| Just _ => throw "End of inline"
|
||||
load state
|
||||
pure $ SoftLineBreak
|
||||
|
||||
---------------------
|
||||
-- Emphasis/Strong --
|
||||
---------------------
|
||||
|
||||
----------
|
||||
-- Text --
|
||||
----------
|
||||
|
||||
-- Process escape codes before processing as text
|
||||
escapedText : PS Inline
|
||||
escapedText = do
|
||||
c <- escapedChar
|
||||
pure $ Text c
|
||||
|
||||
-- Any non-line-ending character can be part of regular text
|
||||
text : PS Inline
|
||||
text = do
|
||||
c <- nonTerminal
|
||||
pure $ Text c
|
||||
|
||||
---------------------------
|
||||
-- Overall Inline Parser --
|
||||
---------------------------
|
||||
|
||||
inlineElement = oneOfE "" [
|
||||
hardLineBreak
|
||||
, nbsp
|
||||
, escapedText
|
||||
, softLineBreak
|
||||
-- Text is last so that anything can superseed it
|
||||
, text
|
||||
]
|
||||
|
||||
inline : PS (List1 Inline)
|
||||
inline = atLeastOne "Expected Inline Content" inlineElement
|
||||
|
||||
|
||||
--*****************************************
|
||||
--* Utility Functions *
|
||||
--*****************************************
|
||||
|
||||
------------------
|
||||
-- Constructors --
|
||||
------------------
|
||||
|
||||
namespace Inline
|
||||
export
|
||||
fromString : String -> List (Inline)
|
||||
fromString str with (asList str)
|
||||
fromString "" | [] = []
|
||||
fromString (strCons c str) | (c :: x) =
|
||||
Text c :: fromString str | x
|
||||
|
||||
--*****************************************
|
||||
--* Unit Tests *
|
||||
--*****************************************
|
||||
|
||||
-------------------------------
|
||||
-- Testing Utility Functions --
|
||||
-------------------------------
|
||||
|
||||
-- Test a parser against a golden value with the supplied input
|
||||
golden : Show a => Eq a =>
|
||||
(input : String) -> (reference : a) -> (parser : PS a) -> IO Bool
|
||||
golden input ref parser = do
|
||||
putStrLn "Input: \{input}"
|
||||
putStrLn "Expected: \{show ref}"
|
||||
let got = runPS parser input
|
||||
case got of
|
||||
Left err => do
|
||||
putStrLn "Failed with error: \{err}"
|
||||
pure False
|
||||
Right x => do
|
||||
putStrLn "Output: \{show x}"
|
||||
pure $ x == ref
|
||||
|
||||
-------------------------
|
||||
-- Inline Syntax Tests --
|
||||
-------------------------
|
||||
|
||||
-- @@test Plain Text Hello World
|
||||
inlineTextSmoke : IO Bool
|
||||
inlineTextSmoke =
|
||||
let input = "Hello World!" in
|
||||
golden input (map Text . unpack $ input) (map forget inline)
|
||||
|
||||
-- @@test Escaped Text
|
||||
inlineEscapedSmoke : IO Bool
|
||||
inlineEscapedSmoke =
|
||||
let input = "Hello\\n\\*World"
|
||||
ref = fromString "Hello" ++ [Text '\n', Text '*'] ++ fromString "World"
|
||||
in golden input ref (map forget inline)
|
||||
|
||||
-- @@test Hard Line Break
|
||||
inlineHardBreakSmoke : IO Bool
|
||||
inlineHardBreakSmoke =
|
||||
let input = "Hello\\\nWorld"
|
||||
ref = fromString "Hello" ++ [HardLineBreak] ++ fromString "World"
|
||||
in golden input ref (map forget inline)
|
||||
|
||||
-- @@test Soft Line Break
|
||||
inlineSoftBreakSmoke : IO Bool
|
||||
inlineSoftBreakSmoke =
|
||||
let input = "Hello\nWorld"
|
||||
ref = fromString "Hello" ++ [SoftLineBreak] ++ fromString "World"
|
||||
in golden input ref (map forget inline)
|
||||
|
||||
-- @@test Nonbreaking Space
|
||||
inlineNbspSmoke : IO Bool
|
||||
inlineNbspSmoke =
|
||||
let input = "Hello\\ World"
|
||||
ref = fromString "Hello" ++ [NonBreakingSpace] ++ fromString "World"
|
||||
in golden input ref (map forget inline)
|
|
@ -1,10 +0,0 @@
|
|||
module SSG.Parser.Markdown
|
||||
|
||||
import SSG.Parser.Core
|
||||
import SSG.Parser.Util
|
||||
|
||||
import Data.List1
|
||||
import Data.Vect
|
||||
import Data.Nat
|
||||
import Control.Eff
|
||||
|
|
@ -16,6 +16,12 @@ public export
|
|||
PS : Type -> Type
|
||||
PS a = Eff [Parser, Except ParseError] a
|
||||
|
||||
||| Run a parser and extact the parsed value
|
||||
export
|
||||
runPS : PS a -> (str : String) -> Either String a
|
||||
runPS x str =
|
||||
fst . extract . runParser str . runExcept $ x
|
||||
|
||||
||| Attempt to run a fallible parser, backtracking on failure
|
||||
export
|
||||
try : (f : PS a) -> (err : ParseError -> PS a) -> PS a
|
||||
|
@ -67,6 +73,16 @@ atLeastOne err f = do
|
|||
tail <- many f
|
||||
pure $ head ::: tail
|
||||
|
||||
||| Attempt to parse an exact string
|
||||
export
|
||||
thisString : String -> PS String
|
||||
thisString str = do
|
||||
res <- parseExact str
|
||||
case res of
|
||||
Got x => pure x
|
||||
Failed x => throw "Expected \{show str} Got \{show x}"
|
||||
EoF => throw "Unexpected EoF"
|
||||
|
||||
||| Attempt to parse one of a list of strings
|
||||
export
|
||||
theseStrings : List String -> PS String
|
||||
|
@ -87,7 +103,9 @@ theseChars cs = do
|
|||
Just [next] <- peek 1
|
||||
| _ => throw "Unexpected EoF"
|
||||
if any (== next) cs
|
||||
then pure next
|
||||
then do
|
||||
_ <- parseChar (const True)
|
||||
pure next
|
||||
else throw "Expected one of \{show cs}, got \{show next}"
|
||||
|
||||
||| Attempt to parse any char not in the list
|
||||
|
@ -98,4 +116,23 @@ notTheseChars cs = do
|
|||
| _ => throw "Unexpected EoF"
|
||||
if any (== next) cs
|
||||
then throw "Expected not one of \{show cs}, got \{show next}"
|
||||
else pure next
|
||||
else do
|
||||
_ <- parseChar (const True)
|
||||
pure next
|
||||
|
||||
||| Parse an exact match, but 'replace' it with the provided value
|
||||
export
|
||||
exactReplace : (str : String) -> (replacement : a) -> PS a
|
||||
exactReplace str replacement = do
|
||||
_ <- parseExact str
|
||||
pure replacement
|
||||
|
||||
||| Any character
|
||||
export
|
||||
any : PS Char
|
||||
any = do
|
||||
x <- parseChar (const True)
|
||||
case x of
|
||||
Got y => pure y
|
||||
(Failed y) => throw "What?"
|
||||
EoF => throw "Unexpected EoF"
|
||||
|
|
47
todo.org
47
todo.org
|
@ -7,3 +7,50 @@ Decided to rename =Tag= to =Html=, and =Raw= to =Text=, which makes this make se
|
|||
* Parser Core
|
||||
** TODO Refine =location= in =ParserLocation=
|
||||
** TODO Error messages
|
||||
** TODO Combinators for predictive parsing
|
||||
* Djot [2/40]
|
||||
:PROPERTIES:
|
||||
:COOKIE_DATA: recursive
|
||||
:END:
|
||||
** Inline Syntax [2/18]
|
||||
*** DONE Ordinary Text
|
||||
*** TODO Link
|
||||
*** TODO Image
|
||||
*** TODO Autolink
|
||||
*** TODO Verbatim
|
||||
*** TODO Emphasis/strong
|
||||
*** TODO Highlighted
|
||||
*** TODO Super/subscript
|
||||
*** TODO Insert/delete
|
||||
*** TODO Smart punctuation
|
||||
*** TODO Math
|
||||
*** TODO Footnote reference
|
||||
*** DONE Linebreak
|
||||
*** TODO Comment
|
||||
*** TODO Symbols
|
||||
*** TODO Raw inline
|
||||
*** TODO Span
|
||||
*** TODO Inline attributes
|
||||
** Block Syntax [0/15]
|
||||
*** TODO Paragraph
|
||||
*** TODO Heading
|
||||
*** TODO Block quote
|
||||
*** TODO List item
|
||||
*** TODO List
|
||||
*** TODO Code block
|
||||
*** TODO Thematic break
|
||||
*** TODO Raw block
|
||||
*** TODO Div
|
||||
*** TODO Pipe table
|
||||
*** TODO Reference link
|
||||
*** TODO definition
|
||||
*** TODO Footnote
|
||||
*** TODO Block attributes
|
||||
*** TODO Links to headings
|
||||
** TODO Predictive parsing
|
||||
** TODO Support all types of whitespace
|
||||
*** TODO Escaping
|
||||
*** TODO Whitespace class
|
||||
** TODO hex escapes
|
||||
** TODO GFM style alerts
|
||||
** TODO Group adjacent =Text=s into a =String=
|
||||
|
|
Loading…
Add table
Reference in a new issue