Core parser effect

This commit is contained in:
Nathan McCarty 2025-02-19 10:02:05 -05:00
parent 7f4bcf5f51
commit 045fab1b4b
3 changed files with 173 additions and 0 deletions

View file

@ -15,6 +15,7 @@ authors = "Nathan McCarty"
-- packages to add to search path
depends = structures
, eff
, refined
-- modules to install
modules = SSG.Parser.Core

View file

@ -1 +1,171 @@
module SSG.Parser.Core
import Data.Vect
import Data.String
import Control.Eff
---------------------------
-- Internal parser state --
---------------------------
||| The location of a parser within a string
public export
data ParserLocation : Type where
AtPoint : (length : Int) -> (location : Int) -> ParserLocation
EndOfInput : (length : Int) -> ParserLocation
namespace ParserLocation
export
fromString : String -> ParserLocation
fromString str =
if strLength str > 0
then AtPoint 0 (strLength str)
else EndOfInput 0
||| The state of a parser
public export
record ParserState where
constructor MkPS
input : String
location : ParserLocation
namespace ParserState
export
fromString : String -> ParserState
fromString str = MkPS str (fromString str)
export
toString : ParserState -> String
toString (MkPS input (AtPoint length location)) = strSubstr location length input
toString (MkPS input (EndOfInput length)) = ""
||| Three way result of parsing
public export
data ParseResult : Type -> Type where
Got : t -> ParseResult t
Failed : t -> ParseResult t
EoF : ParseResult t
-----------------
-- Effect Type --
-----------------
||| The parser effect
export
data Parser : Type -> Type where
-- Save and load the state
Save : Parser ParserState
Load : ParserState -> Parser ()
-- Peek the next `n` characters, if there are enough, without altering the parser state
Peek : (n : Nat) -> Parser (Maybe (Vect n Char))
-- Parse a single char, if it satisfies a predicate
ParseChar : (predicate : Char -> Bool) -> Parser (ParseResult Char)
-- Parse an exact string match
ParseExact : (str : String) -> Parser (ParseResult String)
-- Skip a run of 0 or more chars all satisfying a predicate
SkipChars : (predicate : Char -> Bool) -> Parser ()
-- Detect the end of input
ParseEoF : Parser Bool
-----------------------
-- Effectful Actions --
-----------------------
||| Aquire a copy of the current parser state
|||
||| Intended to be used for backtracking
export
save : Has Parser fs => Eff fs ParserState
save = send Save
||| Restore the parser state to a previousy persisted copy
export
load : Has Parser fs => ParserState -> Eff fs ()
load x = send $ Load x
||| Look ahead exactly `n` characters
export
peek : Has Parser fs => (n : Nat) -> Eff fs (Maybe (Vect n Char))
peek n = send $ Peek n
||| Attempt to parse a single char satisfying the given predicate, leaving the parser
||| state unchanged on failure
export
parseChar : Has Parser fs => (predicate : Char -> Bool) -> Eff fs (ParseResult Char)
parseChar predicate = send $ ParseChar predicate
||| Parse an exact match of the given string
export
parseExact : Has Parser fs => (str : String) -> Eff fs (ParseResult String)
parseExact str = send $ ParseExact str
||| Skip a run of any number of characters satisfying the given predicate
export
skipChars : Has Parser fs => (predicate : Char -> Bool) -> Eff fs ()
skipChars predicate = send $ SkipChars predicate
--------------------
-- Effect handler --
--------------------
export
unParser : ParserState -> Parser a -> (a, ParserState)
-- Save the parser state
unParser s Save =
(s, s)
-- Load a provided parser state
unParser s (Load x) =
((), x)
-- Look ahead exactly `n` characters
unParser s@(MkPS input (AtPoint length location)) (Peek n) =
let cs = unpack . strSubstr location (cast n) $ input
in (toVect n cs, s)
unParser s@(MkPS input (EndOfInput length)) (Peek n) =
(Nothing, s)
-- Attempt to parse a single character matching a given predicate, bail out without
-- affecting the parser state otherwise
unParser s@(MkPS input (AtPoint length location)) (ParseChar predicate) =
-- Saftey: We maintain location < length as a type invariant
let c = assert_total $ strIndex input location
in if predicate c
then if location + 1 < length
then (Got c, MkPS input (AtPoint length location))
else (Got c, MkPS input (EndOfInput length))
else (Failed c, s)
unParser s@(MkPS input (EndOfInput length)) (ParseChar predicate) =
(EoF, s)
-- Parse an exact string match
unParser s@(MkPS input (AtPoint length location)) (ParseExact str) =
let substr = strSubstr location (strLength str) input
in if substr == str
then if location + (strLength str) < length
then (Got substr, MkPS input (AtPoint length (location + strLength str)))
else (Got substr, MkPS input (EndOfInput length))
else (Failed substr, s)
unParser s@(MkPS input (EndOfInput length)) (ParseExact str) = (EoF, s)
-- Skip any number of chars satisfying a given predicate
unParser s@(MkPS input (AtPoint length location)) (SkipChars predicate) =
case unParser s (ParseChar predicate) of
(Got _, s) => unParser s (SkipChars predicate)
(_, s) => ((), s)
unParser s@(MkPS input (EndOfInput length)) (SkipChars predicate) =
((), s)
-- "Parse" the end of input
unParser s@(MkPS input (AtPoint length location)) ParseEoF = (False, s)
unParser s@(MkPS input (EndOfInput length)) ParseEoF = (True, s)
export
runParser' : Has Parser fs =>
(s : ParserState) -> Eff fs t -> Eff (fs - Parser) (t, ParserState)
runParser' s =
handleRelayS s (\state, t => pure (t, state)) $ \s2, ps, f =>
let (a, st) = unParser s2 ps
in f st a
export
runParser : Has Parser fs =>
(s : String) -> Eff fs t -> Eff (fs - Parser) (t, String)
runParser s x =
map (\(x,y) => (x, toString y)) $ runParser' (fromString s) x

View file

@ -4,3 +4,5 @@
Probably want to refine the attribute and value strings, and be smarter about how we choose to quote the value string.
** NO Move =Raw= out of =Tag=
Decided to rename =Tag= to =Html=, and =Raw= to =Text=, which makes this make sense
* Parser Core
** TODO Refine =location= in =ParserLocation=