From 045fab1b4bb1cba0d84c7eb744ab7a2ddcc5c185 Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Wed, 19 Feb 2025 10:02:05 -0500 Subject: [PATCH] Core parser effect --- SSG.ipkg | 1 + src/SSG/Parser/Core.idr | 170 ++++++++++++++++++++++++++++++++++++++++ todo.org | 2 + 3 files changed, 173 insertions(+) diff --git a/SSG.ipkg b/SSG.ipkg index 0033da9..dadbfdb 100644 --- a/SSG.ipkg +++ b/SSG.ipkg @@ -15,6 +15,7 @@ authors = "Nathan McCarty" -- packages to add to search path depends = structures , eff + , refined -- modules to install modules = SSG.Parser.Core diff --git a/src/SSG/Parser/Core.idr b/src/SSG/Parser/Core.idr index e070a95..17c7725 100644 --- a/src/SSG/Parser/Core.idr +++ b/src/SSG/Parser/Core.idr @@ -1 +1,171 @@ module SSG.Parser.Core + +import Data.Vect +import Data.String + +import Control.Eff + +--------------------------- +-- Internal parser state -- +--------------------------- + +||| The location of a parser within a string +public export +data ParserLocation : Type where + AtPoint : (length : Int) -> (location : Int) -> ParserLocation + EndOfInput : (length : Int) -> ParserLocation + +namespace ParserLocation + export + fromString : String -> ParserLocation + fromString str = + if strLength str > 0 + then AtPoint 0 (strLength str) + else EndOfInput 0 + +||| The state of a parser +public export +record ParserState where + constructor MkPS + input : String + location : ParserLocation + +namespace ParserState + export + fromString : String -> ParserState + fromString str = MkPS str (fromString str) + + export + toString : ParserState -> String + toString (MkPS input (AtPoint length location)) = strSubstr location length input + toString (MkPS input (EndOfInput length)) = "" + + +||| Three way result of parsing +public export +data ParseResult : Type -> Type where + Got : t -> ParseResult t + Failed : t -> ParseResult t + EoF : ParseResult t + +----------------- +-- Effect Type -- +----------------- + +||| The parser effect +export +data Parser : Type -> Type where + -- Save and load the state + Save : Parser ParserState + Load : ParserState -> Parser () + -- Peek the next `n` characters, if there are enough, without altering the parser state + Peek : (n : Nat) -> Parser (Maybe (Vect n Char)) + -- Parse a single char, if it satisfies a predicate + ParseChar : (predicate : Char -> Bool) -> Parser (ParseResult Char) + -- Parse an exact string match + ParseExact : (str : String) -> Parser (ParseResult String) + -- Skip a run of 0 or more chars all satisfying a predicate + SkipChars : (predicate : Char -> Bool) -> Parser () + -- Detect the end of input + ParseEoF : Parser Bool + +----------------------- +-- Effectful Actions -- +----------------------- + +||| Aquire a copy of the current parser state +||| +||| Intended to be used for backtracking +export +save : Has Parser fs => Eff fs ParserState +save = send Save + +||| Restore the parser state to a previousy persisted copy +export +load : Has Parser fs => ParserState -> Eff fs () +load x = send $ Load x + +||| Look ahead exactly `n` characters +export +peek : Has Parser fs => (n : Nat) -> Eff fs (Maybe (Vect n Char)) +peek n = send $ Peek n + +||| Attempt to parse a single char satisfying the given predicate, leaving the parser +||| state unchanged on failure +export +parseChar : Has Parser fs => (predicate : Char -> Bool) -> Eff fs (ParseResult Char) +parseChar predicate = send $ ParseChar predicate + +||| Parse an exact match of the given string +export +parseExact : Has Parser fs => (str : String) -> Eff fs (ParseResult String) +parseExact str = send $ ParseExact str + +||| Skip a run of any number of characters satisfying the given predicate +export +skipChars : Has Parser fs => (predicate : Char -> Bool) -> Eff fs () +skipChars predicate = send $ SkipChars predicate + +-------------------- +-- Effect handler -- +-------------------- + +export +unParser : ParserState -> Parser a -> (a, ParserState) +-- Save the parser state +unParser s Save = + (s, s) +-- Load a provided parser state +unParser s (Load x) = + ((), x) +-- Look ahead exactly `n` characters +unParser s@(MkPS input (AtPoint length location)) (Peek n) = + let cs = unpack . strSubstr location (cast n) $ input + in (toVect n cs, s) +unParser s@(MkPS input (EndOfInput length)) (Peek n) = + (Nothing, s) +-- Attempt to parse a single character matching a given predicate, bail out without +-- affecting the parser state otherwise +unParser s@(MkPS input (AtPoint length location)) (ParseChar predicate) = + -- Saftey: We maintain location < length as a type invariant + let c = assert_total $ strIndex input location + in if predicate c + then if location + 1 < length + then (Got c, MkPS input (AtPoint length location)) + else (Got c, MkPS input (EndOfInput length)) + else (Failed c, s) +unParser s@(MkPS input (EndOfInput length)) (ParseChar predicate) = + (EoF, s) +-- Parse an exact string match +unParser s@(MkPS input (AtPoint length location)) (ParseExact str) = + let substr = strSubstr location (strLength str) input + in if substr == str + then if location + (strLength str) < length + then (Got substr, MkPS input (AtPoint length (location + strLength str))) + else (Got substr, MkPS input (EndOfInput length)) + else (Failed substr, s) +unParser s@(MkPS input (EndOfInput length)) (ParseExact str) = (EoF, s) +-- Skip any number of chars satisfying a given predicate +unParser s@(MkPS input (AtPoint length location)) (SkipChars predicate) = + case unParser s (ParseChar predicate) of + (Got _, s) => unParser s (SkipChars predicate) + (_, s) => ((), s) +unParser s@(MkPS input (EndOfInput length)) (SkipChars predicate) = + ((), s) +-- "Parse" the end of input +unParser s@(MkPS input (AtPoint length location)) ParseEoF = (False, s) +unParser s@(MkPS input (EndOfInput length)) ParseEoF = (True, s) + +export +runParser' : Has Parser fs => + (s : ParserState) -> Eff fs t -> Eff (fs - Parser) (t, ParserState) +runParser' s = + handleRelayS s (\state, t => pure (t, state)) $ \s2, ps, f => + let (a, st) = unParser s2 ps + in f st a + +export +runParser : Has Parser fs => + (s : String) -> Eff fs t -> Eff (fs - Parser) (t, String) +runParser s x = + map (\(x,y) => (x, toString y)) $ runParser' (fromString s) x diff --git a/todo.org b/todo.org index cda0ed9..a8cb81f 100644 --- a/todo.org +++ b/todo.org @@ -4,3 +4,5 @@ Probably want to refine the attribute and value strings, and be smarter about how we choose to quote the value string. ** NO Move =Raw= out of =Tag= Decided to rename =Tag= to =Html=, and =Raw= to =Text=, which makes this make sense +* Parser Core +** TODO Refine =location= in =ParserLocation=