From 5a2ffc1058355b5d468c5497e5b021cdc8672736 Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Fri, 24 Jan 2025 01:45:02 -0500 Subject: [PATCH] Basic Parsing Interface --- README.md | 12 ++ advent.ipkg | 2 + src/Parser.md | 8 + src/Parser/Interface.md | 332 ++++++++++++++++++++++++++++++++++ src/Parser/ParserState.md | 369 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 723 insertions(+) create mode 100644 src/Parser.md create mode 100644 src/Parser/Interface.md create mode 100644 src/Parser/ParserState.md diff --git a/README.md b/README.md index dd7ac9c..6f18796 100644 --- a/README.md +++ b/README.md @@ -59,6 +59,18 @@ solution. Provider wrappers over the standard library `IOArray` type to make them more ergonomic to use. +- [Parser](src/Parser.md) + + Effectful parser mini-library + + - [Interface](src/Parser/Interface.md) + + Effectful parser API + + - [ParserState](src/Parser/ParserState.md) + + Internal state of a parser + ## Index of years and days - 2015 diff --git a/advent.ipkg b/advent.ipkg index 23c2130..56f6e75 100644 --- a/advent.ipkg +++ b/advent.ipkg @@ -30,6 +30,8 @@ modules = Runner , Util.Eff , Util.Digits , Array + , Parser + , Parser.Interface -- main file (i.e. file to load at REPL) main = Main diff --git a/src/Parser.md b/src/Parser.md new file mode 100644 index 0000000..9eacc0b --- /dev/null +++ b/src/Parser.md @@ -0,0 +1,8 @@ +# Parsing Utilties + +```idris +module Parser + +import public Parser.Interface as Parser +import public Parser.ParserState as Parser +``` diff --git a/src/Parser/Interface.md b/src/Parser/Interface.md new file mode 100644 index 0000000..79583cf --- /dev/null +++ b/src/Parser/Interface.md @@ -0,0 +1,332 @@ +# The interface of a `Parser` + +```idris +module Parser.Interface + +import public Data.List1 + +import public Parser.ParserState + +import public Control.Eff + +export infixr 4 >| +export infixr 5 >& +``` + +## Parser Errors + +Combine the parser state at time of error with an error message. + +```idris +public export +data ParseError : Type where + -- TODO: Rename this constructor + MkParseError : (state : ParserInternal Id) -> (message : String) -> ParseError + BeforeParse : (message : String) -> ParseError + NestedErrors : (state : ParserInternal Id) -> (message : String) + -> (rest : List ParseError) -> ParseError +``` + +```idris hide +export +Show ParseError where + show (MkParseError state message) = + let (line, col) = positionPair state + (line, col) = (show line, show col) + position = show state.position.index + in "Error at line \{line}, column \{col} (\{position}): \{message}" + show (BeforeParse message) = + "Error before parsing: \{message}" + show (NestedErrors state message rest) = + let rest = assert_total $joinBy "\n" . map ((" " ++) . show) $ rest + (line, col) = positionPair state + (line, col) = (show line, show col) + position = show state.position.index + first = "Error at line \{line}, column \{col} (\{position}): \{message}" + in "\{first}\n\{rest}" +``` + +## Type Alias + +```idris +public export +Parser : Type -> Type +Parser a = Eff [ParserState, Except ParseError] a +``` + +## Error Generation + +Provide a few effectful actions to generate an error from an error message, and +either return it or throw it. + +```idris +export +parseError : Has ParserState fs => (message : String) -> Eff fs ParseError +parseError message = do + state <- save + pure $ MkParseError state message + +export +throwParseError : Has ParserState fs => Has (Except ParseError) fs => + (message : String) -> Eff fs a +throwParseError message = do + err <- parseError message + throw err + +export +guardMaybe : Has ParserState fs => Has (Except ParseError) fs => + (message : String) -> Eff fs (Maybe a) -> Eff fs a +guardMaybe message x = do + Just x <- x + | _ => throwParseError message + pure x + +export +replaceError : (message : String) -> Parser (a -> Parser b) +replaceError message = do + state <- save + pure (\_ => throw $ MkParseError state message) +``` + +## Running a parser + +We will use the phrasing "rundown" to refer to running all the effects in the +parser effect stack except `ParserState`, which is left in the effect stack to +facilitate handling in the context of another monad or effect stack, since it +benefits from mutability. + +Rundown a parser, accepting the first returning parse, which may be failing or +succeding, and automatically generating a "no valid parses" message in the event +no paths in the `Choice` effect produce a returning parse. + +```idris +export +rundownFirst : (f : Parser a) -> Eff [ParserState] (Either ParseError a) +rundownFirst f = + runExcept f +``` + +Provide wrappers for `rundownFirst` for evaluating it in various contexts. + +```idris +export +runFirstIO : (f : Parser a) -> String -> IO (Either ParseError a) +runFirstIO f str = do + Just state <- newInternalIO str + | _ => pure . Left $ BeforeParse "Empty input" + runEff (rundownFirst f) [handleParserStateIO state] + +export +runFirstIODebug : (f : Parser a) -> String -> IO (Either ParseError a) +runFirstIODebug f str = do + Just state <- newInternalIO str + | _ => pure . Left $ BeforeParse "Empty input" + runEff (rundownFirst f) [handleParserStateIODebug state] + +export +runFirst : (f : Parser a) -> String -> Eff fs (Either ParseError a) +runFirst f str = do + Just state <- pure $ newInternal str + | _ => pure . Left $ BeforeParse "Empty input" + (result, _) <- lift . runParserState state . rundownFirst $ f + pure result + +export +runFirst' : (f : Parser a) -> String -> Either ParseError a +runFirst' f str = extract $ runFirst f str {fs = []} +``` + +## Utility functionality + +### Parser combinators + +Try to run a computation in the context of the `Parser` effect stack, if it +fails (via `Except`), reset the state and resort to the supplied callback + +Also supply a version specialized to ignore the error value, returning `Just a` +if the parse succeeds, and `Nothing` if it fails. + +```idris +export +try : (f : Parser a) -> (err : ParseError -> Parser a) -> Parser a +try f err = do + starting_state <- save + result <- lift . runExcept $ f + case result of + Left error => do + load starting_state + err error + Right result => pure result + +export +tryMaybe : (f : Parser a) -> Parser (Maybe a) +tryMaybe f = try (map Just f) (\_ => pure Nothing) + +export +tryEither : (f : Parser a) -> Parser (Either ParseError a) +tryEither f = try (map Right f) (pure . Left) +``` + +Attempt to parse one of the given input parsers, in the provided order, invoking +the provided error action on failure. + +The state will not be modified when an input parser fails + +```idris +export +oneOfE : (err : String) -> List (Parser a) -> Parser a +oneOfE err xs = do + start <- save + oneOfE' err start [] xs + where + oneOfE' : (err : String) -> (start : ParserInternal Id) + -> (errs : List ParseError) -> List (Parser a) -> Parser a + oneOfE' err start errs [] = do + throw $ NestedErrors start err (reverse errs) + oneOfE' err start errs (x :: xs) = do + x <- tryEither x + case x of + Right val => pure val + Left error => oneOfE' err start (error :: errs) xs +``` + +Attempt to parse 0+ of an item + +```idris +export +many : (f : Parser a) -> Parser (List a) +many f = do + Just next <- tryMaybe f + | _ => pure [] + map (next ::) $ many f +``` + +Attempt to parse 1+ of an item, invoking the supplied error action on failure + +```idris +export +atLeastOne : (err : ParseError -> Parser (List1 a)) -> (f : Parser a) + -> Parser (List1 a) +atLeastOne err f = do + Right next <- tryEither f + | Left e => err e + map (next :::) $ many f +``` + +Lift a parser producing a `List` or `List1` of `Char` into a parser producing a +`String` + +```idris +-- TODO: Rename these +export +liftString : Parser (List Char) -> Parser String +liftString x = do + xs <- x + pure $ pack xs + +export +liftString' : Parser (List1 Char) -> Parser String +liftString' x = liftString $ map forget x +``` + +Attempt to parse a specified character + +```idris +export +charExact : Char -> Parser Char +charExact c = do + result <- charExact' c + case result of + GotChar char => pure char + GotError err => throwParseError "Got \{show err} Expected \{show c}" + EndOfInput => throwParseError "End of input" +``` + +Attempt to parse one of a list of chars + +```idris +export +theseChars : List Char -> Parser Char +theseChars cs = do + pnote "Parsing one of: \{show cs}" + result <- charPredicate (\x => any (== x) cs) + case result of + GotChar char => pure char + GotError err => throwParseError "Got \{show err} Expected one of \{show cs}" + EndOfInput => throwParseError "End of input" +``` + +Attempt to parse an exact string + +```idris +export +exactString : String -> Parser String +exactString str with (asList str) + exactString "" | [] = do + pnote "Parsing the empty string" + pure "" + exactString input@(strCons c str) | (c :: x) = do + pnote "Parsing exact string \{show input}" + GotChar next <- charPredicate (== c) + | GotError err => throwParseError "Got \{show err} expected \{show c}" + | EndOfInput => throwParseError "End of input" + rest <- exactString str | x + pure input +``` + +Wrap a parser in delimiter characters, discarding the value of the delimiters + +```idris +export +delimited : (before, after : Char) -> Parser a -> Parser a +delimited before after x = do + pnote "Parsing delimited by \{show before} \{show after}" + starting_state <- save + _ <- charExact before + Right val <- tryEither x + | Left err => do + load starting_state + throw err + Just _ <- tryMaybe $ charExact after + | _ => do + load starting_state + throw $ MkParseError starting_state "Unmatched delimiter \{show before}" + pure val +``` + +Consume any number of characters of the provided character class and discard the +result. Also a version for doing so on both sides of a provided parser + +```idris +export +nom : Parser Char -> Parser () +nom x = do + pnote "Nomming" + _ <- many x + pure () + +export +surround : (around : Parser Char) -> (item : Parser a) -> Parser a +surround around item = do + pnote "Surrounding" + nom around + val <- item + nom around + pure val +``` + +### Composition of boolean functions + +```idris +||| Return true if both of the predicates evaluate to true +public export +(>&) : (a : e -> Bool) -> (b : e -> Bool) -> (e -> Bool) +(>&) a b x = a x && b x +``` + +```idris +||| Return true if either of the predicates evaulates to true +public export +(>|) : (a : e -> Bool) -> (b : e -> Bool) -> (e -> Bool) +(>|) a b x = a x || b x +``` diff --git a/src/Parser/ParserState.md b/src/Parser/ParserState.md new file mode 100644 index 0000000..82216d5 --- /dev/null +++ b/src/Parser/ParserState.md @@ -0,0 +1,369 @@ +# Parser State + +An effectful description of the text a parser consumes + +```idris +module Parser.ParserState + +import public Data.String +import public Data.DPair +import public Data.Refined +import public Data.Refined.Int64 +import public Data.SortedMap +import public Data.IORef + +import Data.Primitives.Interpolation +import System.File + +import public Control.Eff +``` + +## Barbie Basics + +Barbies are types that can "change their clothes", in Idris, this manifests as a +type indexed by a type-level function that affects the types of the fields. + +Since we know our usage here is going to be quite simple, and not even really +making use of dependently typed fun, we are going to implement all the barbie +functionality we need by hand, but if you feel like barbies might be a good fit +for your problem, or you simply want to learn more, please check out a library +like `barbies`[^1] + +```idris +public export +Id : Type -> Type +Id x = x +``` + +## Internal State of a Parser + +Type alias for our refined `Int64`s + +```idris +public export +0 IsIndex : (length : Int64) -> Int64 -> Type +IsIndex length = From 0 && LessThan length + +public export +record Index (length : Int64) where + constructor MkIndex + index : Int64 + {auto 0 prf : IsIndex length index} +``` + +```idris hide +export +Eq (Index i) where + x == y = x.index == y.index + +export +Ord (Index i) where + compare x y = compare x.index y.index + +export +Show (Index i) where + show (MkIndex index) = show index +``` + +Stores the location we are currently at in the string, and metadata about it for +providing good error messages. Parsing an empty input isn't very interesting, so +we exclude inputs of length zero, since that will make other things easier. + +```idris +||| State representing a parser's position in the text +public export +record ParserInternal (f : Type -> Type) where + constructor MkInternal + -- IDEA: Maybe go full barbie and have this be a field, so that we can, say, + -- read directly from a file instead of from an already loaded string using the + -- same parser + ||| The input string + input : String + ||| The length of the input string + length : Int64 + {auto 0 len_prf : length = cast (strLength input)} + ||| A sorted set containing the positions of the start of each line + line_starts : SortedMap (Index length) Nat + ||| The position of the next character to read in the input + position : f (Index length) + ||| True if we have hit the end of input + end_of_input : f Bool +%name ParserInternal pi, pj, pk +``` + +### ParserInternal Methods + +Construct a `ParserInternal` from an input string. Will fail if the input is +empty, because then we can't index it. + +```idris +export +newInternal : (input : String) -> Maybe (ParserInternal Id) +newInternal input = + -- Check if we have at least one character in the input + case refine0 0 {p = IsIndex (cast (strLength input))} of + Nothing => Nothing + Just (Element position _) => Just $ + MkInternal input + (cast (strLength input)) + (mkStarts' input (MkIndex position)) + (MkIndex position) + False + where + partial + mkStarts : + (str : String) -> (acc : List (Index (cast (strLength str)), Nat)) + -> (idx : Index (cast (strLength str))) -> (count : Nat) -> (next : Bool) + -> List (Index (cast (strLength str)), Nat) + mkStarts str acc idx count True = + mkStarts str ((idx, count) :: acc) idx (S count) False + mkStarts str acc idx count False = + case refine0 (idx.index + 1) {p = IsIndex (cast (strLength str))} of + Nothing => acc + Just (Element next _) => + if strIndex str (cast idx.index) == '\n' + then mkStarts str acc (MkIndex next) count True + else mkStarts str acc (MkIndex next) count False + mkStarts' : (str : String) -> (start : Index (cast (strLength str))) + -> SortedMap (Index (cast (strLength str))) Nat + mkStarts' str start = + let + pairs = assert_total $ + mkStarts str [] start 0 True + in fromList pairs +``` + +Get the current line and column number + +```idris +||| Returns the current position of the parser cursor in, zero indexed, (line, +||| column) form +export +positionPair : (pi : ParserInternal Id) -> (Nat, Nat) +positionPair pi = + case lookup pi.position pi.line_starts of + Just line => (line, 0) + Nothing => + case lookupBetween pi.position pi.line_starts of + -- There will always be at least one line start, and we would have hit + -- the previous case if we were at the start of the first one, so if + -- there isn't a before, we can return a nonsense value safely + (Nothing, _) => (0, 0) + (Just (start, linum), after) => + -- Our index will always be after the start of the line, for previously + -- mentioned reasons, so this cast is safe + let col = cast {to = Nat} $ pi.position.index - start.index + in (linum, col) +``` + +```idris hide +export +Show (ParserInternal Id) where + show pi = + let (line, col) = positionPair pi + current = assert_total $ strIndex pi.input (cast pi.position.index) + pos = pi.position.index + eof = show pi.end_of_input + in "Position: \{pos}(\{line}, \{col}}) Value: \{show current} EoF: \{eof}" +``` + +### More Barbie Functionality + +Provide the barbie analogs of `map` and `traverse` for our `ParserInternal` +type, allowing us to change the type the values in a `ParserInternal` by mapping +over those values. + +```idris +export +bmap : ({0 a : Type} -> f a -> g a) -> ParserInternal f -> ParserInternal g +-- bmap f = bmap_ (\_ => f) +bmap fun (MkInternal input length line_starts position end_of_input) = + let position' = fun position + end_of_input' = fun end_of_input + in MkInternal input length line_starts position' end_of_input' + +export +btraverse : Applicative e => ({0 a : Type} -> f a -> e (g a)) + -> ParserInternal f -> e (ParserInternal g) +btraverse fun (MkInternal input length line_starts position end_of_input) = + let pures = (MkInternal input length line_starts) + in [| pures (fun position) (fun end_of_input)|] +``` + +## Three way result + +```idris +||| Three way result returned from attempting to parse a single char +public export +data ParseCharResult : Type where + GotChar : (char : Char) -> ParseCharResult + GotError : (err : Char) -> ParseCharResult + EndOfInput : ParseCharResult +``` + +## The Effect Type + +```idris +export +data ParserState : Type -> Type where + Save : ParserState (ParserInternal Id) + Load : (ParserInternal Id) -> ParserState () + -- TODO: Maybe add a ParseString that parses a string of characters as a + -- string using efficent slicing? + ParseChar : (predicate : Char -> Bool) -> ParserState ParseCharResult + ParseExactChar : (char : Char) -> ParserState ParseCharResult + ParseEoF : ParserState Bool + Note : Lazy String -> ParserState () +``` + +```idris hide +Show (ParserState t) where + show Save = "Saving state" + show (Load pi) = "Loading state \{show pi}" + show (ParseChar predicate) = "Parsing char" + show (ParseExactChar char) = "Parsing char \{show char}" + show ParseEoF = "Parsing EoF" + show (Note _) = "Note" +``` + +### Actions + +```idris +||| Return the current state, for potential later reloading +export +save : Has ParserState fs => Eff fs (ParserInternal Id) +save = send Save + +||| Reset to the provided state +export +load : Has ParserState fs => ParserInternal Id -> Eff fs () +load pi = send $ Load pi + +||| Attempt to parse a char, checking to see if it complies with the supplied +||| predicate, updates the state if parsing succeeds, does not alter it in an +||| error condition. +export +charPredicate : Has ParserState fs => (predicate : Char -> Bool) + -> Eff fs ParseCharResult +charPredicate predicate = send $ ParseChar predicate + +||| Parse a char by exact value +export +charExact' : Has ParserState fs => (chr : Char) -> Eff fs ParseCharResult +charExact' chr = send $ ParseExactChar chr + +||| "Parse" the end of input, returning `True` if the parser state is currently +||| at the end of the input +export +parseEoF : Has ParserState fs => Eff fs Bool +parseEoF = send ParseEoF + +||| Make a note to be output when running with a debug handler +export +pnote : Has ParserState fs => Lazy String -> Eff fs () +pnote x = send $ Note x +``` + +## Handling a ParserState + +### IO Context + +```idris +export +handleParserStateIO : HasIO io => + IORef (ParserInternal IORef) -> ParserState t -> io t +handleParserStateIO pi Save = do + pi <- readIORef pi + btraverse readIORef pi +handleParserStateIO pi (Load pj) = do + pj <- btraverse newIORef pj + writeIORef pi pj +handleParserStateIO pi (ParseChar predicate) = do + pi <- readIORef pi + False <- readIORef pi.end_of_input + | _ => pure EndOfInput + position <- readIORef pi.position + let char = assert_total $ strIndex pi.input (cast position.index) + True <- pure $ predicate char + | _ => pure $ GotError char + -- Our refinement type on the position forces us to check that the length is + -- in bounds after incrementing it, if its out of bounds, set the end_of_input + -- flag + case refine0 (position.index + 1) {p = IsIndex pi.length} of + Nothing => do + writeIORef pi.end_of_input True + pure $ GotChar char + Just (Element next _) => do + writeIORef pi.position $ MkIndex next + pure $ GotChar char +handleParserStateIO pi (ParseExactChar chr) = do + -- TODO: do this directly? + handleParserStateIO pi (ParseChar (== chr)) +handleParserStateIO pi ParseEoF = do + pi <- readIORef pi + readIORef pi.end_of_input +-- We ignore notes in non-debug mode +handleParserStateIO pi (Note _) = pure () + +export +newInternalIO : HasIO io => String -> io $ Maybe (IORef (ParserInternal IORef)) +newInternalIO str = do + Just internal <- pure $ newInternal str + | _ => pure Nothing + internal <- btraverse newIORef internal + map Just $ newIORef internal +``` + +Wrapper with debugging output + +```idris +export +handleParserStateIODebug : HasIO io => + IORef (ParserInternal IORef) -> ParserState t -> io t +handleParserStateIODebug x (Note note) = do + state <- readIORef x + state <- btraverse readIORef state + _ <- fPutStrLn stderr "Note \{note} -> \{show state}" + pure () +handleParserStateIODebug x y = do + state <- readIORef x + state <- btraverse readIORef state + _ <- fPutStrLn stderr "\{show y} -> \{show state}" + handleParserStateIO x y +``` + +### State context + +```idris +unPS : ParserInternal Id -> ParserState a -> (a, ParserInternal Id) +unPS pi Save = (pi, pi) +unPS pi (Load pj) = ((), pi) +unPS pi (ParseChar predicate) = + if pi.end_of_input + then (EndOfInput, pi) + else let + char = assert_total $ strIndex pi.input (cast pi.position.index) + in if predicate char + then case refine0 (pi.position.index + 1) {p = IsIndex pi.length} of + Nothing => + (GotChar char, {end_of_input := True} pi) + Just (Element next _) => + (GotChar char, {position := MkIndex next} pi) + else (GotError char, pi) +unPS pi (ParseExactChar chr) = unPS pi (ParseChar (== chr)) +unPS pi ParseEoF = (pi.end_of_input, pi) +unPS pi (Note _) = ((), pi) + +export +runParserState : Has ParserState fs => + (s : ParserInternal Id) -> Eff fs t + -> Eff (fs - ParserState) (t, ParserInternal Id) +runParserState s = + handleRelayS s (\x, y => pure (y, x)) $ \s2,ps,f => + let (a, st) = unPS s2 ps + in f st a +``` + +## Footnotes + +[^1]: https://github.com/stefan-hoeck/idris2-barbies