From 222ae171808fd133c88f80c1f0ca47a568f9fed3 Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Fri, 24 Jan 2025 01:45:02 -0500 Subject: [PATCH 01/45] core: Beginnings of parser module --- README.md | 12 ++ advent.ipkg | 2 + src/Parser.md | 8 ++ src/Parser/Interface.md | 186 ++++++++++++++++++++++++++ src/Parser/ParserState.md | 273 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 481 insertions(+) create mode 100644 src/Parser.md create mode 100644 src/Parser/Interface.md create mode 100644 src/Parser/ParserState.md diff --git a/README.md b/README.md index d8bd7e3..52d86cd 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,18 @@ solution. Provider wrappers over the standard library `IOArray` type to make them more ergonomic to use. +- [Parser](src/Parser.md) + + Effectful parser mini-library + + - [Interface](src/Parser/Interface.md) + + Effectful parser API + + - [ParserState](src/Parser/ParserState.md) + + Internal state of a parser + ## Index of years and days - 2015 diff --git a/advent.ipkg b/advent.ipkg index 23c2130..56f6e75 100644 --- a/advent.ipkg +++ b/advent.ipkg @@ -30,6 +30,8 @@ modules = Runner , Util.Eff , Util.Digits , Array + , Parser + , Parser.Interface -- main file (i.e. file to load at REPL) main = Main diff --git a/src/Parser.md b/src/Parser.md new file mode 100644 index 0000000..9eacc0b --- /dev/null +++ b/src/Parser.md @@ -0,0 +1,8 @@ +# Parsing Utilties + +```idris +module Parser + +import public Parser.Interface as Parser +import public Parser.ParserState as Parser +``` diff --git a/src/Parser/Interface.md b/src/Parser/Interface.md new file mode 100644 index 0000000..32999a6 --- /dev/null +++ b/src/Parser/Interface.md @@ -0,0 +1,186 @@ +# The interface of a `Parser` + +```idris +module Parser.Interface + +import public Data.List1 + +import public Parser.ParserState + +import public Control.Eff + +export infixr 4 >| +export infixr 5 >& +``` + +## Parser Errors + +Combine the parser state at time of error with an error message. + +```idris +public export +record ParseError where + constructor MkParseError + state : ParserInternal Id + message : String +``` + +## Type Alias + +```idris +public export +Parser : Type -> Type +Parser a = Eff [ParserState, Except ParseError, Choose] a +``` + +## Error Generation + +Provide a few effectful actions to generate an error from an error message, and +either return it or throw it. + +```idris +export +parseError : Has ParserState fs => (message : String) -> Eff fs ParseError +parseError message = do + state <- save + pure $ MkParseError state message + +export +throwParseError : Has ParserState fs => Has (Except ParseError) fs => + (message : String) -> Eff fs a +throwParseError message = do + err <- parseError message + throw err + +export +guardMaybe : Has ParserState fs => Has (Except ParseError) fs => + (message : String) -> Eff fs (Maybe a) -> Eff fs a +guardMaybe message x = do + Just x <- x + | _ => throwParseError message + pure x +``` + +## Running a parser + +We will use the phrasing "rundown" to refer to running all the effects in the +parser effect stack except `ParserState`, which is left in the effect stack to +facilitate handling in the context of another monad or effect stack, since it +benefits from mutability. + +Rundown a parser, accepting the first returning parse, which may be failing or +succeding, and automatically generating a "no valid parses" message in the event +no paths in the `Choice` effect produce a returning parse. + +```idris +export +rundownFirst : (f : Parser a) -> Eff [ParserState] (Either ParseError a) +rundownFirst f = + runExcept . guardMaybe "No returning parses" . runChoose {f = Maybe} $ f +``` + +## Utility functionality + +### Parser combinators + +Try to run a computation in the context of the `Parser` effect stack, if it +fails (via `Except`), reset the state and resort to the supplied callback + +Also supply a version specialized to ignore the error value, returning `Just a` +if the parse succeeds, and `Nothing` if it fails. + +```idris +export +try : (f : Parser a) -> (err : ParseError -> Parser a) -> Parser a +try f err = do + starting_state <- save + result <- lift . runExcept $ f + case result of + Left error => do + load starting_state + err error + Right result => pure result + +export +tryMaybe : (f : Parser a) -> Parser (Maybe a) +tryMaybe f = try (map Just f) (\_ => pure Nothing) + +export +tryEither : (f : Parser a) -> Parser (Either ParseError a) +tryEither f = try (map Right f) (pure . Left) + +||| Converts any errors thrown by `f` into silent backtracking within `Choose` +export +tryEmpty : (f : Parser a) -> Parser a +tryEmpty f = try f (\_ => empty) +``` + +Attempt to parse one of the given input parsers, in the provided order, invoking +the provided error action on failure. This will suppress any errors returned by +the input parsers by mapping them to `empty`. + +The state will not be modified when an input parser fails + +```idris +export +oneOfE : Foldable f => + (err : Parser a) -> f (Parser a) -> Parser a +oneOfE err xs = foldr altE err xs + where + altE : Parser a -> Parser a -> Parser a + altE f rest = (tryEmpty f) `alt` rest +``` + +Attempt to parse 0+ of an item + +```idris +export +many : (f : Parser a) -> Parser (List a) +many f = do + Just next <- tryMaybe f + | _ => pure [] + map (next ::) $ many f +``` + +Attempt to parse 1+ of an item, invoking the supplied error action on failure + +```idris +export +atLeastOne : (err : ParseError -> Parser (List1 a)) -> (f : Parser a) + -> Parser (List1 a) +atLeastOne err f = do + Right next <- tryEither f + | Left e => err e + map (next :::) $ many f +``` + +Lift a parser producing a `List` or `List1` of `Char` into a parser producing a +`String` + +```idris +export +parseString : Parser (List Char) -> Parser String +parseString x = do + xs <- x + pure $ pack xs + +export +parseString' : Parser (List1 Char) -> Parser String +parseString' x = parseString $ map forget x +``` + +### Composition of boolean functions + +```idris +||| Return true if both of the predicates evaluate to true +public export +(>&) : (a : e -> Bool) -> (b : e -> Bool) -> (e -> Bool) +(>&) a b x = a x && b x +``` + +```idris +||| Return true if either of the predicates evaulates to true +public export +(>|) : (a : e -> Bool) -> (b : e -> Bool) -> (e -> Bool) +(>|) a b x = a x || b x +``` diff --git a/src/Parser/ParserState.md b/src/Parser/ParserState.md new file mode 100644 index 0000000..57db768 --- /dev/null +++ b/src/Parser/ParserState.md @@ -0,0 +1,273 @@ +# Parser State + +An effectful description of the text a parser consumes + +```idris +module Parser.ParserState + +import public Data.String +import public Data.DPair +import public Data.Refined +import public Data.Refined.Int64 +import public Data.SortedMap +import public Data.IORef + +import public Control.Eff +``` + +## Barbie Basics + +Barbies are types that can "change their clothes", in Idris, this manifests as a +type indexed by a type-level function that affects the types of the fields. + +Since we know our usage here is going to be quite simple, and not even really +making use of dependently typed fun, we are going to implement all the barbie +functionality we need by hand, but if you feel like barbies might be a good fit +for your problem, or you simply want to learn more, please check out a library +like `barbies`[^1] + +```idris +public export +Id : Type -> Type +Id x = x +``` + +## Internal State of a Parser + +Type alias for our refined `Int64`s + +```idris +public export +0 IsIndex : (length : Int64) -> Int64 -> Type +IsIndex length = From 0 && LessThan length + +public export +record Index (length : Int64) where + constructor MkIndex + index : Int64 + {auto 0 prf : IsIndex length index} +``` + + + +Stores the location we are currently at in the string, and metadata about it for +providing good error messages. Parsing an empty input isn't very interesting, so +we exclude inputs of length zero, since that will make other things eaiser. + +```idris +||| State representing a parser's position in the text +public export +record ParserInternal (f : Type -> Type) where + constructor MkInternal + -- IDEA: Maybe go full barbie and have this be a field, so that we can, say, + -- read directly from a file instead of from an already loaded string using the + -- same parser + ||| The input string + input : String + ||| The length of the input string + length : Int64 + {auto 0 len_prf : length = cast (strLength input)} + ||| A sorted set containing the positions of the start of each line + line_starts : SortedMap (Index length) Nat + ||| The position of the next character to read in the input + position : f (Index length) + ||| True if we have hit the end of input + end_of_input : f Bool +%name ParserInternal pi, pj, pk +``` + +### ParserInternal Methods + +Construct a `ParserInternal` from an input string. Will fail if the input is +empty, because then we can't index it. + +```idris +export +newInternal : (input : String) -> Maybe (ParserInternal Id) +newInternal input = + -- Check if we have at least one character in the input + case refine0 0 {p = IsIndex (cast (strLength input))} of + Nothing => Nothing + Just (Element position _) => Just $ + MkInternal input + (cast (strLength input)) + (mkStarts' input (MkIndex position)) + (MkIndex position) + False + where + partial + mkStarts : + (str : String) -> (acc : List (Index (cast (strLength str)), Nat)) + -> (idx : Index (cast (strLength str))) -> (count : Nat) -> (next : Bool) + -> List (Index (cast (strLength str)), Nat) + mkStarts str acc idx count True = + mkStarts str ((idx, count) :: acc) idx (S count) False + mkStarts str acc idx count False = + case refine0 (idx.index + 1) {p = IsIndex (cast (strLength str))} of + Nothing => acc + Just (Element next _) => + if strIndex str (cast idx.index) == '\n' + then mkStarts str acc (MkIndex next) count True + else mkStarts str acc (MkIndex next) count False + mkStarts' : (str : String) -> (start : Index (cast (strLength str))) + -> SortedMap (Index (cast (strLength str))) Nat + mkStarts' str start = + let + pairs = assert_total $ + mkStarts str [] start 0 True + in fromList pairs +``` + +Get the current line and column number + +```idris +||| Returns the current position of the parser cursor in, zero indexed, (line, +||| column) form +export +positionPair : (pi : ParserInternal Id) -> (Nat, Nat) +positionPair pi = + case lookup pi.position pi.line_starts of + Just line => (line, 0) + Nothing => + case lookupBetween pi.position pi.line_starts of + -- There will always be at least one line start, and we would have hit + -- the previous case if we were at the start of the first one, so if + -- there isn't a before, we can return a nonsense value safely + (Nothing, _) => (0, 0) + (Just (start, linum), after) => + -- Our index will always be after the start of the line, for previously + -- mentioned reasons, so this cast is safe + let col = cast {to = Nat} $ pi.position.index - start.index + in (linum, col) +``` + +### More Barbie Functionality + +Provide the barbie analogs of `map` and `traverse` for our `ParserInternal` +type, allowing us to change the type the values in a `ParserInternal` by mapping +over those values. + +```idris +export +bmap : ({0 a : Type} -> f a -> g a) -> ParserInternal f -> ParserInternal g +-- bmap f = bmap_ (\_ => f) +bmap fun (MkInternal input length line_starts position end_of_input) = + let position' = fun position + end_of_input' = fun end_of_input + in MkInternal input length line_starts position' end_of_input' + +export +btraverse : Applicative e => ({0 a : Type} -> f a -> e (g a)) + -> ParserInternal f -> e (ParserInternal g) +btraverse fun (MkInternal input length line_starts position end_of_input) = + let pures = (MkInternal input length line_starts) + in [| pures (fun position) (fun end_of_input)|] +``` + +## Three way result + +```idris +||| Three way result returned from attempting to parse a single char +public export +data ParseCharResult : Type -> Type where + GotChar : (char : Char) -> ParseCharResult e + GotError : (err : e) -> ParseCharResult e + EndOfInput : ParseCharResult e +``` + +## The Effect Type + +```idris +export +data ParserState : Type -> Type where + Save : ParserState (ParserInternal Id) + Load : (ParserInternal Id) -> ParserState () + -- TODO: Maybe add a ParseString that parses a string of characters as a + -- string using efficent slicing? + ParseChar : (predicate : Char -> Bool) -> (err : Char -> e) + -> ParserState (ParseCharResult e) + ParseEoF : ParserState Bool +``` + +### Actions + +```idris +||| Return the current state, for potential later reloading +export +save : Has ParserState fs => Eff fs (ParserInternal Id) +save = send Save + +||| Reset to the provided state +export +load : Has ParserState fs => ParserInternal Id -> Eff fs () +load pi = send $ Load pi + +||| Attempt to parse a char, checking to see if it complies with the supplied +||| predicate, updates the state if parsing succeeds, does not alter it in an +||| error condition. +export +parseChar : Has ParserState fs => (predicate : Char -> Bool) -> (err : Char -> e) + -> Eff fs (ParseCharResult e) +parseChar predicate err = send $ ParseChar predicate err + +||| "Parse" the end of input, returning `True` if the parser state is currently +||| at the end of the input +export +parseEoF : Has ParserState fs => Eff fs Bool +parseEoF = send ParseEoF +``` + +## Handling a ParserState + +### IO Context + +```idris +export +handleParserStateIO : HasIO io => + IORef (ParserInternal IORef) -> ParserState t -> io t +handleParserStateIO pi Save = do + pi <- readIORef pi + btraverse readIORef pi +handleParserStateIO pi (Load pj) = do + pj <- btraverse newIORef pj + writeIORef pi pj +handleParserStateIO pi (ParseChar predicate err) = do + pi <- readIORef pi + False <- readIORef pi.end_of_input + | _ => pure $ EndOfInput + position <- readIORef pi.position + let char = assert_total $ strIndex pi.input (cast position.index) + True <- pure $ predicate char + | _ => pure . GotError $ err char + -- Our refinement type on the position forces us to check that the length is + -- in bounds after incrementing it, if its out of bounds, set the end_of_input + -- flag + case refine0 (position.index + 1) {p = IsIndex pi.length} of + Nothing => do + writeIORef pi.end_of_input True + pure $ GotChar char + Just (Element next _) => do + writeIORef pi.position $ MkIndex next + pure $ GotChar char +handleParserStateIO pi ParseEoF = do + pi <- readIORef pi + readIORef pi.end_of_input + +export +newInternalIO : HasIO io => String -> io $ Maybe (IORef (ParserInternal IORef)) +newInternalIO str = do + Just internal <- pure $ newInternal str + | _ => pure Nothing + internal <- btraverse newIORef internal + map Just $ newIORef internal +``` + +## Footnotes + +[^1]: https://github.com/stefan-hoeck/idris2-barbies From 5e5ede87b4ae00544e020f89a3fdffd0fa9367e0 Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Fri, 24 Jan 2025 17:59:30 -0500 Subject: [PATCH 02/45] core: Add show for ParserError --- src/Parser/Interface.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/Parser/Interface.md b/src/Parser/Interface.md index 32999a6..0867e1c 100644 --- a/src/Parser/Interface.md +++ b/src/Parser/Interface.md @@ -25,6 +25,16 @@ record ParseError where message : String ``` + + ## Type Alias ```idris From 9b12ebcf003e46fa45af7798ef4fb749c8478766 Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Fri, 24 Jan 2025 22:17:58 -0500 Subject: [PATCH 03/45] core: Add replaceError method --- src/Parser/Interface.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Parser/Interface.md b/src/Parser/Interface.md index 0867e1c..4535ee0 100644 --- a/src/Parser/Interface.md +++ b/src/Parser/Interface.md @@ -69,6 +69,12 @@ guardMaybe message x = do Just x <- x | _ => throwParseError message pure x + +export +replaceError : (message : String) -> Parser (a -> Parser b) +replaceError message = do + state <- save + pure (\_ => throw $ MkParseError state message) ``` ## Running a parser From fa5eb61d598e486085e9edfe18da594f747a1039 Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Fri, 24 Jan 2025 22:21:08 -0500 Subject: [PATCH 04/45] core: spelling --- src/Parser/ParserState.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Parser/ParserState.md b/src/Parser/ParserState.md index 57db768..c8eb361 100644 --- a/src/Parser/ParserState.md +++ b/src/Parser/ParserState.md @@ -58,7 +58,7 @@ Ord (Index i) where Stores the location we are currently at in the string, and metadata about it for providing good error messages. Parsing an empty input isn't very interesting, so -we exclude inputs of length zero, since that will make other things eaiser. +we exclude inputs of length zero, since that will make other things easier. ```idris ||| State representing a parser's position in the text From 40dd87a4f3858118f6e1dc43f53c9c1f135a586e Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Fri, 24 Jan 2025 22:26:55 -0500 Subject: [PATCH 05/45] core: ParseCharE --- src/Parser/ParserState.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/Parser/ParserState.md b/src/Parser/ParserState.md index c8eb361..323b6d5 100644 --- a/src/Parser/ParserState.md +++ b/src/Parser/ParserState.md @@ -216,6 +216,19 @@ parseChar : Has ParserState fs => (predicate : Char -> Bool) -> (err : Char -> e -> Eff fs (ParseCharResult e) parseChar predicate err = send $ ParseChar predicate err +||| Wrapper for parseChar that folds the error message into effect stack with the +||| provided callback +export +parseCharE : Has ParserState fs => Has (Except e) fs => + (predicate : Char -> Bool) -> (err : Char -> e) -> (eof : Lazy e) + -> Eff fs Char +parseCharE predicate err eof = do + result <- parseChar predicate id + case result of + GotChar char => pure char + GotError x => throw $ err x + EndOfInput => throw eof + ||| "Parse" the end of input, returning `True` if the parser state is currently ||| at the end of the input export From 7dba4925355cc57ef010914fa28e9cb653f477bb Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Fri, 24 Jan 2025 22:36:08 -0500 Subject: [PATCH 06/45] core: parseExactChar --- src/Parser/Interface.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/Parser/Interface.md b/src/Parser/Interface.md index 4535ee0..f9bc23b 100644 --- a/src/Parser/Interface.md +++ b/src/Parser/Interface.md @@ -185,6 +185,19 @@ parseString' : Parser (List1 Char) -> Parser String parseString' x = parseString $ map forget x ``` +Attempt to parse a specified character + +```idris +export +parseExactChar : Char -> Parser Char +parseExactChar c = do + result <- parseChar (== c) id + case result of + GotChar char => pure char + GotError err => throwParseError "Got \{show err} Expected \{show c}" + EndOfInput => throwParseError "End of input" +``` + ### Composition of boolean functions ```idris From 38c69c0ae391194cbdaca9868dac2d15d6e040ff Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Fri, 24 Jan 2025 23:57:16 -0500 Subject: [PATCH 07/45] core: parseTheseChars --- src/Parser/Interface.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/Parser/Interface.md b/src/Parser/Interface.md index f9bc23b..fe13c13 100644 --- a/src/Parser/Interface.md +++ b/src/Parser/Interface.md @@ -198,6 +198,19 @@ parseExactChar c = do EndOfInput => throwParseError "End of input" ``` +Attempt to parse one of a list of chars + +```idris +export +parseTheseChars : List Char -> Parser Char +parseTheseChars cs = do + result <- parseChar (\x => any (== x) cs) id + case result of + GotChar char => pure char + GotError err => throwParseError "Got \{show err} Expected one of \{show cs}" + EndOfInput => throwParseError "End of input" +``` + ### Composition of boolean functions ```idris From 1658e15487fc98b308fb0cd232eb13b614882a15 Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Sat, 25 Jan 2025 00:08:01 -0500 Subject: [PATCH 08/45] core: runFirstIO --- src/Parser/Interface.md | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/Parser/Interface.md b/src/Parser/Interface.md index fe13c13..00818ae 100644 --- a/src/Parser/Interface.md +++ b/src/Parser/Interface.md @@ -19,10 +19,9 @@ Combine the parser state at time of error with an error message. ```idris public export -record ParseError where - constructor MkParseError - state : ParserInternal Id - message : String +data ParseError : Type where + MkParseError : (state : ParserInternal Id) -> (message : String) -> ParseError + BeforeParse : (message : String) -> ParseError ``` ## Type Alias @@ -95,6 +96,17 @@ rundownFirst f = runExcept . guardMaybe "No returning parses" . runChoose {f = Maybe} $ f ``` +Provide wrappers for `rundownFirst` for evaluating it in various contexts. + +```idris +export +runFirstIO : (f : Parser a) -> String -> IO (Either ParseError a) +runFirstIO f str = do + Just state <- newInternalIO str + | _ => pure . Left $ BeforeParse "Empty input" + runEff (rundownFirst f) [handleParserStateIO state] +``` + ## Utility functionality ### Parser combinators From 994da7065ce65d55e3d48324e03445bf5bf4cf23 Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Sat, 25 Jan 2025 00:36:06 -0500 Subject: [PATCH 09/45] core: runParserState --- src/Parser/Interface.md | 12 ++++++++++++ src/Parser/ParserState.md | 30 ++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/src/Parser/Interface.md b/src/Parser/Interface.md index 00818ae..17c4ff9 100644 --- a/src/Parser/Interface.md +++ b/src/Parser/Interface.md @@ -105,6 +105,18 @@ runFirstIO f str = do Just state <- newInternalIO str | _ => pure . Left $ BeforeParse "Empty input" runEff (rundownFirst f) [handleParserStateIO state] + +export +runFirst : (f : Parser a) -> String -> Eff fs (Either ParseError a) +runFirst f str = do + Just state <- pure $ newInternal str + | _ => pure . Left $ BeforeParse "Empty input" + (result, _) <- lift . runParserState state . rundownFirst $ f + pure result + +export +runFirst' : (f : Parser a) -> String -> Either ParseError a +runFirst' f str = extract $ runFirst f str {fs = []} ``` ## Utility functionality diff --git a/src/Parser/ParserState.md b/src/Parser/ParserState.md index 323b6d5..2a06858 100644 --- a/src/Parser/ParserState.md +++ b/src/Parser/ParserState.md @@ -281,6 +281,36 @@ newInternalIO str = do map Just $ newIORef internal ``` +### State context + +```idris +unPS : ParserInternal Id -> ParserState a -> (a, ParserInternal Id) +unPS pi Save = (pi, pi) +unPS pi (Load pj) = ((), pi) +unPS pi (ParseChar predicate err) = + if pi.end_of_input + then (EndOfInput, pi) + else let + char = assert_total $ strIndex pi.input (cast pi.position.index) + in if predicate char + then case refine0 (pi.position.index + 1) {p = IsIndex pi.length} of + Nothing => + (GotChar char, {end_of_input := True} pi) + Just (Element next _) => + (GotChar char, {position := MkIndex next} pi) + else (GotError $ err char, pi) +unPS pi ParseEoF = (pi.end_of_input, pi) + +export +runParserState : Has ParserState fs => + (s : ParserInternal Id) -> Eff fs t + -> Eff (fs - ParserState) (t, ParserInternal Id) +runParserState s = + handleRelayS s (\x, y => pure (y, x)) $ \s2,ps,f => + let (a, st) = unPS s2 ps + in f st a +``` + ## Footnotes [^1]: https://github.com/stefan-hoeck/idris2-barbies From 59f1eb31d0584e962098bff02cad1fc6dbe4068a Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Sat, 25 Jan 2025 03:21:53 -0500 Subject: [PATCH 10/45] core: exact string --- src/Parser/Interface.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/Parser/Interface.md b/src/Parser/Interface.md index 17c4ff9..2c65cfa 100644 --- a/src/Parser/Interface.md +++ b/src/Parser/Interface.md @@ -235,6 +235,20 @@ parseTheseChars cs = do EndOfInput => throwParseError "End of input" ``` +Attempt to parse an exact string + +```idris +exactString : String -> Parser String +exactString str with (asList str) + exactString "" | [] = pure "" + exactString input@(strCons c str) | (c :: x) = do + GotChar next <- parseChar (== c) id + | GotError err => throwParseError "Got \{show err} expected \{show c}" + | EndOfInput => throwParseError "End of input" + rest <- exactString str | x + pure input +``` + ### Composition of boolean functions ```idris From 3029432699fa8f74a11cf35a759319aa9ed2e994 Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Sat, 25 Jan 2025 05:07:42 -0500 Subject: [PATCH 11/45] core: export exactString --- src/Parser/Interface.md | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Parser/Interface.md b/src/Parser/Interface.md index 2c65cfa..3dc4251 100644 --- a/src/Parser/Interface.md +++ b/src/Parser/Interface.md @@ -238,6 +238,7 @@ parseTheseChars cs = do Attempt to parse an exact string ```idris +export exactString : String -> Parser String exactString str with (asList str) exactString "" | [] = pure "" From 59fba4584d34257ed7cda91b41221f9632cd1823 Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Sat, 25 Jan 2025 12:56:42 -0500 Subject: [PATCH 12/45] core: nom and surround --- src/Parser/Interface.md | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/Parser/Interface.md b/src/Parser/Interface.md index 3dc4251..fd0f433 100644 --- a/src/Parser/Interface.md +++ b/src/Parser/Interface.md @@ -198,6 +198,7 @@ Lift a parser producing a `List` or `List1` of `Char` into a parser producing a `String` ```idris +-- TODO: Rename these export parseString : Parser (List Char) -> Parser String parseString x = do @@ -250,6 +251,40 @@ exactString str with (asList str) pure input ``` +Wrap a parser in delimiter characters, discarding the value of the delimiters + +```idris +export +delimited : (before, after : Char) -> Parser a -> Parser a +delimited before after x = do + starting_state <- save + _ <- parseExactChar before + val <- x + Just _ <- tryMaybe $ parseExactChar after + | _ => throw $ + MkParseError starting_state "Unmatched delimiter \{show before}" + pure val +``` + +Consume any number of characters of the provided character class and discard the +result. Also a version for doing so on both sides of a provided parser + +```idris +export +nom : Parser Char -> Parser () +nom x = do + _ <- many x + pure () + +export +surround : (around : Parser Char) -> (item : Parser a) -> Parser a +surround around item = do + nom around + val <- item + nom around + pure val +``` + ### Composition of boolean functions ```idris From 72ea53becf9ca3d11270f69e8319e5c862349ec4 Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Sat, 25 Jan 2025 13:37:08 -0500 Subject: [PATCH 13/45] core: oneOfM refactor --- src/Parser/Interface.md | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/src/Parser/Interface.md b/src/Parser/Interface.md index fd0f433..4656469 100644 --- a/src/Parser/Interface.md +++ b/src/Parser/Interface.md @@ -20,8 +20,11 @@ Combine the parser state at time of error with an error message. ```idris public export data ParseError : Type where + -- TODO: Rename this constructor MkParseError : (state : ParserInternal Id) -> (message : String) -> ParseError BeforeParse : (message : String) -> ParseError + NestedErrors : (state : ParserInternal Id) -> (message : String) + -> (rest : List ParseError) -> ParseError ``` ## Type Alias @@ -41,7 +51,7 @@ Show ParseError where ```idris public export Parser : Type -> Type -Parser a = Eff [ParserState, Except ParseError, Choose] a +Parser a = Eff [ParserState, Except ParseError] a ``` ## Error Generation @@ -93,7 +103,7 @@ no paths in the `Choice` effect produce a returning parse. export rundownFirst : (f : Parser a) -> Eff [ParserState] (Either ParseError a) rundownFirst f = - runExcept . guardMaybe "No returning parses" . runChoose {f = Maybe} $ f + runExcept f ``` Provide wrappers for `rundownFirst` for evaluating it in various contexts. @@ -148,27 +158,29 @@ tryMaybe f = try (map Just f) (\_ => pure Nothing) export tryEither : (f : Parser a) -> Parser (Either ParseError a) tryEither f = try (map Right f) (pure . Left) - -||| Converts any errors thrown by `f` into silent backtracking within `Choose` -export -tryEmpty : (f : Parser a) -> Parser a -tryEmpty f = try f (\_ => empty) ``` Attempt to parse one of the given input parsers, in the provided order, invoking -the provided error action on failure. This will suppress any errors returned by -the input parsers by mapping them to `empty`. +the provided error action on failure. The state will not be modified when an input parser fails ```idris export -oneOfE : Foldable f => - (err : Parser a) -> f (Parser a) -> Parser a -oneOfE err xs = foldr altE err xs +oneOfE : (err : String) -> List (Parser a) -> Parser a +oneOfE err xs = do + start <- save + oneOfE' err start [] xs where - altE : Parser a -> Parser a -> Parser a - altE f rest = (tryEmpty f) `alt` rest + oneOfE' : (err : String) -> (start : ParserInternal Id) + -> (errs : List ParseError) -> List (Parser a) -> Parser a + oneOfE' err start errs [] = do + throw $ NestedErrors start err (reverse errs) + oneOfE' err start errs (x :: xs) = do + x <- tryEither x + case x of + Right val => pure val + Left error => oneOfE' err start (error :: errs) xs ``` Attempt to parse 0+ of an item From 46b591283d8fc3a1ba7724f7e8b769dd6a2609c1 Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Fri, 24 Jan 2025 16:33:29 -0500 Subject: [PATCH 14/45] numbers: Create numbers module --- README.md | 4 ++++ advent.ipkg | 1 + src/Parser/Numbers.md | 5 +++++ 3 files changed, 10 insertions(+) create mode 100644 src/Parser/Numbers.md diff --git a/README.md b/README.md index 52d86cd..dc78c7c 100644 --- a/README.md +++ b/README.md @@ -68,6 +68,10 @@ solution. Internal state of a parser + - [ParserState](src/Parser/ParserState.md) + + Parsers for numerical values in multiple bases + ## Index of years and days - 2015 diff --git a/advent.ipkg b/advent.ipkg index 56f6e75..e8aff07 100644 --- a/advent.ipkg +++ b/advent.ipkg @@ -32,6 +32,7 @@ modules = Runner , Array , Parser , Parser.Interface + , Parser.Numbers -- main file (i.e. file to load at REPL) main = Main diff --git a/src/Parser/Numbers.md b/src/Parser/Numbers.md new file mode 100644 index 0000000..29e6ece --- /dev/null +++ b/src/Parser/Numbers.md @@ -0,0 +1,5 @@ +# Numerical Parsers + +```idris +module Parser.Numbers +``` From 2b78275a4bf897fddcbc48df02e0bcb9603c90b6 Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Fri, 24 Jan 2025 17:54:21 -0500 Subject: [PATCH 15/45] numbers: Basic module structure --- src/Parser/Numbers.md | 69 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/src/Parser/Numbers.md b/src/Parser/Numbers.md index 29e6ece..aa0e156 100644 --- a/src/Parser/Numbers.md +++ b/src/Parser/Numbers.md @@ -2,4 +2,73 @@ ```idris module Parser.Numbers + +import public Parser + +import Data.Vect +import Control.Eff +``` + +## Base Abstraction + +```idris +public export +record Base where + constructor MkBase + base : Nat + digits : Vect base Char +%name Base b + +export +hasDigit : Base -> Char -> Bool +hasDigit (MkBase base digits) c = any (== c) digits + +export +digitValue : Base -> Char -> Maybe Nat +digitValue (MkBase base digits) c = digitValue' digits 0 + where + digitValue' : Vect n Char -> (idx : Nat) -> Maybe Nat + digitValue' [] idx = Nothing + digitValue' (x :: xs) idx = + if x == c + then Just idx + else digitValue' xs (S idx) + +public export +base10 : Base +base10 = MkBase 10 + ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] + +public export +hex : Base +hex = MkBase 16 + ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'] +``` + +## Parsers + +### Nat + +```idris +export +nat : Parser Nat +``` + +## Unit tests + +Test roundtripping a value through the provided parser + +```idris +roundtrip : Eq a => Show a => a -> (p : Parser a) -> IO Bool +roundtrip x p = do + let string = show x + putStrLn "Roundtripping \{string}" + Just state <- newInternalIO string + | _ => do + putStrLn "Failed to produce parser for \{string}" + pure False + Right result <- runEff (rundownFirst p) [handleParserStateIO state] {m = IO} + | Left err => ?show_err + putStrLn "Input: \{string} Output: \{show result}" + pure $ x == result ``` From 82b16a0e63b57a84d94bfd6ea0b3d934a8e16d60 Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Fri, 24 Jan 2025 18:09:20 -0500 Subject: [PATCH 16/45] numbers: Nat unit tests --- src/Parser/Numbers.md | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/src/Parser/Numbers.md b/src/Parser/Numbers.md index aa0e156..195570f 100644 --- a/src/Parser/Numbers.md +++ b/src/Parser/Numbers.md @@ -51,7 +51,11 @@ hex = MkBase 16 ```idris export -nat : Parser Nat +nat : Base -> Parser Nat + +export +natBase10 : Parser Nat +natBase10 = nat base10 ``` ## Unit tests @@ -68,7 +72,24 @@ roundtrip x p = do putStrLn "Failed to produce parser for \{string}" pure False Right result <- runEff (rundownFirst p) [handleParserStateIO state] {m = IO} - | Left err => ?show_err + | Left err => do + printLn err + pure False putStrLn "Input: \{string} Output: \{show result}" pure $ x == result ``` + +Do some roundtrip tests with the nat parser + +```idris +-- @@test Nat round trip +natRoundTrip : IO Bool +natRoundTrip = pure $ + !(roundtrip 0 natBase10) + && !(roundtrip 1 natBase10) + && !(roundtrip 100 natBase10) + && !(roundtrip 1234 natBase10) + && !(roundtrip 1234567890 natBase10) + && !(roundtrip 1234567890000 natBase10) + && !(roundtrip 12345678901234567890 natBase10) +``` From 1cc6bea78efd0cbf87f7036f8d82c038f18989cc Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Fri, 24 Jan 2025 22:18:09 -0500 Subject: [PATCH 17/45] numbers: Nat parser --- src/Parser/Numbers.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/Parser/Numbers.md b/src/Parser/Numbers.md index 195570f..008b649 100644 --- a/src/Parser/Numbers.md +++ b/src/Parser/Numbers.md @@ -9,6 +9,10 @@ import Data.Vect import Control.Eff ``` + + ## Base Abstraction ```idris @@ -52,6 +56,20 @@ hex = MkBase 16 ```idris export nat : Base -> Parser Nat +nat b = do + error <- replaceError "Expected digit" + (first ::: rest) <- atLeastOne error parseDigit + pure $ foldl (\acc, e => 10 * acc + e) first rest + where + parseDigit : Parser Nat + parseDigit = do + GotChar char <- parseChar (hasDigit b) id + | GotError e => throwParseError "\{show e} is not a digit" + | EndOfInput => throwParseError "End Of Input" + case digitValue b char of + Nothing => + throwParseError "Failed to parse as base \{show b.base}: \{show char}" + Just x => pure x export natBase10 : Parser Nat From 9220d4bbaca3667dbdd4c33a6ab31ec8edc22ff6 Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Fri, 24 Jan 2025 22:39:06 -0500 Subject: [PATCH 18/45] numbers: Integer parser --- src/Parser/Numbers.md | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/src/Parser/Numbers.md b/src/Parser/Numbers.md index 008b649..d25343b 100644 --- a/src/Parser/Numbers.md +++ b/src/Parser/Numbers.md @@ -76,6 +76,23 @@ natBase10 : Parser Nat natBase10 = nat base10 ``` +### Integer + +```idris +export +integer : Base -> Parser Integer +integer b = do + negative <- map isJust . tryMaybe $ parseExactChar '-' + value <- map natToInteger $ nat b + if negative + then pure $ negate value + else pure $ value + +export +integerBase10 : Parser Integer +integerBase10 = integer base10 +``` + ## Unit tests Test roundtripping a value through the provided parser @@ -111,3 +128,22 @@ natRoundTrip = pure $ && !(roundtrip 1234567890000 natBase10) && !(roundtrip 12345678901234567890 natBase10) ``` + +```idris +-- @@test Integer round trip +integerRoundTrip : IO Bool +integerRoundTrip = pure $ + !(roundtrip 0 integerBase10) + && !(roundtrip 1 integerBase10) + && !(roundtrip 100 integerBase10) + && !(roundtrip 1234 integerBase10) + && !(roundtrip 1234567890 integerBase10) + && !(roundtrip 1234567890000 integerBase10) + && !(roundtrip 12345678901234567890 integerBase10) + && !(roundtrip (-1) integerBase10) + && !(roundtrip (-100) integerBase10) + && !(roundtrip (-1234) integerBase10) + && !(roundtrip (-1234567890) integerBase10) + && !(roundtrip (-1234567890000) integerBase10) + && !(roundtrip (-12345678901234567890) integerBase10) +``` From 026476dd91e237c91d5f6bafaf98a6b09a355029 Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Sat, 25 Jan 2025 00:00:52 -0500 Subject: [PATCH 19/45] numbers: Double Parser --- src/Parser/Numbers.md | 111 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) diff --git a/src/Parser/Numbers.md b/src/Parser/Numbers.md index d25343b..1f65bb5 100644 --- a/src/Parser/Numbers.md +++ b/src/Parser/Numbers.md @@ -93,6 +93,63 @@ integerBase10 : Parser Integer integerBase10 = integer base10 ``` +### Double + +```idris +export +double : Base -> Parser Double +double b = do + starting_state <- save + integer <- integer + fraction <- tryMaybe fraction + exponent <- tryMaybe exponent + let str = case (fraction, exponent) of + (Nothing, Nothing) => + integer + (Nothing, (Just exponent)) => + "\{integer}e\{exponent}" + ((Just fraction), Nothing) => + "\{integer}.\{fraction}" + ((Just fraction), (Just exponent)) => + "\{integer}.\{fraction}e\{exponent}" + Just out <- pure $ parseDouble str + | _ => + throw $ MkParseError starting_state "Std failed to parse as double: \{str}" + pure out + where + parseDigit : Parser Char + parseDigit = do + GotChar char <- parseChar (hasDigit b) id + | GotError e => throwParseError "\{show e} is not a digit" + | EndOfInput => throwParseError "End Of Input" + pure char + integer : Parser String + integer = do + sign <- tryMaybe $ parseExactChar '-' + error <- replaceError "Expected digit" + digits <- map forget $ atLeastOne error parseDigit + case sign of + Nothing => pure $ pack digits + Just x => pure $ pack (x :: digits) + fraction : Parser String + fraction = do + decimal <- parseExactChar '.' + error <- replaceError "Expected digit" + digits <- map forget $ atLeastOne error parseDigit + pure $ pack digits + exponent : Parser String + exponent = do + e <- parseTheseChars ['e', 'E'] + sign <- parseTheseChars ['+', '-'] + error <- replaceError "Expected digit" + digits <- map forget $ atLeastOne error parseDigit + pure . pack $ sign :: digits + +export +doubleBase10 : Parser Double +doubleBase10 = double base10 +``` + ## Unit tests Test roundtripping a value through the provided parser @@ -147,3 +204,57 @@ integerRoundTrip = pure $ && !(roundtrip (-1234567890000) integerBase10) && !(roundtrip (-12345678901234567890) integerBase10) ``` + +Compare our parsing of a double to the standard library's + +```idris +compareDouble : String -> IO Bool +compareDouble string = do + Just state <- newInternalIO string + | _ => do + putStrLn "Failed to produce parser for \{string}" + pure False + Right result <- + runEff (rundownFirst doubleBase10) [handleParserStateIO state] {m = IO} + | Left err => do + printLn err + pure False + putStrLn "Input: \{string} Output: \{show result}" + Just double' <- pure $ parseDouble string + | _ => do + printLn "Std failed to parse as double: \{string}" + pure False + pure $ result == double' +``` + +```idris +-- @@test Double Std Comparison +doubleRoundTrip : IO Bool +doubleRoundTrip = pure $ + !(compareDouble "0") + && !(compareDouble "1") + && !(compareDouble "100") + && !(compareDouble "1234") + && !(compareDouble "1234567890") + && !(compareDouble "1234567890000") + && !(compareDouble "12345678901234567890") + && !(compareDouble "-1") + && !(compareDouble "-100") + && !(compareDouble "-1234") + && !(compareDouble "-1234567890") + && !(compareDouble "-1234567890000") + && !(compareDouble "-12345678901234567890") + && !(compareDouble "0.0") + && !(compareDouble "1.0") + && !(compareDouble "-1.0") + && !(compareDouble "-0.0") + && !(compareDouble "-0.0") + && !(compareDouble "0.1234") + && !(compareDouble "0.01234") + && !(compareDouble "-0.1234") + && !(compareDouble "-0.01234") + && !(compareDouble "1.234e+5") + && !(compareDouble "1.234e-5") + && !(compareDouble "-1.234e+5") + && !(compareDouble "-1.234e-5") +``` From aacabb8b2254e1ba1e59a2e308a54866e7aacb6e Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Sat, 25 Jan 2025 02:24:40 -0500 Subject: [PATCH 20/45] numbers: make double non base-sensitive --- src/Parser/Numbers.md | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/Parser/Numbers.md b/src/Parser/Numbers.md index 1f65bb5..fb7b780 100644 --- a/src/Parser/Numbers.md +++ b/src/Parser/Numbers.md @@ -96,9 +96,10 @@ integerBase10 = integer base10 ### Double ```idris +-- TODO: Replicate `parseDouble` logic and make this base-generic export -double : Base -> Parser Double -double b = do +double : Parser Double +double = do starting_state <- save integer <- integer fraction <- tryMaybe fraction @@ -119,7 +120,7 @@ double b = do where parseDigit : Parser Char parseDigit = do - GotChar char <- parseChar (hasDigit b) id + GotChar char <- parseChar (hasDigit base10) id | GotError e => throwParseError "\{show e} is not a digit" | EndOfInput => throwParseError "End Of Input" pure char @@ -144,10 +145,6 @@ double b = do error <- replaceError "Expected digit" digits <- map forget $ atLeastOne error parseDigit pure . pack $ sign :: digits - -export -doubleBase10 : Parser Double -doubleBase10 = double base10 ``` ## Unit tests @@ -215,7 +212,7 @@ compareDouble string = do putStrLn "Failed to produce parser for \{string}" pure False Right result <- - runEff (rundownFirst doubleBase10) [handleParserStateIO state] {m = IO} + runEff (rundownFirst double) [handleParserStateIO state] {m = IO} | Left err => do printLn err pure False From 906ffb78777be562912b05ad81ee7bd0a36b85d3 Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Sat, 25 Jan 2025 02:26:35 -0500 Subject: [PATCH 21/45] numbers: Fix readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index dc78c7c..fcbee4e 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,7 @@ solution. Internal state of a parser - - [ParserState](src/Parser/ParserState.md) + - [Numbers](src/Parser/Numbers.md) Parsers for numerical values in multiple bases From b018967cb16d7daba27eae974bb4bb997986c91c Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Sat, 25 Jan 2025 02:27:25 -0500 Subject: [PATCH 22/45] json: create module --- README.md | 4 ++++ advent.ipkg | 1 + src/Parser/JSON.md | 12 ++++++++++++ 3 files changed, 17 insertions(+) create mode 100644 src/Parser/JSON.md diff --git a/README.md b/README.md index fcbee4e..48644ce 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,10 @@ solution. Parsers for numerical values in multiple bases + - [JSON](src/Parser/JSON.md) + + JSON Parser + ## Index of years and days - 2015 diff --git a/advent.ipkg b/advent.ipkg index e8aff07..119b03c 100644 --- a/advent.ipkg +++ b/advent.ipkg @@ -33,6 +33,7 @@ modules = Runner , Parser , Parser.Interface , Parser.Numbers + , Parser.JSON -- main file (i.e. file to load at REPL) main = Main diff --git a/src/Parser/JSON.md b/src/Parser/JSON.md new file mode 100644 index 0000000..1f2377c --- /dev/null +++ b/src/Parser/JSON.md @@ -0,0 +1,12 @@ +# JSON Parser + +```idris +module Parser.JSON + +import public Parser +import public Parser.Numbers +``` + + From b70ed0e1478dfa0f53af55cc1d7a87ad8f3d3fb7 Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Sat, 25 Jan 2025 02:50:22 -0500 Subject: [PATCH 23/45] json: Define types, add sop --- advent.ipkg | 1 + 1 file changed, 1 insertion(+) diff --git a/advent.ipkg b/advent.ipkg index 119b03c..607e26e 100644 --- a/advent.ipkg +++ b/advent.ipkg @@ -19,6 +19,7 @@ depends = base , tailrec , eff , elab-util + , sop , ansi , if-unsolved-implicit , c-ffi From 79d56aeddd5578682bf15355d02dcd87056f98b7 Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Sat, 25 Jan 2025 03:16:52 -0500 Subject: [PATCH 24/45] json: Parser types --- src/Parser/JSON.md | 84 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/src/Parser/JSON.md b/src/Parser/JSON.md index 1f2377c..1b08b12 100644 --- a/src/Parser/JSON.md +++ b/src/Parser/JSON.md @@ -5,8 +5,92 @@ module Parser.JSON import public Parser import public Parser.Numbers + +import Structures.Dependent.DList ``` + +## JSON components + +Types a JSON value is allowed to have + +```idris +public export +data JSONType : Type where + TObject : JSONType + TArray : JSONType + TString : JSONType + TNumber : JSONType + TBool : JSONType + TNull : JSONType +%runElab derive "JSONType" [Generic, Meta, Eq, Ord, Show, DecEq] +%name JSONType type, type2, type3 +``` + +A JSON value indexed by its type + +```idris +public export +data JSONValue : JSONType -> Type where + VObject : {types : List JSONType} + -> DList JSONType (\t => (String, JSONValue t)) types -> JSONValue TObject + VArray : {types : List JSONType} + -> DList JSONType JSONValue types -> JSONValue TArray + VString : (s : String) -> JSONValue TString + VNumber : (d : Double) -> JSONValue TNumber + VBool : (b : Bool) -> JSONValue TBool + VNull : JSONValue TNull +%name JSONValue value, value2, value3 +``` + + + +## Parsers + +We are going to get mutually recursive here. Instead of using a `mutual` block, +we will use the more modern style of declaring all our types ahead of our +definitions. + +```idris +export +object : Parser (JSONValue TObject) + +export +array : Parser (JSONValue TArray) + +export +string : Parser (JSONValue TString) + +export +number : Parser (JSONValue TNumber) + +export +bool : Parser (JSONValue TBool) + +export +null : Parser (JSONValue TNull) +``` From 38e259fd13c7bc813342d336a1f0d1eb5de28b80 Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Sat, 25 Jan 2025 04:06:17 -0500 Subject: [PATCH 25/45] json: Object and array --- src/Parser/JSON.md | 114 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 109 insertions(+), 5 deletions(-) diff --git a/src/Parser/JSON.md b/src/Parser/JSON.md index 1b08b12..faca235 100644 --- a/src/Parser/JSON.md +++ b/src/Parser/JSON.md @@ -67,6 +67,8 @@ Show (JSONValue t) where show (VBool False) = "false" show (VBool True) = "true" show VNull = "null" + +%hide Language.Reflection.TT.WithFC.value --> ## Parsers @@ -78,19 +80,121 @@ definitions. ```idris export object : Parser (JSONValue TObject) - export array : Parser (JSONValue TArray) - export string : Parser (JSONValue TString) - export number : Parser (JSONValue TNumber) - export bool : Parser (JSONValue TBool) - export null : Parser (JSONValue TNull) ``` + +Define a `whitespace` character class based on the json specifications + +```idris +whitespace : Parser Char +whitespace = do + result <- + parseChar (\x => any (== x) $ the (List _) [' ', '\n', '\r', '\t']) id + case result of + GotChar char => pure char + GotError err => throwParseError "Expected whitespace, got: \{show err}" + EndOfInput => throwParseError "End of Input" +``` + +Convenience function + +```idris +dpairize : {t : JSONType} -> + Parser (JSONValue t) -> Parser (t' : JSONType ** JSONValue t') +dpairize x = do + x <- x + pure (_ ** x) +``` + +Top level json value parser + +```idris +export +value : Parser (t : JSONType ** JSONValue t) +value = do + _ <- many whitespace + val <- oneOfE + (throwParseError "Expected JSON Value") + (the (List _) + [ + dpairize object + , dpairize array + , dpairize string + , dpairize number + , dpairize bool + , dpairize null + ]) + _ <- many whitespace + pure val +``` + +Now go through our json value types + +```idris +object = do + oneOfE + (throwParseError "Expected Object") + (the (List _) [emptyObject, occupiedObject]) + where + emptyObject : Parser (JSONValue TObject) + emptyObject = do + _ <- parseExactChar '{' + _ <- many whitespace + _ <- parseExactChar '}' + pure $ VObject Nil + firstKeyValue : Parser (t : JSONType ** (String, JSONValue t)) + firstKeyValue = do + _ <- many whitespace + VString key <- string + _ <- many whitespace + _ <- parseExactChar ':' + (typ ** val) <- value + pure (typ ** (key, val)) + restKeyValue : Parser (t : JSONType ** (String, JSONValue t)) + restKeyValue = do + _ <- parseExactChar ',' + firstKeyValue + occupiedObject : Parser (JSONValue TObject) + occupiedObject = do + _ <- parseExactChar '{' + first <- firstKeyValue + rest <- many restKeyValue + _ <- parseExactChar '}' + let (types ** xs) = DList.fromList (first :: rest) + pure $ VObject xs +``` + +```idris +array = do + oneOfE + (throwParseError "Expected Array") + (the (List _) [emptyArray, occupiedArray]) + where + emptyArray : Parser (JSONValue TArray) + emptyArray = do + _ <- parseExactChar '[' + _ <- many whitespace + _ <- parseExactChar ']' + pure $ VArray Nil + restValue : Parser (t : JSONType ** JSONValue t) + restValue = do + _ <- parseExactChar ',' + value + occupiedArray : Parser (JSONValue TArray) + occupiedArray = do + _ <- parseExactChar '[' + first <- value + rest <- many restValue + _ <- parseExactChar ']' + let (types ** xs) = DList.fromList (first :: rest) + pure $ VArray xs +``` From 3ad023ef6a688d5b6ded1437476379a584f0afc7 Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Sat, 25 Jan 2025 04:39:25 -0500 Subject: [PATCH 26/45] json: Janky string --- src/Parser/JSON.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/Parser/JSON.md b/src/Parser/JSON.md index faca235..453177a 100644 --- a/src/Parser/JSON.md +++ b/src/Parser/JSON.md @@ -198,3 +198,14 @@ array = do let (types ** xs) = DList.fromList (first :: rest) pure $ VArray xs ``` + +```idris +string = do + _ <- parseExactChar '"' + -- TODO: Handle control characters properly + e1 <- parseError "Expected non-quote, got quote" + e2 <- parseError "End of input" + contents <- parseString . many $ parseCharE (not . (== '"')) (\_ => e1) e2 + _ <- parseExactChar '"' + pure $ VString contents +``` From 370bb18c067f90ce513f4ca659c79b0a4bde522a Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Sat, 25 Jan 2025 05:04:54 -0500 Subject: [PATCH 27/45] json: number --- src/Parser/JSON.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Parser/JSON.md b/src/Parser/JSON.md index 453177a..c4d987d 100644 --- a/src/Parser/JSON.md +++ b/src/Parser/JSON.md @@ -209,3 +209,9 @@ string = do _ <- parseExactChar '"' pure $ VString contents ``` + +```idris +number = do + d <- double + pure $ VNumber d +``` From 19ce8ac79897c8a7ccdfe00fc6f312a352da6f55 Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Sat, 25 Jan 2025 05:09:24 -0500 Subject: [PATCH 28/45] json: Bool and null --- src/Parser/JSON.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/Parser/JSON.md b/src/Parser/JSON.md index c4d987d..7e83fd7 100644 --- a/src/Parser/JSON.md +++ b/src/Parser/JSON.md @@ -215,3 +215,25 @@ number = do d <- double pure $ VNumber d ``` + +```idris +bool = do + oneOfE + (throwParseError "Expected Bool") + (the (List _) [true, false]) + where + true : Parser (JSONValue TBool) + true = do + _ <- exactString "true" + pure $ VBool True + false : Parser (JSONValue TBool) + false = do + _ <- exactString "false" + pure $ VBool False +``` + +```idris +null = do + _ <- exactString "null" + pure VNull +``` From aa1ae931650392fb053be27a39b4d52b8c09f561 Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Sat, 25 Jan 2025 13:16:51 -0500 Subject: [PATCH 29/45] json: object refactor --- src/Parser/JSON.md | 51 ++++++++++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 18 deletions(-) diff --git a/src/Parser/JSON.md b/src/Parser/JSON.md index 7e83fd7..1f1ccde 100644 --- a/src/Parser/JSON.md +++ b/src/Parser/JSON.md @@ -121,8 +121,7 @@ Top level json value parser export value : Parser (t : JSONType ** JSONValue t) value = do - _ <- many whitespace - val <- oneOfE + surround whitespace $ oneOfE (throwParseError "Expected JSON Value") (the (List _) [ @@ -133,8 +132,6 @@ value = do , dpairize bool , dpairize null ]) - _ <- many whitespace - pure val ``` Now go through our json value types @@ -147,29 +144,27 @@ object = do where emptyObject : Parser (JSONValue TObject) emptyObject = do - _ <- parseExactChar '{' - _ <- many whitespace - _ <- parseExactChar '}' + delimited '{' '}' (nom whitespace) pure $ VObject Nil - firstKeyValue : Parser (t : JSONType ** (String, JSONValue t)) - firstKeyValue = do - _ <- many whitespace - VString key <- string - _ <- many whitespace + keyValue : Parser (t : JSONType ** (String, JSONValue t)) + keyValue = do + VString key <- surround whitespace string _ <- parseExactChar ':' (typ ** val) <- value pure (typ ** (key, val)) restKeyValue : Parser (t : JSONType ** (String, JSONValue t)) restKeyValue = do _ <- parseExactChar ',' - firstKeyValue + keyValue + pairs : Parser (List1 (t : JSONType ** (String, JSONValue t))) + pairs = do + first <- keyValue + rest <- many restKeyValue + pure $ first ::: rest occupiedObject : Parser (JSONValue TObject) occupiedObject = do - _ <- parseExactChar '{' - first <- firstKeyValue - rest <- many restKeyValue - _ <- parseExactChar '}' - let (types ** xs) = DList.fromList (first :: rest) + val <- delimited '{' '}' pairs + let (types ** xs) = DList.fromList (forget val) pure $ VObject xs ``` @@ -237,3 +232,23 @@ null = do _ <- exactString "null" pure VNull ``` + +## Unit tests + +Quick smoke test + +```idris +-- @@test JSON Quick Smoke +quickSmoke : IO Bool +quickSmoke = do + let input = "{\"string\":\"string\",\"number\":5,\"true\":true,\"false\":false,\"null\":null,\"array\":[1,2,3]}" + putStrLn input + Right parsed <- runFirstIO object input + | Left err => do + printLn err + pure False + putStrLn "Input: \{input}\nOutput: \{show parsed}" + case parsed of + VObject xs => ?quickSmoke_rhs_0 + _ => pure False +``` From 77dcc4d95329648f406702ab4fc8e91057bc2b52 Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Sat, 25 Jan 2025 13:37:20 -0500 Subject: [PATCH 30/45] json: oneOfM refactor --- src/Parser/JSON.md | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/src/Parser/JSON.md b/src/Parser/JSON.md index 1f1ccde..50c24a2 100644 --- a/src/Parser/JSON.md +++ b/src/Parser/JSON.md @@ -122,16 +122,15 @@ export value : Parser (t : JSONType ** JSONValue t) value = do surround whitespace $ oneOfE - (throwParseError "Expected JSON Value") - (the (List _) - [ - dpairize object - , dpairize array - , dpairize string - , dpairize number - , dpairize bool - , dpairize null - ]) + "Expected JSON Value" + [ + dpairize object + , dpairize array + , dpairize string + , dpairize number + , dpairize bool + , dpairize null + ] ``` Now go through our json value types @@ -139,8 +138,8 @@ Now go through our json value types ```idris object = do oneOfE - (throwParseError "Expected Object") - (the (List _) [emptyObject, occupiedObject]) + "Expected Object" + [emptyObject, occupiedObject] where emptyObject : Parser (JSONValue TObject) emptyObject = do @@ -171,8 +170,8 @@ object = do ```idris array = do oneOfE - (throwParseError "Expected Array") - (the (List _) [emptyArray, occupiedArray]) + "Expected Array" + [emptyArray, occupiedArray] where emptyArray : Parser (JSONValue TArray) emptyArray = do @@ -214,8 +213,8 @@ number = do ```idris bool = do oneOfE - (throwParseError "Expected Bool") - (the (List _) [true, false]) + "Expected Bool" + [true, false] where true : Parser (JSONValue TBool) true = do From a8c390166561ec78856d27779bc9c4167223a3c0 Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Sat, 25 Jan 2025 13:39:26 -0500 Subject: [PATCH 31/45] json: Show Fix --- src/Parser/JSON.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Parser/JSON.md b/src/Parser/JSON.md index 50c24a2..7441ed6 100644 --- a/src/Parser/JSON.md +++ b/src/Parser/JSON.md @@ -61,7 +61,7 @@ Show (JSONValue t) where in assert_total $ "{\{joinBy "," xs}}" show (VArray xs) = let xs = dMap (\_,e => show e) xs - in assert_total $ "[\{joinBy "," . map show $ xs}]" + in assert_total $ "[\{joinBy "," xs}]" show (VString s) = "\"\{s}\"" show (VNumber d) = show d show (VBool False) = "false" From a3c7729ab21af5f12b611f5ca25d19a0ef71a1d1 Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Sat, 25 Jan 2025 13:58:44 -0500 Subject: [PATCH 32/45] json: Smoke test --- src/Parser/JSON.md | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/src/Parser/JSON.md b/src/Parser/JSON.md index 7441ed6..c69ea2c 100644 --- a/src/Parser/JSON.md +++ b/src/Parser/JSON.md @@ -68,6 +68,16 @@ Show (JSONValue t) where show (VBool True) = "true" show VNull = "null" +Eq (JSONValue t) where + (VObject xs) == (VObject ys) = + assert_total $ xs $== ys + (VArray xs) == (VArray ys) = + assert_total $ xs $== ys + (VString s) == (VString str) = s == str + (VNumber d) == (VNumber dbl) = d == dbl + (VBool b) == (VBool x) = b == x + VNull == VNull = True + %hide Language.Reflection.TT.WithFC.value --> @@ -242,12 +252,30 @@ quickSmoke : IO Bool quickSmoke = do let input = "{\"string\":\"string\",\"number\":5,\"true\":true,\"false\":false,\"null\":null,\"array\":[1,2,3]}" putStrLn input - Right parsed <- runFirstIO object input + Right (type ** parsed) <- runFirstIO value input | Left err => do printLn err pure False - putStrLn "Input: \{input}\nOutput: \{show parsed}" + putStrLn "Input: \{input}\nOutput: \{show type} -> \{show parsed}" case parsed of - VObject xs => ?quickSmoke_rhs_0 + VObject {types} xs => do + putStrLn "Output types: \{show types}" + case decEq types [TString, TNumber, TBool, TBool, TNull, TArray] of + No contra => pure False + Yes Refl => case xs of + [ ("string", str) + , ("number", num) + , ("true", bool_t) + , ("false", bool_f) + , ("null", nul) + , ("array", arr) + ] => pure $ + str == VString "string" + && num == VNumber 5.0 + && bool_t == VBool True + && bool_f == VBool False + && nul == VNull + && arr == VArray [VNumber 1.0, VNumber 2.0, VNumber 3.0] + _ => pure False _ => pure False ``` From 91e1d2c9b17f14ce5d4415032362e84a29ea436d Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Sat, 25 Jan 2025 14:21:53 -0500 Subject: [PATCH 33/45] json: More refactor --- src/Parser/JSON.md | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/Parser/JSON.md b/src/Parser/JSON.md index c69ea2c..48ecdbe 100644 --- a/src/Parser/JSON.md +++ b/src/Parser/JSON.md @@ -185,21 +185,25 @@ array = do where emptyArray : Parser (JSONValue TArray) emptyArray = do - _ <- parseExactChar '[' - _ <- many whitespace - _ <- parseExactChar ']' + delimited '[' ']' (nom whitespace) pure $ VArray Nil restValue : Parser (t : JSONType ** JSONValue t) restValue = do _ <- parseExactChar ',' value + values : Parser (List1 (t : JSONType ** JSONValue t)) + values = do + first <- value + rest <- many restValue + pure $ first ::: rest occupiedArray : Parser (JSONValue TArray) occupiedArray = do _ <- parseExactChar '[' - first <- value - rest <- many restValue + xs <- values _ <- parseExactChar ']' - let (types ** xs) = DList.fromList (first :: rest) + -- TODO: Why is this busted? + -- xs <- delimited '[' ']' values + let (types ** xs) = DList.fromList (forget xs) pure $ VArray xs ``` From 4fb5707b25abef3ddc079eba204cd323dc07f1f2 Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Sun, 26 Jan 2025 15:40:15 -0500 Subject: [PATCH 34/45] json: Refactor string parser --- src/Parser/JSON.md | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/Parser/JSON.md b/src/Parser/JSON.md index 48ecdbe..2634d83 100644 --- a/src/Parser/JSON.md +++ b/src/Parser/JSON.md @@ -209,13 +209,15 @@ array = do ```idris string = do - _ <- parseExactChar '"' - -- TODO: Handle control characters properly - e1 <- parseError "Expected non-quote, got quote" - e2 <- parseError "End of input" - contents <- parseString . many $ parseCharE (not . (== '"')) (\_ => e1) e2 - _ <- parseExactChar '"' - pure $ VString contents + str <- parseString $ delimited '"' '"' (many stringCharacter) + pure $ VString str + where + -- TODO: Handle control characters properly + stringCharacter : Parser Char + stringCharacter = do + e1 <- parseError "Expected non-quote, got quote" + e2 <- parseError "End of input" + parseCharE (not . (== '"')) (\_ => e1) e2 ``` ```idris From 5a47d5548c3ac23036999e51adf999ed11518394 Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Sun, 26 Jan 2025 15:47:02 -0500 Subject: [PATCH 35/45] json: Clean up json smoke test --- src/Parser/JSON.md | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/src/Parser/JSON.md b/src/Parser/JSON.md index 2634d83..f09c8dd 100644 --- a/src/Parser/JSON.md +++ b/src/Parser/JSON.md @@ -68,6 +68,7 @@ Show (JSONValue t) where show (VBool True) = "true" show VNull = "null" +-- TODO: Deal with keys potentially having different orders in different objects Eq (JSONValue t) where (VObject xs) == (VObject ys) = assert_total $ xs $== ys @@ -263,25 +264,20 @@ quickSmoke = do printLn err pure False putStrLn "Input: \{input}\nOutput: \{show type} -> \{show parsed}" - case parsed of - VObject {types} xs => do - putStrLn "Output types: \{show types}" - case decEq types [TString, TNumber, TBool, TBool, TNull, TArray] of - No contra => pure False - Yes Refl => case xs of - [ ("string", str) - , ("number", num) - , ("true", bool_t) - , ("false", bool_f) - , ("null", nul) - , ("array", arr) - ] => pure $ - str == VString "string" - && num == VNumber 5.0 - && bool_t == VBool True - && bool_f == VBool False - && nul == VNull - && arr == VArray [VNumber 1.0, VNumber 2.0, VNumber 3.0] - _ => pure False + let reference_object = + VObject [ + ("string", VString "string") + , ("number", VNumber 5.0) + , ("true", VBool True) + , ("false", VBool False) + , ("null", VNull) + , ("array", VArray [ + VNumber 1.0 + , VNumber 2.0 + , VNumber 3.0 + ]) + ] + case type of + TObject => pure $ parsed == reference_object _ => pure False ``` From 06c4c8a9cfe8b3af5086a3cce0a4f90ad99e3e5e Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Sun, 26 Jan 2025 19:48:26 -0500 Subject: [PATCH 36/45] core: Debug wrapper for handleParserIO --- src/Parser/Interface.md | 7 +++++++ src/Parser/ParserState.md | 41 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/src/Parser/Interface.md b/src/Parser/Interface.md index 4656469..0aced22 100644 --- a/src/Parser/Interface.md +++ b/src/Parser/Interface.md @@ -116,6 +116,13 @@ runFirstIO f str = do | _ => pure . Left $ BeforeParse "Empty input" runEff (rundownFirst f) [handleParserStateIO state] +export +runFirstIODebug : (f : Parser a) -> String -> IO (Either ParseError a) +runFirstIODebug f str = do + Just state <- newInternalIO str + | _ => pure . Left $ BeforeParse "Empty input" + runEff (rundownFirst f) [handleParserStateIODebug state] + export runFirst : (f : Parser a) -> String -> Eff fs (Either ParseError a) runFirst f str = do diff --git a/src/Parser/ParserState.md b/src/Parser/ParserState.md index 2a06858..be4a783 100644 --- a/src/Parser/ParserState.md +++ b/src/Parser/ParserState.md @@ -12,6 +12,9 @@ import public Data.Refined.Int64 import public Data.SortedMap import public Data.IORef +import Data.Primitives.Interpolation +import System.File + import public Control.Eff ``` @@ -49,11 +52,17 @@ record Index (length : Int64) where ``` Stores the location we are currently at in the string, and metadata about it for @@ -147,6 +156,17 @@ positionPair pi = in (linum, col) ``` + + ### More Barbie Functionality Provide the barbie analogs of `map` and `traverse` for our `ParserInternal` @@ -195,6 +215,14 @@ data ParserState : Type -> Type where ParseEoF : ParserState Bool ``` + + ### Actions ```idris @@ -281,6 +309,19 @@ newInternalIO str = do map Just $ newIORef internal ``` +Wrapper with debugging output + +```idris +export +handleParserStateIODebug : HasIO io => + IORef (ParserInternal IORef) -> ParserState t -> io t +handleParserStateIODebug x y = do + state <- readIORef x + state <- btraverse readIORef state + _ <- fPutStrLn stderr "\{show y} -> \{show state}" + handleParserStateIO x y +``` + ### State context ```idris From da182e813f8c472f5ed8650c987c639fcf32a8a3 Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Sun, 26 Jan 2025 19:51:46 -0500 Subject: [PATCH 37/45] json: Use debuging runner in tests --- src/Parser/JSON.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Parser/JSON.md b/src/Parser/JSON.md index f09c8dd..3fa005e 100644 --- a/src/Parser/JSON.md +++ b/src/Parser/JSON.md @@ -259,7 +259,7 @@ quickSmoke : IO Bool quickSmoke = do let input = "{\"string\":\"string\",\"number\":5,\"true\":true,\"false\":false,\"null\":null,\"array\":[1,2,3]}" putStrLn input - Right (type ** parsed) <- runFirstIO value input + Right (type ** parsed) <- runFirstIODebug value input | Left err => do printLn err pure False From da44cf72cf45dbd3e6a80cfce96c9701ac769eda Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Sun, 26 Jan 2025 20:04:14 -0500 Subject: [PATCH 38/45] core: Notes --- src/Parser/Interface.md | 10 +++++++++- src/Parser/ParserState.md | 15 +++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/Parser/Interface.md b/src/Parser/Interface.md index 0aced22..18df77a 100644 --- a/src/Parser/Interface.md +++ b/src/Parser/Interface.md @@ -235,6 +235,7 @@ Attempt to parse a specified character export parseExactChar : Char -> Parser Char parseExactChar c = do + pnote "Parsing exact char: \{show c}" result <- parseChar (== c) id case result of GotChar char => pure char @@ -248,6 +249,7 @@ Attempt to parse one of a list of chars export parseTheseChars : List Char -> Parser Char parseTheseChars cs = do + pnote "Parsing one of: \{show cs}" result <- parseChar (\x => any (== x) cs) id case result of GotChar char => pure char @@ -261,8 +263,11 @@ Attempt to parse an exact string export exactString : String -> Parser String exactString str with (asList str) - exactString "" | [] = pure "" + exactString "" | [] = do + pnote "Parsing the empty string" + pure "" exactString input@(strCons c str) | (c :: x) = do + pnote "Parsing exact string \{show input}" GotChar next <- parseChar (== c) id | GotError err => throwParseError "Got \{show err} expected \{show c}" | EndOfInput => throwParseError "End of input" @@ -276,6 +281,7 @@ Wrap a parser in delimiter characters, discarding the value of the delimiters export delimited : (before, after : Char) -> Parser a -> Parser a delimited before after x = do + pnote "Parsing delimited by \{show before} \{show after}" starting_state <- save _ <- parseExactChar before val <- x @@ -292,12 +298,14 @@ result. Also a version for doing so on both sides of a provided parser export nom : Parser Char -> Parser () nom x = do + pnote "Nomming" _ <- many x pure () export surround : (around : Parser Char) -> (item : Parser a) -> Parser a surround around item = do + pnote "Surrounding" nom around val <- item nom around diff --git a/src/Parser/ParserState.md b/src/Parser/ParserState.md index be4a783..fb7c481 100644 --- a/src/Parser/ParserState.md +++ b/src/Parser/ParserState.md @@ -213,6 +213,7 @@ data ParserState : Type -> Type where ParseChar : (predicate : Char -> Bool) -> (err : Char -> e) -> ParserState (ParseCharResult e) ParseEoF : ParserState Bool + Note : Lazy String -> ParserState () ``` ### Actions @@ -262,6 +264,11 @@ parseCharE predicate err eof = do export parseEoF : Has ParserState fs => Eff fs Bool parseEoF = send ParseEoF + +||| Make a note to be output when running with a debug handler +export +pnote : Has ParserState fs => Lazy String -> Eff fs () +pnote x = send $ Note x ``` ## Handling a ParserState @@ -299,6 +306,8 @@ handleParserStateIO pi (ParseChar predicate err) = do handleParserStateIO pi ParseEoF = do pi <- readIORef pi readIORef pi.end_of_input +-- We ignore notes in non-debug mode +handleParserStateIO pi (Note _) = pure () export newInternalIO : HasIO io => String -> io $ Maybe (IORef (ParserInternal IORef)) @@ -315,6 +324,11 @@ Wrapper with debugging output export handleParserStateIODebug : HasIO io => IORef (ParserInternal IORef) -> ParserState t -> io t +handleParserStateIODebug x (Note note) = do + state <- readIORef x + state <- btraverse readIORef state + _ <- fPutStrLn stderr "Note \{note} -> \{show state}" + pure () handleParserStateIODebug x y = do state <- readIORef x state <- btraverse readIORef state @@ -341,6 +355,7 @@ unPS pi (ParseChar predicate err) = (GotChar char, {position := MkIndex next} pi) else (GotError $ err char, pi) unPS pi ParseEoF = (pi.end_of_input, pi) +unPS pi (Note _) = ((), pi) export runParserState : Has ParserState fs => From 2e4ab42aa00eca81332d14145a20bb281ca22af4 Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Sun, 26 Jan 2025 20:06:53 -0500 Subject: [PATCH 39/45] json: Notes --- src/Parser/JSON.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/Parser/JSON.md b/src/Parser/JSON.md index 3fa005e..87af1dc 100644 --- a/src/Parser/JSON.md +++ b/src/Parser/JSON.md @@ -108,6 +108,7 @@ Define a `whitespace` character class based on the json specifications ```idris whitespace : Parser Char whitespace = do + pnote "Whitespace character" result <- parseChar (\x => any (== x) $ the (List _) [' ', '\n', '\r', '\t']) id case result of @@ -132,6 +133,7 @@ Top level json value parser export value : Parser (t : JSONType ** JSONValue t) value = do + pnote "JSON Value" surround whitespace $ oneOfE "Expected JSON Value" [ @@ -148,6 +150,7 @@ Now go through our json value types ```idris object = do + pnote "JSON Object" oneOfE "Expected Object" [emptyObject, occupiedObject] @@ -180,6 +183,7 @@ object = do ```idris array = do + pnote "JSON Array" oneOfE "Expected Array" [emptyArray, occupiedArray] @@ -210,6 +214,7 @@ array = do ```idris string = do + pnote "JSON String" str <- parseString $ delimited '"' '"' (many stringCharacter) pure $ VString str where @@ -223,12 +228,14 @@ string = do ```idris number = do + pnote "JSON Number" d <- double pure $ VNumber d ``` ```idris bool = do + pnote "JSON Bool" oneOfE "Expected Bool" [true, false] @@ -245,6 +252,7 @@ bool = do ```idris null = do + pnote "JSON null" _ <- exactString "null" pure VNull ``` From b1a4e1a941b648ceadcc7560767646ddc2c3b15f Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Sun, 26 Jan 2025 20:51:00 -0500 Subject: [PATCH 40/45] core: Remove type argument from ParseCharResult --- src/Parser/Interface.md | 6 +++--- src/Parser/ParserState.md | 42 +++++++++++++-------------------------- 2 files changed, 17 insertions(+), 31 deletions(-) diff --git a/src/Parser/Interface.md b/src/Parser/Interface.md index 18df77a..ddf26c5 100644 --- a/src/Parser/Interface.md +++ b/src/Parser/Interface.md @@ -236,7 +236,7 @@ export parseExactChar : Char -> Parser Char parseExactChar c = do pnote "Parsing exact char: \{show c}" - result <- parseChar (== c) id + result <- parseChar (== c) case result of GotChar char => pure char GotError err => throwParseError "Got \{show err} Expected \{show c}" @@ -250,7 +250,7 @@ export parseTheseChars : List Char -> Parser Char parseTheseChars cs = do pnote "Parsing one of: \{show cs}" - result <- parseChar (\x => any (== x) cs) id + result <- parseChar (\x => any (== x) cs) case result of GotChar char => pure char GotError err => throwParseError "Got \{show err} Expected one of \{show cs}" @@ -268,7 +268,7 @@ exactString str with (asList str) pure "" exactString input@(strCons c str) | (c :: x) = do pnote "Parsing exact string \{show input}" - GotChar next <- parseChar (== c) id + GotChar next <- parseChar (== c) | GotError err => throwParseError "Got \{show err} expected \{show c}" | EndOfInput => throwParseError "End of input" rest <- exactString str | x diff --git a/src/Parser/ParserState.md b/src/Parser/ParserState.md index fb7c481..703e5a7 100644 --- a/src/Parser/ParserState.md +++ b/src/Parser/ParserState.md @@ -195,10 +195,10 @@ btraverse fun (MkInternal input length line_starts position end_of_input) = ```idris ||| Three way result returned from attempting to parse a single char public export -data ParseCharResult : Type -> Type where - GotChar : (char : Char) -> ParseCharResult e - GotError : (err : e) -> ParseCharResult e - EndOfInput : ParseCharResult e +data ParseCharResult : Type where + GotChar : (char : Char) -> ParseCharResult + GotError : (err : Char) -> ParseCharResult + EndOfInput : ParseCharResult ``` ## The Effect Type @@ -210,8 +210,7 @@ data ParserState : Type -> Type where Load : (ParserInternal Id) -> ParserState () -- TODO: Maybe add a ParseString that parses a string of characters as a -- string using efficent slicing? - ParseChar : (predicate : Char -> Bool) -> (err : Char -> e) - -> ParserState (ParseCharResult e) + ParseChar : (predicate : Char -> Bool) -> ParserState ParseCharResult ParseEoF : ParserState Bool Note : Lazy String -> ParserState () ``` @@ -220,7 +219,7 @@ data ParserState : Type -> Type where Show (ParserState t) where show Save = "Saving state" show (Load pi) = "Loading state \{show pi}" - show (ParseChar predicate err) = "Parsing char" + show (ParseChar predicate) = "Parsing char" show ParseEoF = "Parsing EoF" show (Note _) = "Note" --> @@ -242,22 +241,9 @@ load pi = send $ Load pi ||| predicate, updates the state if parsing succeeds, does not alter it in an ||| error condition. export -parseChar : Has ParserState fs => (predicate : Char -> Bool) -> (err : Char -> e) - -> Eff fs (ParseCharResult e) -parseChar predicate err = send $ ParseChar predicate err - -||| Wrapper for parseChar that folds the error message into effect stack with the -||| provided callback -export -parseCharE : Has ParserState fs => Has (Except e) fs => - (predicate : Char -> Bool) -> (err : Char -> e) -> (eof : Lazy e) - -> Eff fs Char -parseCharE predicate err eof = do - result <- parseChar predicate id - case result of - GotChar char => pure char - GotError x => throw $ err x - EndOfInput => throw eof +parseChar : Has ParserState fs => (predicate : Char -> Bool) + -> Eff fs ParseCharResult +parseChar predicate = send $ ParseChar predicate ||| "Parse" the end of input, returning `True` if the parser state is currently ||| at the end of the input @@ -285,14 +271,14 @@ handleParserStateIO pi Save = do handleParserStateIO pi (Load pj) = do pj <- btraverse newIORef pj writeIORef pi pj -handleParserStateIO pi (ParseChar predicate err) = do +handleParserStateIO pi (ParseChar predicate) = do pi <- readIORef pi False <- readIORef pi.end_of_input - | _ => pure $ EndOfInput + | _ => pure EndOfInput position <- readIORef pi.position let char = assert_total $ strIndex pi.input (cast position.index) True <- pure $ predicate char - | _ => pure . GotError $ err char + | _ => pure $ GotError char -- Our refinement type on the position forces us to check that the length is -- in bounds after incrementing it, if its out of bounds, set the end_of_input -- flag @@ -342,7 +328,7 @@ handleParserStateIODebug x y = do unPS : ParserInternal Id -> ParserState a -> (a, ParserInternal Id) unPS pi Save = (pi, pi) unPS pi (Load pj) = ((), pi) -unPS pi (ParseChar predicate err) = +unPS pi (ParseChar predicate) = if pi.end_of_input then (EndOfInput, pi) else let @@ -353,7 +339,7 @@ unPS pi (ParseChar predicate err) = (GotChar char, {end_of_input := True} pi) Just (Element next _) => (GotChar char, {position := MkIndex next} pi) - else (GotError $ err char, pi) + else (GotError char, pi) unPS pi ParseEoF = (pi.end_of_input, pi) unPS pi (Note _) = ((), pi) From 40251a145539f64effbdb27ccedc33da2a5b906a Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Sun, 26 Jan 2025 20:51:10 -0500 Subject: [PATCH 41/45] numbers: ParseCharResult refactor --- src/Parser/Numbers.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parser/Numbers.md b/src/Parser/Numbers.md index fb7b780..0c069ec 100644 --- a/src/Parser/Numbers.md +++ b/src/Parser/Numbers.md @@ -63,7 +63,7 @@ nat b = do where parseDigit : Parser Nat parseDigit = do - GotChar char <- parseChar (hasDigit b) id + GotChar char <- parseChar (hasDigit b) | GotError e => throwParseError "\{show e} is not a digit" | EndOfInput => throwParseError "End Of Input" case digitValue b char of @@ -120,7 +120,7 @@ double = do where parseDigit : Parser Char parseDigit = do - GotChar char <- parseChar (hasDigit base10) id + GotChar char <- parseChar (hasDigit base10) | GotError e => throwParseError "\{show e} is not a digit" | EndOfInput => throwParseError "End Of Input" pure char From 08a2f263bba4372a80c217476852f4084fa8b186 Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Sun, 26 Jan 2025 20:51:24 -0500 Subject: [PATCH 42/45] json: parse char result --- src/Parser/JSON.md | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/Parser/JSON.md b/src/Parser/JSON.md index 87af1dc..b990e54 100644 --- a/src/Parser/JSON.md +++ b/src/Parser/JSON.md @@ -110,7 +110,7 @@ whitespace : Parser Char whitespace = do pnote "Whitespace character" result <- - parseChar (\x => any (== x) $ the (List _) [' ', '\n', '\r', '\t']) id + parseChar (\x => any (== x) $ the (List _) [' ', '\n', '\r', '\t']) case result of GotChar char => pure char GotError err => throwParseError "Expected whitespace, got: \{show err}" @@ -221,9 +221,12 @@ string = do -- TODO: Handle control characters properly stringCharacter : Parser Char stringCharacter = do - e1 <- parseError "Expected non-quote, got quote" - e2 <- parseError "End of input" - parseCharE (not . (== '"')) (\_ => e1) e2 + result <- parseChar (not . (== '"')) + case result of + GotChar char => pure char + GotError err => + throwParseError "Expected string character, got \{show err}" + EndOfInput => throwParseError "Unexpected end of input" ``` ```idris From 93d4d876d9eb55a6c435aec8798e45d5f5fb6457 Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Sun, 26 Jan 2025 21:06:22 -0500 Subject: [PATCH 43/45] core: Factor parseExactChar into the effect --- src/Parser/Interface.md | 3 +-- src/Parser/ParserState.md | 11 +++++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/Parser/Interface.md b/src/Parser/Interface.md index ddf26c5..d9c2db2 100644 --- a/src/Parser/Interface.md +++ b/src/Parser/Interface.md @@ -235,8 +235,7 @@ Attempt to parse a specified character export parseExactChar : Char -> Parser Char parseExactChar c = do - pnote "Parsing exact char: \{show c}" - result <- parseChar (== c) + result <- parseExactChar' c case result of GotChar char => pure char GotError err => throwParseError "Got \{show err} Expected \{show c}" diff --git a/src/Parser/ParserState.md b/src/Parser/ParserState.md index 703e5a7..8668a16 100644 --- a/src/Parser/ParserState.md +++ b/src/Parser/ParserState.md @@ -211,6 +211,7 @@ data ParserState : Type -> Type where -- TODO: Maybe add a ParseString that parses a string of characters as a -- string using efficent slicing? ParseChar : (predicate : Char -> Bool) -> ParserState ParseCharResult + ParseExactChar : (char : Char) -> ParserState ParseCharResult ParseEoF : ParserState Bool Note : Lazy String -> ParserState () ``` @@ -220,6 +221,7 @@ Show (ParserState t) where show Save = "Saving state" show (Load pi) = "Loading state \{show pi}" show (ParseChar predicate) = "Parsing char" + show (ParseExactChar char) = "Parsing char \{show char}" show ParseEoF = "Parsing EoF" show (Note _) = "Note" --> @@ -245,6 +247,11 @@ parseChar : Has ParserState fs => (predicate : Char -> Bool) -> Eff fs ParseCharResult parseChar predicate = send $ ParseChar predicate +||| Parse a char by exact value +export +parseExactChar' : Has ParserState fs => (chr : Char) -> Eff fs ParseCharResult +parseExactChar' chr = send $ ParseExactChar chr + ||| "Parse" the end of input, returning `True` if the parser state is currently ||| at the end of the input export @@ -289,6 +296,9 @@ handleParserStateIO pi (ParseChar predicate) = do Just (Element next _) => do writeIORef pi.position $ MkIndex next pure $ GotChar char +handleParserStateIO pi (ParseExactChar chr) = do + -- TODO: do this directly? + handleParserStateIO pi (ParseChar (== chr)) handleParserStateIO pi ParseEoF = do pi <- readIORef pi readIORef pi.end_of_input @@ -340,6 +350,7 @@ unPS pi (ParseChar predicate) = Just (Element next _) => (GotChar char, {position := MkIndex next} pi) else (GotError char, pi) +unPS pi (ParseExactChar chr) = unPS pi (ParseChar (== chr)) unPS pi ParseEoF = (pi.end_of_input, pi) unPS pi (Note _) = ((), pi) From ef690db972e42122a5a67584eacea273252e9bc4 Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Sun, 26 Jan 2025 21:09:30 -0500 Subject: [PATCH 44/45] core: Properly restore state in `delimited` --- src/Parser/Interface.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/Parser/Interface.md b/src/Parser/Interface.md index d9c2db2..49fe300 100644 --- a/src/Parser/Interface.md +++ b/src/Parser/Interface.md @@ -283,10 +283,14 @@ delimited before after x = do pnote "Parsing delimited by \{show before} \{show after}" starting_state <- save _ <- parseExactChar before - val <- x + Right val <- tryEither x + | Left err => do + load starting_state + throw err Just _ <- tryMaybe $ parseExactChar after - | _ => throw $ - MkParseError starting_state "Unmatched delimiter \{show before}" + | _ => do + load starting_state + throw $ MkParseError starting_state "Unmatched delimiter \{show before}" pure val ``` From 90c48b36729a237fa89668e7d608231cff50d301 Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Sun, 26 Jan 2025 21:09:58 -0500 Subject: [PATCH 45/45] json: delimited arrays --- src/Parser/JSON.md | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/Parser/JSON.md b/src/Parser/JSON.md index b990e54..311057d 100644 --- a/src/Parser/JSON.md +++ b/src/Parser/JSON.md @@ -203,11 +203,7 @@ array = do pure $ first ::: rest occupiedArray : Parser (JSONValue TArray) occupiedArray = do - _ <- parseExactChar '[' - xs <- values - _ <- parseExactChar ']' - -- TODO: Why is this busted? - -- xs <- delimited '[' ']' values + xs <- delimited '[' ']' values let (types ** xs) = DList.fromList (forget xs) pure $ VArray xs ```