diff --git a/README.md b/README.md index 52d86cd..48644ce 100644 --- a/README.md +++ b/README.md @@ -68,6 +68,14 @@ solution. Internal state of a parser + - [Numbers](src/Parser/Numbers.md) + + Parsers for numerical values in multiple bases + + - [JSON](src/Parser/JSON.md) + + JSON Parser + ## Index of years and days - 2015 diff --git a/advent.ipkg b/advent.ipkg index 56f6e75..607e26e 100644 --- a/advent.ipkg +++ b/advent.ipkg @@ -19,6 +19,7 @@ depends = base , tailrec , eff , elab-util + , sop , ansi , if-unsolved-implicit , c-ffi @@ -32,6 +33,8 @@ modules = Runner , Array , Parser , Parser.Interface + , Parser.Numbers + , Parser.JSON -- main file (i.e. file to load at REPL) main = Main diff --git a/src/Parser/Interface.md b/src/Parser/Interface.md index 3dc4251..4656469 100644 --- a/src/Parser/Interface.md +++ b/src/Parser/Interface.md @@ -20,8 +20,11 @@ Combine the parser state at time of error with an error message. ```idris public export data ParseError : Type where + -- TODO: Rename this constructor MkParseError : (state : ParserInternal Id) -> (message : String) -> ParseError BeforeParse : (message : String) -> ParseError + NestedErrors : (state : ParserInternal Id) -> (message : String) + -> (rest : List ParseError) -> ParseError ``` ## Type Alias @@ -41,7 +51,7 @@ Show ParseError where ```idris public export Parser : Type -> Type -Parser a = Eff [ParserState, Except ParseError, Choose] a +Parser a = Eff [ParserState, Except ParseError] a ``` ## Error Generation @@ -93,7 +103,7 @@ no paths in the `Choice` effect produce a returning parse. export rundownFirst : (f : Parser a) -> Eff [ParserState] (Either ParseError a) rundownFirst f = - runExcept . guardMaybe "No returning parses" . runChoose {f = Maybe} $ f + runExcept f ``` Provide wrappers for `rundownFirst` for evaluating it in various contexts. @@ -148,27 +158,29 @@ tryMaybe f = try (map Just f) (\_ => pure Nothing) export tryEither : (f : Parser a) -> Parser (Either ParseError a) tryEither f = try (map Right f) (pure . Left) - -||| Converts any errors thrown by `f` into silent backtracking within `Choose` -export -tryEmpty : (f : Parser a) -> Parser a -tryEmpty f = try f (\_ => empty) ``` Attempt to parse one of the given input parsers, in the provided order, invoking -the provided error action on failure. This will suppress any errors returned by -the input parsers by mapping them to `empty`. +the provided error action on failure. The state will not be modified when an input parser fails ```idris export -oneOfE : Foldable f => - (err : Parser a) -> f (Parser a) -> Parser a -oneOfE err xs = foldr altE err xs +oneOfE : (err : String) -> List (Parser a) -> Parser a +oneOfE err xs = do + start <- save + oneOfE' err start [] xs where - altE : Parser a -> Parser a -> Parser a - altE f rest = (tryEmpty f) `alt` rest + oneOfE' : (err : String) -> (start : ParserInternal Id) + -> (errs : List ParseError) -> List (Parser a) -> Parser a + oneOfE' err start errs [] = do + throw $ NestedErrors start err (reverse errs) + oneOfE' err start errs (x :: xs) = do + x <- tryEither x + case x of + Right val => pure val + Left error => oneOfE' err start (error :: errs) xs ``` Attempt to parse 0+ of an item @@ -198,6 +210,7 @@ Lift a parser producing a `List` or `List1` of `Char` into a parser producing a `String` ```idris +-- TODO: Rename these export parseString : Parser (List Char) -> Parser String parseString x = do @@ -250,6 +263,40 @@ exactString str with (asList str) pure input ``` +Wrap a parser in delimiter characters, discarding the value of the delimiters + +```idris +export +delimited : (before, after : Char) -> Parser a -> Parser a +delimited before after x = do + starting_state <- save + _ <- parseExactChar before + val <- x + Just _ <- tryMaybe $ parseExactChar after + | _ => throw $ + MkParseError starting_state "Unmatched delimiter \{show before}" + pure val +``` + +Consume any number of characters of the provided character class and discard the +result. Also a version for doing so on both sides of a provided parser + +```idris +export +nom : Parser Char -> Parser () +nom x = do + _ <- many x + pure () + +export +surround : (around : Parser Char) -> (item : Parser a) -> Parser a +surround around item = do + nom around + val <- item + nom around + pure val +``` + ### Composition of boolean functions ```idris diff --git a/src/Parser/JSON.md b/src/Parser/JSON.md new file mode 100644 index 0000000..f09c8dd --- /dev/null +++ b/src/Parser/JSON.md @@ -0,0 +1,283 @@ +# JSON Parser + +```idris +module Parser.JSON + +import public Parser +import public Parser.Numbers + +import Structures.Dependent.DList +``` + + + +## JSON components + +Types a JSON value is allowed to have + +```idris +public export +data JSONType : Type where + TObject : JSONType + TArray : JSONType + TString : JSONType + TNumber : JSONType + TBool : JSONType + TNull : JSONType +%runElab derive "JSONType" [Generic, Meta, Eq, Ord, Show, DecEq] +%name JSONType type, type2, type3 +``` + +A JSON value indexed by its type + +```idris +public export +data JSONValue : JSONType -> Type where + VObject : {types : List JSONType} + -> DList JSONType (\t => (String, JSONValue t)) types -> JSONValue TObject + VArray : {types : List JSONType} + -> DList JSONType JSONValue types -> JSONValue TArray + VString : (s : String) -> JSONValue TString + VNumber : (d : Double) -> JSONValue TNumber + VBool : (b : Bool) -> JSONValue TBool + VNull : JSONValue TNull +%name JSONValue value, value2, value3 +``` + + + +## Parsers + +We are going to get mutually recursive here. Instead of using a `mutual` block, +we will use the more modern style of declaring all our types ahead of our +definitions. + +```idris +export +object : Parser (JSONValue TObject) +export +array : Parser (JSONValue TArray) +export +string : Parser (JSONValue TString) +export +number : Parser (JSONValue TNumber) +export +bool : Parser (JSONValue TBool) +export +null : Parser (JSONValue TNull) +``` + +Define a `whitespace` character class based on the json specifications + +```idris +whitespace : Parser Char +whitespace = do + result <- + parseChar (\x => any (== x) $ the (List _) [' ', '\n', '\r', '\t']) id + case result of + GotChar char => pure char + GotError err => throwParseError "Expected whitespace, got: \{show err}" + EndOfInput => throwParseError "End of Input" +``` + +Convenience function + +```idris +dpairize : {t : JSONType} -> + Parser (JSONValue t) -> Parser (t' : JSONType ** JSONValue t') +dpairize x = do + x <- x + pure (_ ** x) +``` + +Top level json value parser + +```idris +export +value : Parser (t : JSONType ** JSONValue t) +value = do + surround whitespace $ oneOfE + "Expected JSON Value" + [ + dpairize object + , dpairize array + , dpairize string + , dpairize number + , dpairize bool + , dpairize null + ] +``` + +Now go through our json value types + +```idris +object = do + oneOfE + "Expected Object" + [emptyObject, occupiedObject] + where + emptyObject : Parser (JSONValue TObject) + emptyObject = do + delimited '{' '}' (nom whitespace) + pure $ VObject Nil + keyValue : Parser (t : JSONType ** (String, JSONValue t)) + keyValue = do + VString key <- surround whitespace string + _ <- parseExactChar ':' + (typ ** val) <- value + pure (typ ** (key, val)) + restKeyValue : Parser (t : JSONType ** (String, JSONValue t)) + restKeyValue = do + _ <- parseExactChar ',' + keyValue + pairs : Parser (List1 (t : JSONType ** (String, JSONValue t))) + pairs = do + first <- keyValue + rest <- many restKeyValue + pure $ first ::: rest + occupiedObject : Parser (JSONValue TObject) + occupiedObject = do + val <- delimited '{' '}' pairs + let (types ** xs) = DList.fromList (forget val) + pure $ VObject xs +``` + +```idris +array = do + oneOfE + "Expected Array" + [emptyArray, occupiedArray] + where + emptyArray : Parser (JSONValue TArray) + emptyArray = do + delimited '[' ']' (nom whitespace) + pure $ VArray Nil + restValue : Parser (t : JSONType ** JSONValue t) + restValue = do + _ <- parseExactChar ',' + value + values : Parser (List1 (t : JSONType ** JSONValue t)) + values = do + first <- value + rest <- many restValue + pure $ first ::: rest + occupiedArray : Parser (JSONValue TArray) + occupiedArray = do + _ <- parseExactChar '[' + xs <- values + _ <- parseExactChar ']' + -- TODO: Why is this busted? + -- xs <- delimited '[' ']' values + let (types ** xs) = DList.fromList (forget xs) + pure $ VArray xs +``` + +```idris +string = do + str <- parseString $ delimited '"' '"' (many stringCharacter) + pure $ VString str + where + -- TODO: Handle control characters properly + stringCharacter : Parser Char + stringCharacter = do + e1 <- parseError "Expected non-quote, got quote" + e2 <- parseError "End of input" + parseCharE (not . (== '"')) (\_ => e1) e2 +``` + +```idris +number = do + d <- double + pure $ VNumber d +``` + +```idris +bool = do + oneOfE + "Expected Bool" + [true, false] + where + true : Parser (JSONValue TBool) + true = do + _ <- exactString "true" + pure $ VBool True + false : Parser (JSONValue TBool) + false = do + _ <- exactString "false" + pure $ VBool False +``` + +```idris +null = do + _ <- exactString "null" + pure VNull +``` + +## Unit tests + +Quick smoke test + +```idris +-- @@test JSON Quick Smoke +quickSmoke : IO Bool +quickSmoke = do + let input = "{\"string\":\"string\",\"number\":5,\"true\":true,\"false\":false,\"null\":null,\"array\":[1,2,3]}" + putStrLn input + Right (type ** parsed) <- runFirstIO value input + | Left err => do + printLn err + pure False + putStrLn "Input: \{input}\nOutput: \{show type} -> \{show parsed}" + let reference_object = + VObject [ + ("string", VString "string") + , ("number", VNumber 5.0) + , ("true", VBool True) + , ("false", VBool False) + , ("null", VNull) + , ("array", VArray [ + VNumber 1.0 + , VNumber 2.0 + , VNumber 3.0 + ]) + ] + case type of + TObject => pure $ parsed == reference_object + _ => pure False +``` diff --git a/src/Parser/Numbers.md b/src/Parser/Numbers.md new file mode 100644 index 0000000..fb7b780 --- /dev/null +++ b/src/Parser/Numbers.md @@ -0,0 +1,257 @@ +# Numerical Parsers + +```idris +module Parser.Numbers + +import public Parser + +import Data.Vect +import Control.Eff +``` + + + +## Base Abstraction + +```idris +public export +record Base where + constructor MkBase + base : Nat + digits : Vect base Char +%name Base b + +export +hasDigit : Base -> Char -> Bool +hasDigit (MkBase base digits) c = any (== c) digits + +export +digitValue : Base -> Char -> Maybe Nat +digitValue (MkBase base digits) c = digitValue' digits 0 + where + digitValue' : Vect n Char -> (idx : Nat) -> Maybe Nat + digitValue' [] idx = Nothing + digitValue' (x :: xs) idx = + if x == c + then Just idx + else digitValue' xs (S idx) + +public export +base10 : Base +base10 = MkBase 10 + ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] + +public export +hex : Base +hex = MkBase 16 + ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'] +``` + +## Parsers + +### Nat + +```idris +export +nat : Base -> Parser Nat +nat b = do + error <- replaceError "Expected digit" + (first ::: rest) <- atLeastOne error parseDigit + pure $ foldl (\acc, e => 10 * acc + e) first rest + where + parseDigit : Parser Nat + parseDigit = do + GotChar char <- parseChar (hasDigit b) id + | GotError e => throwParseError "\{show e} is not a digit" + | EndOfInput => throwParseError "End Of Input" + case digitValue b char of + Nothing => + throwParseError "Failed to parse as base \{show b.base}: \{show char}" + Just x => pure x + +export +natBase10 : Parser Nat +natBase10 = nat base10 +``` + +### Integer + +```idris +export +integer : Base -> Parser Integer +integer b = do + negative <- map isJust . tryMaybe $ parseExactChar '-' + value <- map natToInteger $ nat b + if negative + then pure $ negate value + else pure $ value + +export +integerBase10 : Parser Integer +integerBase10 = integer base10 +``` + +### Double + +```idris +-- TODO: Replicate `parseDouble` logic and make this base-generic +export +double : Parser Double +double = do + starting_state <- save + integer <- integer + fraction <- tryMaybe fraction + exponent <- tryMaybe exponent + let str = case (fraction, exponent) of + (Nothing, Nothing) => + integer + (Nothing, (Just exponent)) => + "\{integer}e\{exponent}" + ((Just fraction), Nothing) => + "\{integer}.\{fraction}" + ((Just fraction), (Just exponent)) => + "\{integer}.\{fraction}e\{exponent}" + Just out <- pure $ parseDouble str + | _ => + throw $ MkParseError starting_state "Std failed to parse as double: \{str}" + pure out + where + parseDigit : Parser Char + parseDigit = do + GotChar char <- parseChar (hasDigit base10) id + | GotError e => throwParseError "\{show e} is not a digit" + | EndOfInput => throwParseError "End Of Input" + pure char + integer : Parser String + integer = do + sign <- tryMaybe $ parseExactChar '-' + error <- replaceError "Expected digit" + digits <- map forget $ atLeastOne error parseDigit + case sign of + Nothing => pure $ pack digits + Just x => pure $ pack (x :: digits) + fraction : Parser String + fraction = do + decimal <- parseExactChar '.' + error <- replaceError "Expected digit" + digits <- map forget $ atLeastOne error parseDigit + pure $ pack digits + exponent : Parser String + exponent = do + e <- parseTheseChars ['e', 'E'] + sign <- parseTheseChars ['+', '-'] + error <- replaceError "Expected digit" + digits <- map forget $ atLeastOne error parseDigit + pure . pack $ sign :: digits +``` + +## Unit tests + +Test roundtripping a value through the provided parser + +```idris +roundtrip : Eq a => Show a => a -> (p : Parser a) -> IO Bool +roundtrip x p = do + let string = show x + putStrLn "Roundtripping \{string}" + Just state <- newInternalIO string + | _ => do + putStrLn "Failed to produce parser for \{string}" + pure False + Right result <- runEff (rundownFirst p) [handleParserStateIO state] {m = IO} + | Left err => do + printLn err + pure False + putStrLn "Input: \{string} Output: \{show result}" + pure $ x == result +``` + +Do some roundtrip tests with the nat parser + +```idris +-- @@test Nat round trip +natRoundTrip : IO Bool +natRoundTrip = pure $ + !(roundtrip 0 natBase10) + && !(roundtrip 1 natBase10) + && !(roundtrip 100 natBase10) + && !(roundtrip 1234 natBase10) + && !(roundtrip 1234567890 natBase10) + && !(roundtrip 1234567890000 natBase10) + && !(roundtrip 12345678901234567890 natBase10) +``` + +```idris +-- @@test Integer round trip +integerRoundTrip : IO Bool +integerRoundTrip = pure $ + !(roundtrip 0 integerBase10) + && !(roundtrip 1 integerBase10) + && !(roundtrip 100 integerBase10) + && !(roundtrip 1234 integerBase10) + && !(roundtrip 1234567890 integerBase10) + && !(roundtrip 1234567890000 integerBase10) + && !(roundtrip 12345678901234567890 integerBase10) + && !(roundtrip (-1) integerBase10) + && !(roundtrip (-100) integerBase10) + && !(roundtrip (-1234) integerBase10) + && !(roundtrip (-1234567890) integerBase10) + && !(roundtrip (-1234567890000) integerBase10) + && !(roundtrip (-12345678901234567890) integerBase10) +``` + +Compare our parsing of a double to the standard library's + +```idris +compareDouble : String -> IO Bool +compareDouble string = do + Just state <- newInternalIO string + | _ => do + putStrLn "Failed to produce parser for \{string}" + pure False + Right result <- + runEff (rundownFirst double) [handleParserStateIO state] {m = IO} + | Left err => do + printLn err + pure False + putStrLn "Input: \{string} Output: \{show result}" + Just double' <- pure $ parseDouble string + | _ => do + printLn "Std failed to parse as double: \{string}" + pure False + pure $ result == double' +``` + +```idris +-- @@test Double Std Comparison +doubleRoundTrip : IO Bool +doubleRoundTrip = pure $ + !(compareDouble "0") + && !(compareDouble "1") + && !(compareDouble "100") + && !(compareDouble "1234") + && !(compareDouble "1234567890") + && !(compareDouble "1234567890000") + && !(compareDouble "12345678901234567890") + && !(compareDouble "-1") + && !(compareDouble "-100") + && !(compareDouble "-1234") + && !(compareDouble "-1234567890") + && !(compareDouble "-1234567890000") + && !(compareDouble "-12345678901234567890") + && !(compareDouble "0.0") + && !(compareDouble "1.0") + && !(compareDouble "-1.0") + && !(compareDouble "-0.0") + && !(compareDouble "-0.0") + && !(compareDouble "0.1234") + && !(compareDouble "0.01234") + && !(compareDouble "-0.1234") + && !(compareDouble "-0.01234") + && !(compareDouble "1.234e+5") + && !(compareDouble "1.234e-5") + && !(compareDouble "-1.234e+5") + && !(compareDouble "-1.234e-5") +```