diff --git a/README.md b/README.md index d8bd7e3..48644ce 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,26 @@ solution. Provider wrappers over the standard library `IOArray` type to make them more ergonomic to use. +- [Parser](src/Parser.md) + + Effectful parser mini-library + + - [Interface](src/Parser/Interface.md) + + Effectful parser API + + - [ParserState](src/Parser/ParserState.md) + + Internal state of a parser + + - [Numbers](src/Parser/Numbers.md) + + Parsers for numerical values in multiple bases + + - [JSON](src/Parser/JSON.md) + + JSON Parser + ## Index of years and days - 2015 diff --git a/advent.ipkg b/advent.ipkg index 23c2130..607e26e 100644 --- a/advent.ipkg +++ b/advent.ipkg @@ -19,6 +19,7 @@ depends = base , tailrec , eff , elab-util + , sop , ansi , if-unsolved-implicit , c-ffi @@ -30,6 +31,10 @@ modules = Runner , Util.Eff , Util.Digits , Array + , Parser + , Parser.Interface + , Parser.Numbers + , Parser.JSON -- main file (i.e. file to load at REPL) main = Main diff --git a/src/Parser.md b/src/Parser.md new file mode 100644 index 0000000..9eacc0b --- /dev/null +++ b/src/Parser.md @@ -0,0 +1,8 @@ +# Parsing Utilties + +```idris +module Parser + +import public Parser.Interface as Parser +import public Parser.ParserState as Parser +``` diff --git a/src/Parser/Interface.md b/src/Parser/Interface.md new file mode 100644 index 0000000..49fe300 --- /dev/null +++ b/src/Parser/Interface.md @@ -0,0 +1,332 @@ +# The interface of a `Parser` + +```idris +module Parser.Interface + +import public Data.List1 + +import public Parser.ParserState + +import public Control.Eff + +export infixr 4 >| +export infixr 5 >& +``` + +## Parser Errors + +Combine the parser state at time of error with an error message. + +```idris +public export +data ParseError : Type where + -- TODO: Rename this constructor + MkParseError : (state : ParserInternal Id) -> (message : String) -> ParseError + BeforeParse : (message : String) -> ParseError + NestedErrors : (state : ParserInternal Id) -> (message : String) + -> (rest : List ParseError) -> ParseError +``` + + + +## Type Alias + +```idris +public export +Parser : Type -> Type +Parser a = Eff [ParserState, Except ParseError] a +``` + +## Error Generation + +Provide a few effectful actions to generate an error from an error message, and +either return it or throw it. + +```idris +export +parseError : Has ParserState fs => (message : String) -> Eff fs ParseError +parseError message = do + state <- save + pure $ MkParseError state message + +export +throwParseError : Has ParserState fs => Has (Except ParseError) fs => + (message : String) -> Eff fs a +throwParseError message = do + err <- parseError message + throw err + +export +guardMaybe : Has ParserState fs => Has (Except ParseError) fs => + (message : String) -> Eff fs (Maybe a) -> Eff fs a +guardMaybe message x = do + Just x <- x + | _ => throwParseError message + pure x + +export +replaceError : (message : String) -> Parser (a -> Parser b) +replaceError message = do + state <- save + pure (\_ => throw $ MkParseError state message) +``` + +## Running a parser + +We will use the phrasing "rundown" to refer to running all the effects in the +parser effect stack except `ParserState`, which is left in the effect stack to +facilitate handling in the context of another monad or effect stack, since it +benefits from mutability. + +Rundown a parser, accepting the first returning parse, which may be failing or +succeding, and automatically generating a "no valid parses" message in the event +no paths in the `Choice` effect produce a returning parse. + +```idris +export +rundownFirst : (f : Parser a) -> Eff [ParserState] (Either ParseError a) +rundownFirst f = + runExcept f +``` + +Provide wrappers for `rundownFirst` for evaluating it in various contexts. + +```idris +export +runFirstIO : (f : Parser a) -> String -> IO (Either ParseError a) +runFirstIO f str = do + Just state <- newInternalIO str + | _ => pure . Left $ BeforeParse "Empty input" + runEff (rundownFirst f) [handleParserStateIO state] + +export +runFirstIODebug : (f : Parser a) -> String -> IO (Either ParseError a) +runFirstIODebug f str = do + Just state <- newInternalIO str + | _ => pure . Left $ BeforeParse "Empty input" + runEff (rundownFirst f) [handleParserStateIODebug state] + +export +runFirst : (f : Parser a) -> String -> Eff fs (Either ParseError a) +runFirst f str = do + Just state <- pure $ newInternal str + | _ => pure . Left $ BeforeParse "Empty input" + (result, _) <- lift . runParserState state . rundownFirst $ f + pure result + +export +runFirst' : (f : Parser a) -> String -> Either ParseError a +runFirst' f str = extract $ runFirst f str {fs = []} +``` + +## Utility functionality + +### Parser combinators + +Try to run a computation in the context of the `Parser` effect stack, if it +fails (via `Except`), reset the state and resort to the supplied callback + +Also supply a version specialized to ignore the error value, returning `Just a` +if the parse succeeds, and `Nothing` if it fails. + +```idris +export +try : (f : Parser a) -> (err : ParseError -> Parser a) -> Parser a +try f err = do + starting_state <- save + result <- lift . runExcept $ f + case result of + Left error => do + load starting_state + err error + Right result => pure result + +export +tryMaybe : (f : Parser a) -> Parser (Maybe a) +tryMaybe f = try (map Just f) (\_ => pure Nothing) + +export +tryEither : (f : Parser a) -> Parser (Either ParseError a) +tryEither f = try (map Right f) (pure . Left) +``` + +Attempt to parse one of the given input parsers, in the provided order, invoking +the provided error action on failure. + +The state will not be modified when an input parser fails + +```idris +export +oneOfE : (err : String) -> List (Parser a) -> Parser a +oneOfE err xs = do + start <- save + oneOfE' err start [] xs + where + oneOfE' : (err : String) -> (start : ParserInternal Id) + -> (errs : List ParseError) -> List (Parser a) -> Parser a + oneOfE' err start errs [] = do + throw $ NestedErrors start err (reverse errs) + oneOfE' err start errs (x :: xs) = do + x <- tryEither x + case x of + Right val => pure val + Left error => oneOfE' err start (error :: errs) xs +``` + +Attempt to parse 0+ of an item + +```idris +export +many : (f : Parser a) -> Parser (List a) +many f = do + Just next <- tryMaybe f + | _ => pure [] + map (next ::) $ many f +``` + +Attempt to parse 1+ of an item, invoking the supplied error action on failure + +```idris +export +atLeastOne : (err : ParseError -> Parser (List1 a)) -> (f : Parser a) + -> Parser (List1 a) +atLeastOne err f = do + Right next <- tryEither f + | Left e => err e + map (next :::) $ many f +``` + +Lift a parser producing a `List` or `List1` of `Char` into a parser producing a +`String` + +```idris +-- TODO: Rename these +export +parseString : Parser (List Char) -> Parser String +parseString x = do + xs <- x + pure $ pack xs + +export +parseString' : Parser (List1 Char) -> Parser String +parseString' x = parseString $ map forget x +``` + +Attempt to parse a specified character + +```idris +export +parseExactChar : Char -> Parser Char +parseExactChar c = do + result <- parseExactChar' c + case result of + GotChar char => pure char + GotError err => throwParseError "Got \{show err} Expected \{show c}" + EndOfInput => throwParseError "End of input" +``` + +Attempt to parse one of a list of chars + +```idris +export +parseTheseChars : List Char -> Parser Char +parseTheseChars cs = do + pnote "Parsing one of: \{show cs}" + result <- parseChar (\x => any (== x) cs) + case result of + GotChar char => pure char + GotError err => throwParseError "Got \{show err} Expected one of \{show cs}" + EndOfInput => throwParseError "End of input" +``` + +Attempt to parse an exact string + +```idris +export +exactString : String -> Parser String +exactString str with (asList str) + exactString "" | [] = do + pnote "Parsing the empty string" + pure "" + exactString input@(strCons c str) | (c :: x) = do + pnote "Parsing exact string \{show input}" + GotChar next <- parseChar (== c) + | GotError err => throwParseError "Got \{show err} expected \{show c}" + | EndOfInput => throwParseError "End of input" + rest <- exactString str | x + pure input +``` + +Wrap a parser in delimiter characters, discarding the value of the delimiters + +```idris +export +delimited : (before, after : Char) -> Parser a -> Parser a +delimited before after x = do + pnote "Parsing delimited by \{show before} \{show after}" + starting_state <- save + _ <- parseExactChar before + Right val <- tryEither x + | Left err => do + load starting_state + throw err + Just _ <- tryMaybe $ parseExactChar after + | _ => do + load starting_state + throw $ MkParseError starting_state "Unmatched delimiter \{show before}" + pure val +``` + +Consume any number of characters of the provided character class and discard the +result. Also a version for doing so on both sides of a provided parser + +```idris +export +nom : Parser Char -> Parser () +nom x = do + pnote "Nomming" + _ <- many x + pure () + +export +surround : (around : Parser Char) -> (item : Parser a) -> Parser a +surround around item = do + pnote "Surrounding" + nom around + val <- item + nom around + pure val +``` + +### Composition of boolean functions + +```idris +||| Return true if both of the predicates evaluate to true +public export +(>&) : (a : e -> Bool) -> (b : e -> Bool) -> (e -> Bool) +(>&) a b x = a x && b x +``` + +```idris +||| Return true if either of the predicates evaulates to true +public export +(>|) : (a : e -> Bool) -> (b : e -> Bool) -> (e -> Bool) +(>|) a b x = a x || b x +``` diff --git a/src/Parser/JSON.md b/src/Parser/JSON.md new file mode 100644 index 0000000..311057d --- /dev/null +++ b/src/Parser/JSON.md @@ -0,0 +1,290 @@ +# JSON Parser + +```idris +module Parser.JSON + +import public Parser +import public Parser.Numbers + +import Structures.Dependent.DList +``` + + + +## JSON components + +Types a JSON value is allowed to have + +```idris +public export +data JSONType : Type where + TObject : JSONType + TArray : JSONType + TString : JSONType + TNumber : JSONType + TBool : JSONType + TNull : JSONType +%runElab derive "JSONType" [Generic, Meta, Eq, Ord, Show, DecEq] +%name JSONType type, type2, type3 +``` + +A JSON value indexed by its type + +```idris +public export +data JSONValue : JSONType -> Type where + VObject : {types : List JSONType} + -> DList JSONType (\t => (String, JSONValue t)) types -> JSONValue TObject + VArray : {types : List JSONType} + -> DList JSONType JSONValue types -> JSONValue TArray + VString : (s : String) -> JSONValue TString + VNumber : (d : Double) -> JSONValue TNumber + VBool : (b : Bool) -> JSONValue TBool + VNull : JSONValue TNull +%name JSONValue value, value2, value3 +``` + + + +## Parsers + +We are going to get mutually recursive here. Instead of using a `mutual` block, +we will use the more modern style of declaring all our types ahead of our +definitions. + +```idris +export +object : Parser (JSONValue TObject) +export +array : Parser (JSONValue TArray) +export +string : Parser (JSONValue TString) +export +number : Parser (JSONValue TNumber) +export +bool : Parser (JSONValue TBool) +export +null : Parser (JSONValue TNull) +``` + +Define a `whitespace` character class based on the json specifications + +```idris +whitespace : Parser Char +whitespace = do + pnote "Whitespace character" + result <- + parseChar (\x => any (== x) $ the (List _) [' ', '\n', '\r', '\t']) + case result of + GotChar char => pure char + GotError err => throwParseError "Expected whitespace, got: \{show err}" + EndOfInput => throwParseError "End of Input" +``` + +Convenience function + +```idris +dpairize : {t : JSONType} -> + Parser (JSONValue t) -> Parser (t' : JSONType ** JSONValue t') +dpairize x = do + x <- x + pure (_ ** x) +``` + +Top level json value parser + +```idris +export +value : Parser (t : JSONType ** JSONValue t) +value = do + pnote "JSON Value" + surround whitespace $ oneOfE + "Expected JSON Value" + [ + dpairize object + , dpairize array + , dpairize string + , dpairize number + , dpairize bool + , dpairize null + ] +``` + +Now go through our json value types + +```idris +object = do + pnote "JSON Object" + oneOfE + "Expected Object" + [emptyObject, occupiedObject] + where + emptyObject : Parser (JSONValue TObject) + emptyObject = do + delimited '{' '}' (nom whitespace) + pure $ VObject Nil + keyValue : Parser (t : JSONType ** (String, JSONValue t)) + keyValue = do + VString key <- surround whitespace string + _ <- parseExactChar ':' + (typ ** val) <- value + pure (typ ** (key, val)) + restKeyValue : Parser (t : JSONType ** (String, JSONValue t)) + restKeyValue = do + _ <- parseExactChar ',' + keyValue + pairs : Parser (List1 (t : JSONType ** (String, JSONValue t))) + pairs = do + first <- keyValue + rest <- many restKeyValue + pure $ first ::: rest + occupiedObject : Parser (JSONValue TObject) + occupiedObject = do + val <- delimited '{' '}' pairs + let (types ** xs) = DList.fromList (forget val) + pure $ VObject xs +``` + +```idris +array = do + pnote "JSON Array" + oneOfE + "Expected Array" + [emptyArray, occupiedArray] + where + emptyArray : Parser (JSONValue TArray) + emptyArray = do + delimited '[' ']' (nom whitespace) + pure $ VArray Nil + restValue : Parser (t : JSONType ** JSONValue t) + restValue = do + _ <- parseExactChar ',' + value + values : Parser (List1 (t : JSONType ** JSONValue t)) + values = do + first <- value + rest <- many restValue + pure $ first ::: rest + occupiedArray : Parser (JSONValue TArray) + occupiedArray = do + xs <- delimited '[' ']' values + let (types ** xs) = DList.fromList (forget xs) + pure $ VArray xs +``` + +```idris +string = do + pnote "JSON String" + str <- parseString $ delimited '"' '"' (many stringCharacter) + pure $ VString str + where + -- TODO: Handle control characters properly + stringCharacter : Parser Char + stringCharacter = do + result <- parseChar (not . (== '"')) + case result of + GotChar char => pure char + GotError err => + throwParseError "Expected string character, got \{show err}" + EndOfInput => throwParseError "Unexpected end of input" +``` + +```idris +number = do + pnote "JSON Number" + d <- double + pure $ VNumber d +``` + +```idris +bool = do + pnote "JSON Bool" + oneOfE + "Expected Bool" + [true, false] + where + true : Parser (JSONValue TBool) + true = do + _ <- exactString "true" + pure $ VBool True + false : Parser (JSONValue TBool) + false = do + _ <- exactString "false" + pure $ VBool False +``` + +```idris +null = do + pnote "JSON null" + _ <- exactString "null" + pure VNull +``` + +## Unit tests + +Quick smoke test + +```idris +-- @@test JSON Quick Smoke +quickSmoke : IO Bool +quickSmoke = do + let input = "{\"string\":\"string\",\"number\":5,\"true\":true,\"false\":false,\"null\":null,\"array\":[1,2,3]}" + putStrLn input + Right (type ** parsed) <- runFirstIODebug value input + | Left err => do + printLn err + pure False + putStrLn "Input: \{input}\nOutput: \{show type} -> \{show parsed}" + let reference_object = + VObject [ + ("string", VString "string") + , ("number", VNumber 5.0) + , ("true", VBool True) + , ("false", VBool False) + , ("null", VNull) + , ("array", VArray [ + VNumber 1.0 + , VNumber 2.0 + , VNumber 3.0 + ]) + ] + case type of + TObject => pure $ parsed == reference_object + _ => pure False +``` diff --git a/src/Parser/Numbers.md b/src/Parser/Numbers.md new file mode 100644 index 0000000..0c069ec --- /dev/null +++ b/src/Parser/Numbers.md @@ -0,0 +1,257 @@ +# Numerical Parsers + +```idris +module Parser.Numbers + +import public Parser + +import Data.Vect +import Control.Eff +``` + + + +## Base Abstraction + +```idris +public export +record Base where + constructor MkBase + base : Nat + digits : Vect base Char +%name Base b + +export +hasDigit : Base -> Char -> Bool +hasDigit (MkBase base digits) c = any (== c) digits + +export +digitValue : Base -> Char -> Maybe Nat +digitValue (MkBase base digits) c = digitValue' digits 0 + where + digitValue' : Vect n Char -> (idx : Nat) -> Maybe Nat + digitValue' [] idx = Nothing + digitValue' (x :: xs) idx = + if x == c + then Just idx + else digitValue' xs (S idx) + +public export +base10 : Base +base10 = MkBase 10 + ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] + +public export +hex : Base +hex = MkBase 16 + ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'] +``` + +## Parsers + +### Nat + +```idris +export +nat : Base -> Parser Nat +nat b = do + error <- replaceError "Expected digit" + (first ::: rest) <- atLeastOne error parseDigit + pure $ foldl (\acc, e => 10 * acc + e) first rest + where + parseDigit : Parser Nat + parseDigit = do + GotChar char <- parseChar (hasDigit b) + | GotError e => throwParseError "\{show e} is not a digit" + | EndOfInput => throwParseError "End Of Input" + case digitValue b char of + Nothing => + throwParseError "Failed to parse as base \{show b.base}: \{show char}" + Just x => pure x + +export +natBase10 : Parser Nat +natBase10 = nat base10 +``` + +### Integer + +```idris +export +integer : Base -> Parser Integer +integer b = do + negative <- map isJust . tryMaybe $ parseExactChar '-' + value <- map natToInteger $ nat b + if negative + then pure $ negate value + else pure $ value + +export +integerBase10 : Parser Integer +integerBase10 = integer base10 +``` + +### Double + +```idris +-- TODO: Replicate `parseDouble` logic and make this base-generic +export +double : Parser Double +double = do + starting_state <- save + integer <- integer + fraction <- tryMaybe fraction + exponent <- tryMaybe exponent + let str = case (fraction, exponent) of + (Nothing, Nothing) => + integer + (Nothing, (Just exponent)) => + "\{integer}e\{exponent}" + ((Just fraction), Nothing) => + "\{integer}.\{fraction}" + ((Just fraction), (Just exponent)) => + "\{integer}.\{fraction}e\{exponent}" + Just out <- pure $ parseDouble str + | _ => + throw $ MkParseError starting_state "Std failed to parse as double: \{str}" + pure out + where + parseDigit : Parser Char + parseDigit = do + GotChar char <- parseChar (hasDigit base10) + | GotError e => throwParseError "\{show e} is not a digit" + | EndOfInput => throwParseError "End Of Input" + pure char + integer : Parser String + integer = do + sign <- tryMaybe $ parseExactChar '-' + error <- replaceError "Expected digit" + digits <- map forget $ atLeastOne error parseDigit + case sign of + Nothing => pure $ pack digits + Just x => pure $ pack (x :: digits) + fraction : Parser String + fraction = do + decimal <- parseExactChar '.' + error <- replaceError "Expected digit" + digits <- map forget $ atLeastOne error parseDigit + pure $ pack digits + exponent : Parser String + exponent = do + e <- parseTheseChars ['e', 'E'] + sign <- parseTheseChars ['+', '-'] + error <- replaceError "Expected digit" + digits <- map forget $ atLeastOne error parseDigit + pure . pack $ sign :: digits +``` + +## Unit tests + +Test roundtripping a value through the provided parser + +```idris +roundtrip : Eq a => Show a => a -> (p : Parser a) -> IO Bool +roundtrip x p = do + let string = show x + putStrLn "Roundtripping \{string}" + Just state <- newInternalIO string + | _ => do + putStrLn "Failed to produce parser for \{string}" + pure False + Right result <- runEff (rundownFirst p) [handleParserStateIO state] {m = IO} + | Left err => do + printLn err + pure False + putStrLn "Input: \{string} Output: \{show result}" + pure $ x == result +``` + +Do some roundtrip tests with the nat parser + +```idris +-- @@test Nat round trip +natRoundTrip : IO Bool +natRoundTrip = pure $ + !(roundtrip 0 natBase10) + && !(roundtrip 1 natBase10) + && !(roundtrip 100 natBase10) + && !(roundtrip 1234 natBase10) + && !(roundtrip 1234567890 natBase10) + && !(roundtrip 1234567890000 natBase10) + && !(roundtrip 12345678901234567890 natBase10) +``` + +```idris +-- @@test Integer round trip +integerRoundTrip : IO Bool +integerRoundTrip = pure $ + !(roundtrip 0 integerBase10) + && !(roundtrip 1 integerBase10) + && !(roundtrip 100 integerBase10) + && !(roundtrip 1234 integerBase10) + && !(roundtrip 1234567890 integerBase10) + && !(roundtrip 1234567890000 integerBase10) + && !(roundtrip 12345678901234567890 integerBase10) + && !(roundtrip (-1) integerBase10) + && !(roundtrip (-100) integerBase10) + && !(roundtrip (-1234) integerBase10) + && !(roundtrip (-1234567890) integerBase10) + && !(roundtrip (-1234567890000) integerBase10) + && !(roundtrip (-12345678901234567890) integerBase10) +``` + +Compare our parsing of a double to the standard library's + +```idris +compareDouble : String -> IO Bool +compareDouble string = do + Just state <- newInternalIO string + | _ => do + putStrLn "Failed to produce parser for \{string}" + pure False + Right result <- + runEff (rundownFirst double) [handleParserStateIO state] {m = IO} + | Left err => do + printLn err + pure False + putStrLn "Input: \{string} Output: \{show result}" + Just double' <- pure $ parseDouble string + | _ => do + printLn "Std failed to parse as double: \{string}" + pure False + pure $ result == double' +``` + +```idris +-- @@test Double Std Comparison +doubleRoundTrip : IO Bool +doubleRoundTrip = pure $ + !(compareDouble "0") + && !(compareDouble "1") + && !(compareDouble "100") + && !(compareDouble "1234") + && !(compareDouble "1234567890") + && !(compareDouble "1234567890000") + && !(compareDouble "12345678901234567890") + && !(compareDouble "-1") + && !(compareDouble "-100") + && !(compareDouble "-1234") + && !(compareDouble "-1234567890") + && !(compareDouble "-1234567890000") + && !(compareDouble "-12345678901234567890") + && !(compareDouble "0.0") + && !(compareDouble "1.0") + && !(compareDouble "-1.0") + && !(compareDouble "-0.0") + && !(compareDouble "-0.0") + && !(compareDouble "0.1234") + && !(compareDouble "0.01234") + && !(compareDouble "-0.1234") + && !(compareDouble "-0.01234") + && !(compareDouble "1.234e+5") + && !(compareDouble "1.234e-5") + && !(compareDouble "-1.234e+5") + && !(compareDouble "-1.234e-5") +``` diff --git a/src/Parser/ParserState.md b/src/Parser/ParserState.md new file mode 100644 index 0000000..8668a16 --- /dev/null +++ b/src/Parser/ParserState.md @@ -0,0 +1,369 @@ +# Parser State + +An effectful description of the text a parser consumes + +```idris +module Parser.ParserState + +import public Data.String +import public Data.DPair +import public Data.Refined +import public Data.Refined.Int64 +import public Data.SortedMap +import public Data.IORef + +import Data.Primitives.Interpolation +import System.File + +import public Control.Eff +``` + +## Barbie Basics + +Barbies are types that can "change their clothes", in Idris, this manifests as a +type indexed by a type-level function that affects the types of the fields. + +Since we know our usage here is going to be quite simple, and not even really +making use of dependently typed fun, we are going to implement all the barbie +functionality we need by hand, but if you feel like barbies might be a good fit +for your problem, or you simply want to learn more, please check out a library +like `barbies`[^1] + +```idris +public export +Id : Type -> Type +Id x = x +``` + +## Internal State of a Parser + +Type alias for our refined `Int64`s + +```idris +public export +0 IsIndex : (length : Int64) -> Int64 -> Type +IsIndex length = From 0 && LessThan length + +public export +record Index (length : Int64) where + constructor MkIndex + index : Int64 + {auto 0 prf : IsIndex length index} +``` + + + +Stores the location we are currently at in the string, and metadata about it for +providing good error messages. Parsing an empty input isn't very interesting, so +we exclude inputs of length zero, since that will make other things easier. + +```idris +||| State representing a parser's position in the text +public export +record ParserInternal (f : Type -> Type) where + constructor MkInternal + -- IDEA: Maybe go full barbie and have this be a field, so that we can, say, + -- read directly from a file instead of from an already loaded string using the + -- same parser + ||| The input string + input : String + ||| The length of the input string + length : Int64 + {auto 0 len_prf : length = cast (strLength input)} + ||| A sorted set containing the positions of the start of each line + line_starts : SortedMap (Index length) Nat + ||| The position of the next character to read in the input + position : f (Index length) + ||| True if we have hit the end of input + end_of_input : f Bool +%name ParserInternal pi, pj, pk +``` + +### ParserInternal Methods + +Construct a `ParserInternal` from an input string. Will fail if the input is +empty, because then we can't index it. + +```idris +export +newInternal : (input : String) -> Maybe (ParserInternal Id) +newInternal input = + -- Check if we have at least one character in the input + case refine0 0 {p = IsIndex (cast (strLength input))} of + Nothing => Nothing + Just (Element position _) => Just $ + MkInternal input + (cast (strLength input)) + (mkStarts' input (MkIndex position)) + (MkIndex position) + False + where + partial + mkStarts : + (str : String) -> (acc : List (Index (cast (strLength str)), Nat)) + -> (idx : Index (cast (strLength str))) -> (count : Nat) -> (next : Bool) + -> List (Index (cast (strLength str)), Nat) + mkStarts str acc idx count True = + mkStarts str ((idx, count) :: acc) idx (S count) False + mkStarts str acc idx count False = + case refine0 (idx.index + 1) {p = IsIndex (cast (strLength str))} of + Nothing => acc + Just (Element next _) => + if strIndex str (cast idx.index) == '\n' + then mkStarts str acc (MkIndex next) count True + else mkStarts str acc (MkIndex next) count False + mkStarts' : (str : String) -> (start : Index (cast (strLength str))) + -> SortedMap (Index (cast (strLength str))) Nat + mkStarts' str start = + let + pairs = assert_total $ + mkStarts str [] start 0 True + in fromList pairs +``` + +Get the current line and column number + +```idris +||| Returns the current position of the parser cursor in, zero indexed, (line, +||| column) form +export +positionPair : (pi : ParserInternal Id) -> (Nat, Nat) +positionPair pi = + case lookup pi.position pi.line_starts of + Just line => (line, 0) + Nothing => + case lookupBetween pi.position pi.line_starts of + -- There will always be at least one line start, and we would have hit + -- the previous case if we were at the start of the first one, so if + -- there isn't a before, we can return a nonsense value safely + (Nothing, _) => (0, 0) + (Just (start, linum), after) => + -- Our index will always be after the start of the line, for previously + -- mentioned reasons, so this cast is safe + let col = cast {to = Nat} $ pi.position.index - start.index + in (linum, col) +``` + + + +### More Barbie Functionality + +Provide the barbie analogs of `map` and `traverse` for our `ParserInternal` +type, allowing us to change the type the values in a `ParserInternal` by mapping +over those values. + +```idris +export +bmap : ({0 a : Type} -> f a -> g a) -> ParserInternal f -> ParserInternal g +-- bmap f = bmap_ (\_ => f) +bmap fun (MkInternal input length line_starts position end_of_input) = + let position' = fun position + end_of_input' = fun end_of_input + in MkInternal input length line_starts position' end_of_input' + +export +btraverse : Applicative e => ({0 a : Type} -> f a -> e (g a)) + -> ParserInternal f -> e (ParserInternal g) +btraverse fun (MkInternal input length line_starts position end_of_input) = + let pures = (MkInternal input length line_starts) + in [| pures (fun position) (fun end_of_input)|] +``` + +## Three way result + +```idris +||| Three way result returned from attempting to parse a single char +public export +data ParseCharResult : Type where + GotChar : (char : Char) -> ParseCharResult + GotError : (err : Char) -> ParseCharResult + EndOfInput : ParseCharResult +``` + +## The Effect Type + +```idris +export +data ParserState : Type -> Type where + Save : ParserState (ParserInternal Id) + Load : (ParserInternal Id) -> ParserState () + -- TODO: Maybe add a ParseString that parses a string of characters as a + -- string using efficent slicing? + ParseChar : (predicate : Char -> Bool) -> ParserState ParseCharResult + ParseExactChar : (char : Char) -> ParserState ParseCharResult + ParseEoF : ParserState Bool + Note : Lazy String -> ParserState () +``` + + + +### Actions + +```idris +||| Return the current state, for potential later reloading +export +save : Has ParserState fs => Eff fs (ParserInternal Id) +save = send Save + +||| Reset to the provided state +export +load : Has ParserState fs => ParserInternal Id -> Eff fs () +load pi = send $ Load pi + +||| Attempt to parse a char, checking to see if it complies with the supplied +||| predicate, updates the state if parsing succeeds, does not alter it in an +||| error condition. +export +parseChar : Has ParserState fs => (predicate : Char -> Bool) + -> Eff fs ParseCharResult +parseChar predicate = send $ ParseChar predicate + +||| Parse a char by exact value +export +parseExactChar' : Has ParserState fs => (chr : Char) -> Eff fs ParseCharResult +parseExactChar' chr = send $ ParseExactChar chr + +||| "Parse" the end of input, returning `True` if the parser state is currently +||| at the end of the input +export +parseEoF : Has ParserState fs => Eff fs Bool +parseEoF = send ParseEoF + +||| Make a note to be output when running with a debug handler +export +pnote : Has ParserState fs => Lazy String -> Eff fs () +pnote x = send $ Note x +``` + +## Handling a ParserState + +### IO Context + +```idris +export +handleParserStateIO : HasIO io => + IORef (ParserInternal IORef) -> ParserState t -> io t +handleParserStateIO pi Save = do + pi <- readIORef pi + btraverse readIORef pi +handleParserStateIO pi (Load pj) = do + pj <- btraverse newIORef pj + writeIORef pi pj +handleParserStateIO pi (ParseChar predicate) = do + pi <- readIORef pi + False <- readIORef pi.end_of_input + | _ => pure EndOfInput + position <- readIORef pi.position + let char = assert_total $ strIndex pi.input (cast position.index) + True <- pure $ predicate char + | _ => pure $ GotError char + -- Our refinement type on the position forces us to check that the length is + -- in bounds after incrementing it, if its out of bounds, set the end_of_input + -- flag + case refine0 (position.index + 1) {p = IsIndex pi.length} of + Nothing => do + writeIORef pi.end_of_input True + pure $ GotChar char + Just (Element next _) => do + writeIORef pi.position $ MkIndex next + pure $ GotChar char +handleParserStateIO pi (ParseExactChar chr) = do + -- TODO: do this directly? + handleParserStateIO pi (ParseChar (== chr)) +handleParserStateIO pi ParseEoF = do + pi <- readIORef pi + readIORef pi.end_of_input +-- We ignore notes in non-debug mode +handleParserStateIO pi (Note _) = pure () + +export +newInternalIO : HasIO io => String -> io $ Maybe (IORef (ParserInternal IORef)) +newInternalIO str = do + Just internal <- pure $ newInternal str + | _ => pure Nothing + internal <- btraverse newIORef internal + map Just $ newIORef internal +``` + +Wrapper with debugging output + +```idris +export +handleParserStateIODebug : HasIO io => + IORef (ParserInternal IORef) -> ParserState t -> io t +handleParserStateIODebug x (Note note) = do + state <- readIORef x + state <- btraverse readIORef state + _ <- fPutStrLn stderr "Note \{note} -> \{show state}" + pure () +handleParserStateIODebug x y = do + state <- readIORef x + state <- btraverse readIORef state + _ <- fPutStrLn stderr "\{show y} -> \{show state}" + handleParserStateIO x y +``` + +### State context + +```idris +unPS : ParserInternal Id -> ParserState a -> (a, ParserInternal Id) +unPS pi Save = (pi, pi) +unPS pi (Load pj) = ((), pi) +unPS pi (ParseChar predicate) = + if pi.end_of_input + then (EndOfInput, pi) + else let + char = assert_total $ strIndex pi.input (cast pi.position.index) + in if predicate char + then case refine0 (pi.position.index + 1) {p = IsIndex pi.length} of + Nothing => + (GotChar char, {end_of_input := True} pi) + Just (Element next _) => + (GotChar char, {position := MkIndex next} pi) + else (GotError char, pi) +unPS pi (ParseExactChar chr) = unPS pi (ParseChar (== chr)) +unPS pi ParseEoF = (pi.end_of_input, pi) +unPS pi (Note _) = ((), pi) + +export +runParserState : Has ParserState fs => + (s : ParserInternal Id) -> Eff fs t + -> Eff (fs - ParserState) (t, ParserInternal Id) +runParserState s = + handleRelayS s (\x, y => pure (y, x)) $ \s2,ps,f => + let (a, st) = unPS s2 ps + in f st a +``` + +## Footnotes + +[^1]: https://github.com/stefan-hoeck/idris2-barbies