Compare commits

...

45 commits

Author SHA1 Message Date
90c48b3672 json: delimited arrays 2025-01-26 21:09:58 -05:00
ef690db972 core: Properly restore state in delimited 2025-01-26 21:09:30 -05:00
93d4d876d9 core: Factor parseExactChar into the effect 2025-01-26 21:06:22 -05:00
08a2f263bb json: parse char result 2025-01-26 20:51:24 -05:00
40251a1455 numbers: ParseCharResult refactor 2025-01-26 20:51:10 -05:00
b1a4e1a941 core: Remove type argument from ParseCharResult 2025-01-26 20:51:00 -05:00
2e4ab42aa0 json: Notes 2025-01-26 20:06:53 -05:00
da44cf72cf core: Notes 2025-01-26 20:04:14 -05:00
da182e813f json: Use debuging runner in tests 2025-01-26 19:51:46 -05:00
06c4c8a9cf core: Debug wrapper for handleParserIO 2025-01-26 19:49:46 -05:00
5a47d5548c json: Clean up json smoke test 2025-01-26 15:48:28 -05:00
4fb5707b25 json: Refactor string parser 2025-01-26 15:48:28 -05:00
91e1d2c9b1 json: More refactor 2025-01-26 15:48:28 -05:00
a3c7729ab2 json: Smoke test 2025-01-26 15:48:28 -05:00
a8c3901665 json: Show Fix 2025-01-26 15:48:28 -05:00
77dcc4d953 json: oneOfM refactor 2025-01-26 15:48:28 -05:00
aa1ae93165 json: object refactor 2025-01-26 15:48:28 -05:00
19ce8ac798 json: Bool and null 2025-01-26 15:48:28 -05:00
370bb18c06 json: number 2025-01-26 15:48:28 -05:00
3ad023ef6a json: Janky string 2025-01-26 15:48:28 -05:00
38e259fd13 json: Object and array 2025-01-26 15:48:28 -05:00
79d56aeddd json: Parser types 2025-01-26 15:48:28 -05:00
b70ed0e147 json: Define types, add sop 2025-01-26 15:48:28 -05:00
b018967cb1 json: create module 2025-01-26 15:48:28 -05:00
906ffb7877 numbers: Fix readme 2025-01-26 15:48:28 -05:00
aacabb8b22 numbers: make double non base-sensitive 2025-01-26 15:48:28 -05:00
026476dd91 numbers: Double Parser 2025-01-26 15:48:28 -05:00
9220d4bbac numbers: Integer parser 2025-01-26 15:48:28 -05:00
1cc6bea78e numbers: Nat parser 2025-01-26 15:48:28 -05:00
82b16a0e63 numbers: Nat unit tests 2025-01-26 15:48:28 -05:00
2b78275a4b numbers: Basic module structure 2025-01-26 15:48:28 -05:00
46b591283d numbers: Create numbers module 2025-01-26 15:48:28 -05:00
72ea53becf core: oneOfM refactor 2025-01-26 15:48:28 -05:00
59fba4584d core: nom and surround 2025-01-26 15:48:28 -05:00
3029432699 core: export exactString 2025-01-25 05:09:41 -05:00
59f1eb31d0 core: exact string 2025-01-25 04:39:50 -05:00
994da7065c core: runParserState 2025-01-25 00:36:35 -05:00
1658e15487 core: runFirstIO 2025-01-25 00:36:35 -05:00
38c69c0ae3 core: parseTheseChars 2025-01-25 00:36:35 -05:00
7dba492535 core: parseExactChar 2025-01-24 22:39:22 -05:00
40dd87a4f3 core: ParseCharE 2025-01-24 22:39:22 -05:00
fa5eb61d59 core: spelling 2025-01-24 22:21:29 -05:00
9b12ebcf00 core: Add replaceError method 2025-01-24 22:21:29 -05:00
5e5ede87b4 core: Add show for ParserError 2025-01-24 22:21:29 -05:00
222ae17180 core: Beginnings of parser module 2025-01-24 07:11:04 -05:00
7 changed files with 1281 additions and 0 deletions

View file

@ -56,6 +56,26 @@ solution.
Provider wrappers over the standard library `IOArray` type to make them more Provider wrappers over the standard library `IOArray` type to make them more
ergonomic to use. ergonomic to use.
- [Parser](src/Parser.md)
Effectful parser mini-library
- [Interface](src/Parser/Interface.md)
Effectful parser API
- [ParserState](src/Parser/ParserState.md)
Internal state of a parser
- [Numbers](src/Parser/Numbers.md)
Parsers for numerical values in multiple bases
- [JSON](src/Parser/JSON.md)
JSON Parser
## Index of years and days ## Index of years and days
- 2015 - 2015

View file

@ -19,6 +19,7 @@ depends = base
, tailrec , tailrec
, eff , eff
, elab-util , elab-util
, sop
, ansi , ansi
, if-unsolved-implicit , if-unsolved-implicit
, c-ffi , c-ffi
@ -30,6 +31,10 @@ modules = Runner
, Util.Eff , Util.Eff
, Util.Digits , Util.Digits
, Array , Array
, Parser
, Parser.Interface
, Parser.Numbers
, Parser.JSON
-- main file (i.e. file to load at REPL) -- main file (i.e. file to load at REPL)
main = Main main = Main

8
src/Parser.md Normal file
View file

@ -0,0 +1,8 @@
# Parsing Utilties
```idris
module Parser
import public Parser.Interface as Parser
import public Parser.ParserState as Parser
```

332
src/Parser/Interface.md Normal file
View file

@ -0,0 +1,332 @@
# The interface of a `Parser`
```idris
module Parser.Interface
import public Data.List1
import public Parser.ParserState
import public Control.Eff
export infixr 4 >|
export infixr 5 >&
```
## Parser Errors
Combine the parser state at time of error with an error message.
```idris
public export
data ParseError : Type where
-- TODO: Rename this constructor
MkParseError : (state : ParserInternal Id) -> (message : String) -> ParseError
BeforeParse : (message : String) -> ParseError
NestedErrors : (state : ParserInternal Id) -> (message : String)
-> (rest : List ParseError) -> ParseError
```
<!-- idris
export
Show ParseError where
show (MkParseError state message) =
let (line, col) = positionPair state
(line, col) = (show line, show col)
position = show state.position.index
in "Error at line \{line}, column \{col} (\{position}): \{message}"
show (BeforeParse message) =
"Error before parsing: \{message}"
show (NestedErrors state message rest) =
let rest = assert_total $joinBy "\n" . map ((" " ++) . show) $ rest
(line, col) = positionPair state
(line, col) = (show line, show col)
position = show state.position.index
first = "Error at line \{line}, column \{col} (\{position}): \{message}"
in "\{first}\n\{rest}"
-->
## Type Alias
```idris
public export
Parser : Type -> Type
Parser a = Eff [ParserState, Except ParseError] a
```
## Error Generation
Provide a few effectful actions to generate an error from an error message, and
either return it or throw it.
```idris
export
parseError : Has ParserState fs => (message : String) -> Eff fs ParseError
parseError message = do
state <- save
pure $ MkParseError state message
export
throwParseError : Has ParserState fs => Has (Except ParseError) fs =>
(message : String) -> Eff fs a
throwParseError message = do
err <- parseError message
throw err
export
guardMaybe : Has ParserState fs => Has (Except ParseError) fs =>
(message : String) -> Eff fs (Maybe a) -> Eff fs a
guardMaybe message x = do
Just x <- x
| _ => throwParseError message
pure x
export
replaceError : (message : String) -> Parser (a -> Parser b)
replaceError message = do
state <- save
pure (\_ => throw $ MkParseError state message)
```
## Running a parser
We will use the phrasing "rundown" to refer to running all the effects in the
parser effect stack except `ParserState`, which is left in the effect stack to
facilitate handling in the context of another monad or effect stack, since it
benefits from mutability.
Rundown a parser, accepting the first returning parse, which may be failing or
succeding, and automatically generating a "no valid parses" message in the event
no paths in the `Choice` effect produce a returning parse.
```idris
export
rundownFirst : (f : Parser a) -> Eff [ParserState] (Either ParseError a)
rundownFirst f =
runExcept f
```
Provide wrappers for `rundownFirst` for evaluating it in various contexts.
```idris
export
runFirstIO : (f : Parser a) -> String -> IO (Either ParseError a)
runFirstIO f str = do
Just state <- newInternalIO str
| _ => pure . Left $ BeforeParse "Empty input"
runEff (rundownFirst f) [handleParserStateIO state]
export
runFirstIODebug : (f : Parser a) -> String -> IO (Either ParseError a)
runFirstIODebug f str = do
Just state <- newInternalIO str
| _ => pure . Left $ BeforeParse "Empty input"
runEff (rundownFirst f) [handleParserStateIODebug state]
export
runFirst : (f : Parser a) -> String -> Eff fs (Either ParseError a)
runFirst f str = do
Just state <- pure $ newInternal str
| _ => pure . Left $ BeforeParse "Empty input"
(result, _) <- lift . runParserState state . rundownFirst $ f
pure result
export
runFirst' : (f : Parser a) -> String -> Either ParseError a
runFirst' f str = extract $ runFirst f str {fs = []}
```
## Utility functionality
### Parser combinators
Try to run a computation in the context of the `Parser` effect stack, if it
fails (via `Except`), reset the state and resort to the supplied callback
Also supply a version specialized to ignore the error value, returning `Just a`
if the parse succeeds, and `Nothing` if it fails.
```idris
export
try : (f : Parser a) -> (err : ParseError -> Parser a) -> Parser a
try f err = do
starting_state <- save
result <- lift . runExcept $ f
case result of
Left error => do
load starting_state
err error
Right result => pure result
export
tryMaybe : (f : Parser a) -> Parser (Maybe a)
tryMaybe f = try (map Just f) (\_ => pure Nothing)
export
tryEither : (f : Parser a) -> Parser (Either ParseError a)
tryEither f = try (map Right f) (pure . Left)
```
Attempt to parse one of the given input parsers, in the provided order, invoking
the provided error action on failure.
The state will not be modified when an input parser fails
```idris
export
oneOfE : (err : String) -> List (Parser a) -> Parser a
oneOfE err xs = do
start <- save
oneOfE' err start [] xs
where
oneOfE' : (err : String) -> (start : ParserInternal Id)
-> (errs : List ParseError) -> List (Parser a) -> Parser a
oneOfE' err start errs [] = do
throw $ NestedErrors start err (reverse errs)
oneOfE' err start errs (x :: xs) = do
x <- tryEither x
case x of
Right val => pure val
Left error => oneOfE' err start (error :: errs) xs
```
Attempt to parse 0+ of an item
```idris
export
many : (f : Parser a) -> Parser (List a)
many f = do
Just next <- tryMaybe f
| _ => pure []
map (next ::) $ many f
```
Attempt to parse 1+ of an item, invoking the supplied error action on failure
```idris
export
atLeastOne : (err : ParseError -> Parser (List1 a)) -> (f : Parser a)
-> Parser (List1 a)
atLeastOne err f = do
Right next <- tryEither f
| Left e => err e
map (next :::) $ many f
```
Lift a parser producing a `List` or `List1` of `Char` into a parser producing a
`String`
```idris
-- TODO: Rename these
export
parseString : Parser (List Char) -> Parser String
parseString x = do
xs <- x
pure $ pack xs
export
parseString' : Parser (List1 Char) -> Parser String
parseString' x = parseString $ map forget x
```
Attempt to parse a specified character
```idris
export
parseExactChar : Char -> Parser Char
parseExactChar c = do
result <- parseExactChar' c
case result of
GotChar char => pure char
GotError err => throwParseError "Got \{show err} Expected \{show c}"
EndOfInput => throwParseError "End of input"
```
Attempt to parse one of a list of chars
```idris
export
parseTheseChars : List Char -> Parser Char
parseTheseChars cs = do
pnote "Parsing one of: \{show cs}"
result <- parseChar (\x => any (== x) cs)
case result of
GotChar char => pure char
GotError err => throwParseError "Got \{show err} Expected one of \{show cs}"
EndOfInput => throwParseError "End of input"
```
Attempt to parse an exact string
```idris
export
exactString : String -> Parser String
exactString str with (asList str)
exactString "" | [] = do
pnote "Parsing the empty string"
pure ""
exactString input@(strCons c str) | (c :: x) = do
pnote "Parsing exact string \{show input}"
GotChar next <- parseChar (== c)
| GotError err => throwParseError "Got \{show err} expected \{show c}"
| EndOfInput => throwParseError "End of input"
rest <- exactString str | x
pure input
```
Wrap a parser in delimiter characters, discarding the value of the delimiters
```idris
export
delimited : (before, after : Char) -> Parser a -> Parser a
delimited before after x = do
pnote "Parsing delimited by \{show before} \{show after}"
starting_state <- save
_ <- parseExactChar before
Right val <- tryEither x
| Left err => do
load starting_state
throw err
Just _ <- tryMaybe $ parseExactChar after
| _ => do
load starting_state
throw $ MkParseError starting_state "Unmatched delimiter \{show before}"
pure val
```
Consume any number of characters of the provided character class and discard the
result. Also a version for doing so on both sides of a provided parser
```idris
export
nom : Parser Char -> Parser ()
nom x = do
pnote "Nomming"
_ <- many x
pure ()
export
surround : (around : Parser Char) -> (item : Parser a) -> Parser a
surround around item = do
pnote "Surrounding"
nom around
val <- item
nom around
pure val
```
### Composition of boolean functions
```idris
||| Return true if both of the predicates evaluate to true
public export
(>&) : (a : e -> Bool) -> (b : e -> Bool) -> (e -> Bool)
(>&) a b x = a x && b x
```
```idris
||| Return true if either of the predicates evaulates to true
public export
(>|) : (a : e -> Bool) -> (b : e -> Bool) -> (e -> Bool)
(>|) a b x = a x || b x
```

290
src/Parser/JSON.md Normal file
View file

@ -0,0 +1,290 @@
# JSON Parser
```idris
module Parser.JSON
import public Parser
import public Parser.Numbers
import Structures.Dependent.DList
```
<!-- idris
import System
import Derive.Prelude
import Generics.Derive
%hide Generics.Derive.Eq
%hide Generics.Derive.Ord
%hide Generics.Derive.Show
%language ElabReflection
-->
## JSON components
Types a JSON value is allowed to have
```idris
public export
data JSONType : Type where
TObject : JSONType
TArray : JSONType
TString : JSONType
TNumber : JSONType
TBool : JSONType
TNull : JSONType
%runElab derive "JSONType" [Generic, Meta, Eq, Ord, Show, DecEq]
%name JSONType type, type2, type3
```
A JSON value indexed by its type
```idris
public export
data JSONValue : JSONType -> Type where
VObject : {types : List JSONType}
-> DList JSONType (\t => (String, JSONValue t)) types -> JSONValue TObject
VArray : {types : List JSONType}
-> DList JSONType JSONValue types -> JSONValue TArray
VString : (s : String) -> JSONValue TString
VNumber : (d : Double) -> JSONValue TNumber
VBool : (b : Bool) -> JSONValue TBool
VNull : JSONValue TNull
%name JSONValue value, value2, value3
```
<!-- idris
Show (JSONValue t) where
show (VObject xs) =
let xs = dMap (\_,(key, value) => "\"\{key}\":\{show value}") xs
in assert_total $ "{\{joinBy "," xs}}"
show (VArray xs) =
let xs = dMap (\_,e => show e) xs
in assert_total $ "[\{joinBy "," xs}]"
show (VString s) = "\"\{s}\""
show (VNumber d) = show d
show (VBool False) = "false"
show (VBool True) = "true"
show VNull = "null"
-- TODO: Deal with keys potentially having different orders in different objects
Eq (JSONValue t) where
(VObject xs) == (VObject ys) =
assert_total $ xs $== ys
(VArray xs) == (VArray ys) =
assert_total $ xs $== ys
(VString s) == (VString str) = s == str
(VNumber d) == (VNumber dbl) = d == dbl
(VBool b) == (VBool x) = b == x
VNull == VNull = True
%hide Language.Reflection.TT.WithFC.value
-->
## Parsers
We are going to get mutually recursive here. Instead of using a `mutual` block,
we will use the more modern style of declaring all our types ahead of our
definitions.
```idris
export
object : Parser (JSONValue TObject)
export
array : Parser (JSONValue TArray)
export
string : Parser (JSONValue TString)
export
number : Parser (JSONValue TNumber)
export
bool : Parser (JSONValue TBool)
export
null : Parser (JSONValue TNull)
```
Define a `whitespace` character class based on the json specifications
```idris
whitespace : Parser Char
whitespace = do
pnote "Whitespace character"
result <-
parseChar (\x => any (== x) $ the (List _) [' ', '\n', '\r', '\t'])
case result of
GotChar char => pure char
GotError err => throwParseError "Expected whitespace, got: \{show err}"
EndOfInput => throwParseError "End of Input"
```
Convenience function
```idris
dpairize : {t : JSONType} ->
Parser (JSONValue t) -> Parser (t' : JSONType ** JSONValue t')
dpairize x = do
x <- x
pure (_ ** x)
```
Top level json value parser
```idris
export
value : Parser (t : JSONType ** JSONValue t)
value = do
pnote "JSON Value"
surround whitespace $ oneOfE
"Expected JSON Value"
[
dpairize object
, dpairize array
, dpairize string
, dpairize number
, dpairize bool
, dpairize null
]
```
Now go through our json value types
```idris
object = do
pnote "JSON Object"
oneOfE
"Expected Object"
[emptyObject, occupiedObject]
where
emptyObject : Parser (JSONValue TObject)
emptyObject = do
delimited '{' '}' (nom whitespace)
pure $ VObject Nil
keyValue : Parser (t : JSONType ** (String, JSONValue t))
keyValue = do
VString key <- surround whitespace string
_ <- parseExactChar ':'
(typ ** val) <- value
pure (typ ** (key, val))
restKeyValue : Parser (t : JSONType ** (String, JSONValue t))
restKeyValue = do
_ <- parseExactChar ','
keyValue
pairs : Parser (List1 (t : JSONType ** (String, JSONValue t)))
pairs = do
first <- keyValue
rest <- many restKeyValue
pure $ first ::: rest
occupiedObject : Parser (JSONValue TObject)
occupiedObject = do
val <- delimited '{' '}' pairs
let (types ** xs) = DList.fromList (forget val)
pure $ VObject xs
```
```idris
array = do
pnote "JSON Array"
oneOfE
"Expected Array"
[emptyArray, occupiedArray]
where
emptyArray : Parser (JSONValue TArray)
emptyArray = do
delimited '[' ']' (nom whitespace)
pure $ VArray Nil
restValue : Parser (t : JSONType ** JSONValue t)
restValue = do
_ <- parseExactChar ','
value
values : Parser (List1 (t : JSONType ** JSONValue t))
values = do
first <- value
rest <- many restValue
pure $ first ::: rest
occupiedArray : Parser (JSONValue TArray)
occupiedArray = do
xs <- delimited '[' ']' values
let (types ** xs) = DList.fromList (forget xs)
pure $ VArray xs
```
```idris
string = do
pnote "JSON String"
str <- parseString $ delimited '"' '"' (many stringCharacter)
pure $ VString str
where
-- TODO: Handle control characters properly
stringCharacter : Parser Char
stringCharacter = do
result <- parseChar (not . (== '"'))
case result of
GotChar char => pure char
GotError err =>
throwParseError "Expected string character, got \{show err}"
EndOfInput => throwParseError "Unexpected end of input"
```
```idris
number = do
pnote "JSON Number"
d <- double
pure $ VNumber d
```
```idris
bool = do
pnote "JSON Bool"
oneOfE
"Expected Bool"
[true, false]
where
true : Parser (JSONValue TBool)
true = do
_ <- exactString "true"
pure $ VBool True
false : Parser (JSONValue TBool)
false = do
_ <- exactString "false"
pure $ VBool False
```
```idris
null = do
pnote "JSON null"
_ <- exactString "null"
pure VNull
```
## Unit tests
Quick smoke test
```idris
-- @@test JSON Quick Smoke
quickSmoke : IO Bool
quickSmoke = do
let input = "{\"string\":\"string\",\"number\":5,\"true\":true,\"false\":false,\"null\":null,\"array\":[1,2,3]}"
putStrLn input
Right (type ** parsed) <- runFirstIODebug value input
| Left err => do
printLn err
pure False
putStrLn "Input: \{input}\nOutput: \{show type} -> \{show parsed}"
let reference_object =
VObject [
("string", VString "string")
, ("number", VNumber 5.0)
, ("true", VBool True)
, ("false", VBool False)
, ("null", VNull)
, ("array", VArray [
VNumber 1.0
, VNumber 2.0
, VNumber 3.0
])
]
case type of
TObject => pure $ parsed == reference_object
_ => pure False
```

257
src/Parser/Numbers.md Normal file
View file

@ -0,0 +1,257 @@
# Numerical Parsers
```idris
module Parser.Numbers
import public Parser
import Data.Vect
import Control.Eff
```
<!-- idris
import System
-->
## Base Abstraction
```idris
public export
record Base where
constructor MkBase
base : Nat
digits : Vect base Char
%name Base b
export
hasDigit : Base -> Char -> Bool
hasDigit (MkBase base digits) c = any (== c) digits
export
digitValue : Base -> Char -> Maybe Nat
digitValue (MkBase base digits) c = digitValue' digits 0
where
digitValue' : Vect n Char -> (idx : Nat) -> Maybe Nat
digitValue' [] idx = Nothing
digitValue' (x :: xs) idx =
if x == c
then Just idx
else digitValue' xs (S idx)
public export
base10 : Base
base10 = MkBase 10
['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
public export
hex : Base
hex = MkBase 16
['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f']
```
## Parsers
### Nat
```idris
export
nat : Base -> Parser Nat
nat b = do
error <- replaceError "Expected digit"
(first ::: rest) <- atLeastOne error parseDigit
pure $ foldl (\acc, e => 10 * acc + e) first rest
where
parseDigit : Parser Nat
parseDigit = do
GotChar char <- parseChar (hasDigit b)
| GotError e => throwParseError "\{show e} is not a digit"
| EndOfInput => throwParseError "End Of Input"
case digitValue b char of
Nothing =>
throwParseError "Failed to parse as base \{show b.base}: \{show char}"
Just x => pure x
export
natBase10 : Parser Nat
natBase10 = nat base10
```
### Integer
```idris
export
integer : Base -> Parser Integer
integer b = do
negative <- map isJust . tryMaybe $ parseExactChar '-'
value <- map natToInteger $ nat b
if negative
then pure $ negate value
else pure $ value
export
integerBase10 : Parser Integer
integerBase10 = integer base10
```
### Double
```idris
-- TODO: Replicate `parseDouble` logic and make this base-generic
export
double : Parser Double
double = do
starting_state <- save
integer <- integer
fraction <- tryMaybe fraction
exponent <- tryMaybe exponent
let str = case (fraction, exponent) of
(Nothing, Nothing) =>
integer
(Nothing, (Just exponent)) =>
"\{integer}e\{exponent}"
((Just fraction), Nothing) =>
"\{integer}.\{fraction}"
((Just fraction), (Just exponent)) =>
"\{integer}.\{fraction}e\{exponent}"
Just out <- pure $ parseDouble str
| _ =>
throw $ MkParseError starting_state "Std failed to parse as double: \{str}"
pure out
where
parseDigit : Parser Char
parseDigit = do
GotChar char <- parseChar (hasDigit base10)
| GotError e => throwParseError "\{show e} is not a digit"
| EndOfInput => throwParseError "End Of Input"
pure char
integer : Parser String
integer = do
sign <- tryMaybe $ parseExactChar '-'
error <- replaceError "Expected digit"
digits <- map forget $ atLeastOne error parseDigit
case sign of
Nothing => pure $ pack digits
Just x => pure $ pack (x :: digits)
fraction : Parser String
fraction = do
decimal <- parseExactChar '.'
error <- replaceError "Expected digit"
digits <- map forget $ atLeastOne error parseDigit
pure $ pack digits
exponent : Parser String
exponent = do
e <- parseTheseChars ['e', 'E']
sign <- parseTheseChars ['+', '-']
error <- replaceError "Expected digit"
digits <- map forget $ atLeastOne error parseDigit
pure . pack $ sign :: digits
```
## Unit tests
Test roundtripping a value through the provided parser
```idris
roundtrip : Eq a => Show a => a -> (p : Parser a) -> IO Bool
roundtrip x p = do
let string = show x
putStrLn "Roundtripping \{string}"
Just state <- newInternalIO string
| _ => do
putStrLn "Failed to produce parser for \{string}"
pure False
Right result <- runEff (rundownFirst p) [handleParserStateIO state] {m = IO}
| Left err => do
printLn err
pure False
putStrLn "Input: \{string} Output: \{show result}"
pure $ x == result
```
Do some roundtrip tests with the nat parser
```idris
-- @@test Nat round trip
natRoundTrip : IO Bool
natRoundTrip = pure $
!(roundtrip 0 natBase10)
&& !(roundtrip 1 natBase10)
&& !(roundtrip 100 natBase10)
&& !(roundtrip 1234 natBase10)
&& !(roundtrip 1234567890 natBase10)
&& !(roundtrip 1234567890000 natBase10)
&& !(roundtrip 12345678901234567890 natBase10)
```
```idris
-- @@test Integer round trip
integerRoundTrip : IO Bool
integerRoundTrip = pure $
!(roundtrip 0 integerBase10)
&& !(roundtrip 1 integerBase10)
&& !(roundtrip 100 integerBase10)
&& !(roundtrip 1234 integerBase10)
&& !(roundtrip 1234567890 integerBase10)
&& !(roundtrip 1234567890000 integerBase10)
&& !(roundtrip 12345678901234567890 integerBase10)
&& !(roundtrip (-1) integerBase10)
&& !(roundtrip (-100) integerBase10)
&& !(roundtrip (-1234) integerBase10)
&& !(roundtrip (-1234567890) integerBase10)
&& !(roundtrip (-1234567890000) integerBase10)
&& !(roundtrip (-12345678901234567890) integerBase10)
```
Compare our parsing of a double to the standard library's
```idris
compareDouble : String -> IO Bool
compareDouble string = do
Just state <- newInternalIO string
| _ => do
putStrLn "Failed to produce parser for \{string}"
pure False
Right result <-
runEff (rundownFirst double) [handleParserStateIO state] {m = IO}
| Left err => do
printLn err
pure False
putStrLn "Input: \{string} Output: \{show result}"
Just double' <- pure $ parseDouble string
| _ => do
printLn "Std failed to parse as double: \{string}"
pure False
pure $ result == double'
```
```idris
-- @@test Double Std Comparison
doubleRoundTrip : IO Bool
doubleRoundTrip = pure $
!(compareDouble "0")
&& !(compareDouble "1")
&& !(compareDouble "100")
&& !(compareDouble "1234")
&& !(compareDouble "1234567890")
&& !(compareDouble "1234567890000")
&& !(compareDouble "12345678901234567890")
&& !(compareDouble "-1")
&& !(compareDouble "-100")
&& !(compareDouble "-1234")
&& !(compareDouble "-1234567890")
&& !(compareDouble "-1234567890000")
&& !(compareDouble "-12345678901234567890")
&& !(compareDouble "0.0")
&& !(compareDouble "1.0")
&& !(compareDouble "-1.0")
&& !(compareDouble "-0.0")
&& !(compareDouble "-0.0")
&& !(compareDouble "0.1234")
&& !(compareDouble "0.01234")
&& !(compareDouble "-0.1234")
&& !(compareDouble "-0.01234")
&& !(compareDouble "1.234e+5")
&& !(compareDouble "1.234e-5")
&& !(compareDouble "-1.234e+5")
&& !(compareDouble "-1.234e-5")
```

369
src/Parser/ParserState.md Normal file
View file

@ -0,0 +1,369 @@
# Parser State
An effectful description of the text a parser consumes
```idris
module Parser.ParserState
import public Data.String
import public Data.DPair
import public Data.Refined
import public Data.Refined.Int64
import public Data.SortedMap
import public Data.IORef
import Data.Primitives.Interpolation
import System.File
import public Control.Eff
```
## Barbie Basics
Barbies are types that can "change their clothes", in Idris, this manifests as a
type indexed by a type-level function that affects the types of the fields.
Since we know our usage here is going to be quite simple, and not even really
making use of dependently typed fun, we are going to implement all the barbie
functionality we need by hand, but if you feel like barbies might be a good fit
for your problem, or you simply want to learn more, please check out a library
like `barbies`[^1]
```idris
public export
Id : Type -> Type
Id x = x
```
## Internal State of a Parser
Type alias for our refined `Int64`s
```idris
public export
0 IsIndex : (length : Int64) -> Int64 -> Type
IsIndex length = From 0 && LessThan length
public export
record Index (length : Int64) where
constructor MkIndex
index : Int64
{auto 0 prf : IsIndex length index}
```
<!-- idris
export
Eq (Index i) where
x == y = x.index == y.index
export
Ord (Index i) where
compare x y = compare x.index y.index
export
Show (Index i) where
show (MkIndex index) = show index
-->
Stores the location we are currently at in the string, and metadata about it for
providing good error messages. Parsing an empty input isn't very interesting, so
we exclude inputs of length zero, since that will make other things easier.
```idris
||| State representing a parser's position in the text
public export
record ParserInternal (f : Type -> Type) where
constructor MkInternal
-- IDEA: Maybe go full barbie and have this be a field, so that we can, say,
-- read directly from a file instead of from an already loaded string using the
-- same parser
||| The input string
input : String
||| The length of the input string
length : Int64
{auto 0 len_prf : length = cast (strLength input)}
||| A sorted set containing the positions of the start of each line
line_starts : SortedMap (Index length) Nat
||| The position of the next character to read in the input
position : f (Index length)
||| True if we have hit the end of input
end_of_input : f Bool
%name ParserInternal pi, pj, pk
```
### ParserInternal Methods
Construct a `ParserInternal` from an input string. Will fail if the input is
empty, because then we can't index it.
```idris
export
newInternal : (input : String) -> Maybe (ParserInternal Id)
newInternal input =
-- Check if we have at least one character in the input
case refine0 0 {p = IsIndex (cast (strLength input))} of
Nothing => Nothing
Just (Element position _) => Just $
MkInternal input
(cast (strLength input))
(mkStarts' input (MkIndex position))
(MkIndex position)
False
where
partial
mkStarts :
(str : String) -> (acc : List (Index (cast (strLength str)), Nat))
-> (idx : Index (cast (strLength str))) -> (count : Nat) -> (next : Bool)
-> List (Index (cast (strLength str)), Nat)
mkStarts str acc idx count True =
mkStarts str ((idx, count) :: acc) idx (S count) False
mkStarts str acc idx count False =
case refine0 (idx.index + 1) {p = IsIndex (cast (strLength str))} of
Nothing => acc
Just (Element next _) =>
if strIndex str (cast idx.index) == '\n'
then mkStarts str acc (MkIndex next) count True
else mkStarts str acc (MkIndex next) count False
mkStarts' : (str : String) -> (start : Index (cast (strLength str)))
-> SortedMap (Index (cast (strLength str))) Nat
mkStarts' str start =
let
pairs = assert_total $
mkStarts str [] start 0 True
in fromList pairs
```
Get the current line and column number
```idris
||| Returns the current position of the parser cursor in, zero indexed, (line,
||| column) form
export
positionPair : (pi : ParserInternal Id) -> (Nat, Nat)
positionPair pi =
case lookup pi.position pi.line_starts of
Just line => (line, 0)
Nothing =>
case lookupBetween pi.position pi.line_starts of
-- There will always be at least one line start, and we would have hit
-- the previous case if we were at the start of the first one, so if
-- there isn't a before, we can return a nonsense value safely
(Nothing, _) => (0, 0)
(Just (start, linum), after) =>
-- Our index will always be after the start of the line, for previously
-- mentioned reasons, so this cast is safe
let col = cast {to = Nat} $ pi.position.index - start.index
in (linum, col)
```
<!-- idris
export
Show (ParserInternal Id) where
show pi =
let (line, col) = positionPair pi
current = assert_total $ strIndex pi.input (cast pi.position.index)
pos = pi.position.index
eof = show pi.end_of_input
in "Position: \{pos}(\{line}, \{col}}) Value: \{show current} EoF: \{eof}"
-->
### More Barbie Functionality
Provide the barbie analogs of `map` and `traverse` for our `ParserInternal`
type, allowing us to change the type the values in a `ParserInternal` by mapping
over those values.
```idris
export
bmap : ({0 a : Type} -> f a -> g a) -> ParserInternal f -> ParserInternal g
-- bmap f = bmap_ (\_ => f)
bmap fun (MkInternal input length line_starts position end_of_input) =
let position' = fun position
end_of_input' = fun end_of_input
in MkInternal input length line_starts position' end_of_input'
export
btraverse : Applicative e => ({0 a : Type} -> f a -> e (g a))
-> ParserInternal f -> e (ParserInternal g)
btraverse fun (MkInternal input length line_starts position end_of_input) =
let pures = (MkInternal input length line_starts)
in [| pures (fun position) (fun end_of_input)|]
```
## Three way result
```idris
||| Three way result returned from attempting to parse a single char
public export
data ParseCharResult : Type where
GotChar : (char : Char) -> ParseCharResult
GotError : (err : Char) -> ParseCharResult
EndOfInput : ParseCharResult
```
## The Effect Type
```idris
export
data ParserState : Type -> Type where
Save : ParserState (ParserInternal Id)
Load : (ParserInternal Id) -> ParserState ()
-- TODO: Maybe add a ParseString that parses a string of characters as a
-- string using efficent slicing?
ParseChar : (predicate : Char -> Bool) -> ParserState ParseCharResult
ParseExactChar : (char : Char) -> ParserState ParseCharResult
ParseEoF : ParserState Bool
Note : Lazy String -> ParserState ()
```
<!-- idris
Show (ParserState t) where
show Save = "Saving state"
show (Load pi) = "Loading state \{show pi}"
show (ParseChar predicate) = "Parsing char"
show (ParseExactChar char) = "Parsing char \{show char}"
show ParseEoF = "Parsing EoF"
show (Note _) = "Note"
-->
### Actions
```idris
||| Return the current state, for potential later reloading
export
save : Has ParserState fs => Eff fs (ParserInternal Id)
save = send Save
||| Reset to the provided state
export
load : Has ParserState fs => ParserInternal Id -> Eff fs ()
load pi = send $ Load pi
||| Attempt to parse a char, checking to see if it complies with the supplied
||| predicate, updates the state if parsing succeeds, does not alter it in an
||| error condition.
export
parseChar : Has ParserState fs => (predicate : Char -> Bool)
-> Eff fs ParseCharResult
parseChar predicate = send $ ParseChar predicate
||| Parse a char by exact value
export
parseExactChar' : Has ParserState fs => (chr : Char) -> Eff fs ParseCharResult
parseExactChar' chr = send $ ParseExactChar chr
||| "Parse" the end of input, returning `True` if the parser state is currently
||| at the end of the input
export
parseEoF : Has ParserState fs => Eff fs Bool
parseEoF = send ParseEoF
||| Make a note to be output when running with a debug handler
export
pnote : Has ParserState fs => Lazy String -> Eff fs ()
pnote x = send $ Note x
```
## Handling a ParserState
### IO Context
```idris
export
handleParserStateIO : HasIO io =>
IORef (ParserInternal IORef) -> ParserState t -> io t
handleParserStateIO pi Save = do
pi <- readIORef pi
btraverse readIORef pi
handleParserStateIO pi (Load pj) = do
pj <- btraverse newIORef pj
writeIORef pi pj
handleParserStateIO pi (ParseChar predicate) = do
pi <- readIORef pi
False <- readIORef pi.end_of_input
| _ => pure EndOfInput
position <- readIORef pi.position
let char = assert_total $ strIndex pi.input (cast position.index)
True <- pure $ predicate char
| _ => pure $ GotError char
-- Our refinement type on the position forces us to check that the length is
-- in bounds after incrementing it, if its out of bounds, set the end_of_input
-- flag
case refine0 (position.index + 1) {p = IsIndex pi.length} of
Nothing => do
writeIORef pi.end_of_input True
pure $ GotChar char
Just (Element next _) => do
writeIORef pi.position $ MkIndex next
pure $ GotChar char
handleParserStateIO pi (ParseExactChar chr) = do
-- TODO: do this directly?
handleParserStateIO pi (ParseChar (== chr))
handleParserStateIO pi ParseEoF = do
pi <- readIORef pi
readIORef pi.end_of_input
-- We ignore notes in non-debug mode
handleParserStateIO pi (Note _) = pure ()
export
newInternalIO : HasIO io => String -> io $ Maybe (IORef (ParserInternal IORef))
newInternalIO str = do
Just internal <- pure $ newInternal str
| _ => pure Nothing
internal <- btraverse newIORef internal
map Just $ newIORef internal
```
Wrapper with debugging output
```idris
export
handleParserStateIODebug : HasIO io =>
IORef (ParserInternal IORef) -> ParserState t -> io t
handleParserStateIODebug x (Note note) = do
state <- readIORef x
state <- btraverse readIORef state
_ <- fPutStrLn stderr "Note \{note} -> \{show state}"
pure ()
handleParserStateIODebug x y = do
state <- readIORef x
state <- btraverse readIORef state
_ <- fPutStrLn stderr "\{show y} -> \{show state}"
handleParserStateIO x y
```
### State context
```idris
unPS : ParserInternal Id -> ParserState a -> (a, ParserInternal Id)
unPS pi Save = (pi, pi)
unPS pi (Load pj) = ((), pi)
unPS pi (ParseChar predicate) =
if pi.end_of_input
then (EndOfInput, pi)
else let
char = assert_total $ strIndex pi.input (cast pi.position.index)
in if predicate char
then case refine0 (pi.position.index + 1) {p = IsIndex pi.length} of
Nothing =>
(GotChar char, {end_of_input := True} pi)
Just (Element next _) =>
(GotChar char, {position := MkIndex next} pi)
else (GotError char, pi)
unPS pi (ParseExactChar chr) = unPS pi (ParseChar (== chr))
unPS pi ParseEoF = (pi.end_of_input, pi)
unPS pi (Note _) = ((), pi)
export
runParserState : Has ParserState fs =>
(s : ParserInternal Id) -> Eff fs t
-> Eff (fs - ParserState) (t, ParserInternal Id)
runParserState s =
handleRelayS s (\x, y => pure (y, x)) $ \s2,ps,f =>
let (a, st) = unPS s2 ps
in f st a
```
## Footnotes
[^1]: https://github.com/stefan-hoeck/idris2-barbies