Compare commits

...

24 commits

Author SHA1 Message Date
5a47d5548c json: Clean up json smoke test 2025-01-26 15:48:28 -05:00
4fb5707b25 json: Refactor string parser 2025-01-26 15:48:28 -05:00
91e1d2c9b1 json: More refactor 2025-01-26 15:48:28 -05:00
a3c7729ab2 json: Smoke test 2025-01-26 15:48:28 -05:00
a8c3901665 json: Show Fix 2025-01-26 15:48:28 -05:00
77dcc4d953 json: oneOfM refactor 2025-01-26 15:48:28 -05:00
aa1ae93165 json: object refactor 2025-01-26 15:48:28 -05:00
19ce8ac798 json: Bool and null 2025-01-26 15:48:28 -05:00
370bb18c06 json: number 2025-01-26 15:48:28 -05:00
3ad023ef6a json: Janky string 2025-01-26 15:48:28 -05:00
38e259fd13 json: Object and array 2025-01-26 15:48:28 -05:00
79d56aeddd json: Parser types 2025-01-26 15:48:28 -05:00
b70ed0e147 json: Define types, add sop 2025-01-26 15:48:28 -05:00
b018967cb1 json: create module 2025-01-26 15:48:28 -05:00
906ffb7877 numbers: Fix readme 2025-01-26 15:48:28 -05:00
aacabb8b22 numbers: make double non base-sensitive 2025-01-26 15:48:28 -05:00
026476dd91 numbers: Double Parser 2025-01-26 15:48:28 -05:00
9220d4bbac numbers: Integer parser 2025-01-26 15:48:28 -05:00
1cc6bea78e numbers: Nat parser 2025-01-26 15:48:28 -05:00
82b16a0e63 numbers: Nat unit tests 2025-01-26 15:48:28 -05:00
2b78275a4b numbers: Basic module structure 2025-01-26 15:48:28 -05:00
46b591283d numbers: Create numbers module 2025-01-26 15:48:28 -05:00
72ea53becf core: oneOfM refactor 2025-01-26 15:48:28 -05:00
59fba4584d core: nom and surround 2025-01-26 15:48:28 -05:00
5 changed files with 612 additions and 14 deletions

View file

@ -68,6 +68,14 @@ solution.
Internal state of a parser
- [Numbers](src/Parser/Numbers.md)
Parsers for numerical values in multiple bases
- [JSON](src/Parser/JSON.md)
JSON Parser
## Index of years and days
- 2015

View file

@ -19,6 +19,7 @@ depends = base
, tailrec
, eff
, elab-util
, sop
, ansi
, if-unsolved-implicit
, c-ffi
@ -32,6 +33,8 @@ modules = Runner
, Array
, Parser
, Parser.Interface
, Parser.Numbers
, Parser.JSON
-- main file (i.e. file to load at REPL)
main = Main

View file

@ -20,8 +20,11 @@ Combine the parser state at time of error with an error message.
```idris
public export
data ParseError : Type where
-- TODO: Rename this constructor
MkParseError : (state : ParserInternal Id) -> (message : String) -> ParseError
BeforeParse : (message : String) -> ParseError
NestedErrors : (state : ParserInternal Id) -> (message : String)
-> (rest : List ParseError) -> ParseError
```
<!-- idris
@ -34,6 +37,13 @@ Show ParseError where
in "Error at line \{line}, column \{col} (\{position}): \{message}"
show (BeforeParse message) =
"Error before parsing: \{message}"
show (NestedErrors state message rest) =
let rest = assert_total $joinBy "\n" . map ((" " ++) . show) $ rest
(line, col) = positionPair state
(line, col) = (show line, show col)
position = show state.position.index
first = "Error at line \{line}, column \{col} (\{position}): \{message}"
in "\{first}\n\{rest}"
-->
## Type Alias
@ -41,7 +51,7 @@ Show ParseError where
```idris
public export
Parser : Type -> Type
Parser a = Eff [ParserState, Except ParseError, Choose] a
Parser a = Eff [ParserState, Except ParseError] a
```
## Error Generation
@ -93,7 +103,7 @@ no paths in the `Choice` effect produce a returning parse.
export
rundownFirst : (f : Parser a) -> Eff [ParserState] (Either ParseError a)
rundownFirst f =
runExcept . guardMaybe "No returning parses" . runChoose {f = Maybe} $ f
runExcept f
```
Provide wrappers for `rundownFirst` for evaluating it in various contexts.
@ -148,27 +158,29 @@ tryMaybe f = try (map Just f) (\_ => pure Nothing)
export
tryEither : (f : Parser a) -> Parser (Either ParseError a)
tryEither f = try (map Right f) (pure . Left)
||| Converts any errors thrown by `f` into silent backtracking within `Choose`
export
tryEmpty : (f : Parser a) -> Parser a
tryEmpty f = try f (\_ => empty)
```
Attempt to parse one of the given input parsers, in the provided order, invoking
the provided error action on failure. This will suppress any errors returned by
the input parsers by mapping them to `empty`.
the provided error action on failure.
The state will not be modified when an input parser fails
```idris
export
oneOfE : Foldable f =>
(err : Parser a) -> f (Parser a) -> Parser a
oneOfE err xs = foldr altE err xs
oneOfE : (err : String) -> List (Parser a) -> Parser a
oneOfE err xs = do
start <- save
oneOfE' err start [] xs
where
altE : Parser a -> Parser a -> Parser a
altE f rest = (tryEmpty f) `alt` rest
oneOfE' : (err : String) -> (start : ParserInternal Id)
-> (errs : List ParseError) -> List (Parser a) -> Parser a
oneOfE' err start errs [] = do
throw $ NestedErrors start err (reverse errs)
oneOfE' err start errs (x :: xs) = do
x <- tryEither x
case x of
Right val => pure val
Left error => oneOfE' err start (error :: errs) xs
```
Attempt to parse 0+ of an item
@ -198,6 +210,7 @@ Lift a parser producing a `List` or `List1` of `Char` into a parser producing a
`String`
```idris
-- TODO: Rename these
export
parseString : Parser (List Char) -> Parser String
parseString x = do
@ -250,6 +263,40 @@ exactString str with (asList str)
pure input
```
Wrap a parser in delimiter characters, discarding the value of the delimiters
```idris
export
delimited : (before, after : Char) -> Parser a -> Parser a
delimited before after x = do
starting_state <- save
_ <- parseExactChar before
val <- x
Just _ <- tryMaybe $ parseExactChar after
| _ => throw $
MkParseError starting_state "Unmatched delimiter \{show before}"
pure val
```
Consume any number of characters of the provided character class and discard the
result. Also a version for doing so on both sides of a provided parser
```idris
export
nom : Parser Char -> Parser ()
nom x = do
_ <- many x
pure ()
export
surround : (around : Parser Char) -> (item : Parser a) -> Parser a
surround around item = do
nom around
val <- item
nom around
pure val
```
### Composition of boolean functions
```idris

283
src/Parser/JSON.md Normal file
View file

@ -0,0 +1,283 @@
# JSON Parser
```idris
module Parser.JSON
import public Parser
import public Parser.Numbers
import Structures.Dependent.DList
```
<!-- idris
import System
import Derive.Prelude
import Generics.Derive
%hide Generics.Derive.Eq
%hide Generics.Derive.Ord
%hide Generics.Derive.Show
%language ElabReflection
-->
## JSON components
Types a JSON value is allowed to have
```idris
public export
data JSONType : Type where
TObject : JSONType
TArray : JSONType
TString : JSONType
TNumber : JSONType
TBool : JSONType
TNull : JSONType
%runElab derive "JSONType" [Generic, Meta, Eq, Ord, Show, DecEq]
%name JSONType type, type2, type3
```
A JSON value indexed by its type
```idris
public export
data JSONValue : JSONType -> Type where
VObject : {types : List JSONType}
-> DList JSONType (\t => (String, JSONValue t)) types -> JSONValue TObject
VArray : {types : List JSONType}
-> DList JSONType JSONValue types -> JSONValue TArray
VString : (s : String) -> JSONValue TString
VNumber : (d : Double) -> JSONValue TNumber
VBool : (b : Bool) -> JSONValue TBool
VNull : JSONValue TNull
%name JSONValue value, value2, value3
```
<!-- idris
Show (JSONValue t) where
show (VObject xs) =
let xs = dMap (\_,(key, value) => "\"\{key}\":\{show value}") xs
in assert_total $ "{\{joinBy "," xs}}"
show (VArray xs) =
let xs = dMap (\_,e => show e) xs
in assert_total $ "[\{joinBy "," xs}]"
show (VString s) = "\"\{s}\""
show (VNumber d) = show d
show (VBool False) = "false"
show (VBool True) = "true"
show VNull = "null"
-- TODO: Deal with keys potentially having different orders in different objects
Eq (JSONValue t) where
(VObject xs) == (VObject ys) =
assert_total $ xs $== ys
(VArray xs) == (VArray ys) =
assert_total $ xs $== ys
(VString s) == (VString str) = s == str
(VNumber d) == (VNumber dbl) = d == dbl
(VBool b) == (VBool x) = b == x
VNull == VNull = True
%hide Language.Reflection.TT.WithFC.value
-->
## Parsers
We are going to get mutually recursive here. Instead of using a `mutual` block,
we will use the more modern style of declaring all our types ahead of our
definitions.
```idris
export
object : Parser (JSONValue TObject)
export
array : Parser (JSONValue TArray)
export
string : Parser (JSONValue TString)
export
number : Parser (JSONValue TNumber)
export
bool : Parser (JSONValue TBool)
export
null : Parser (JSONValue TNull)
```
Define a `whitespace` character class based on the json specifications
```idris
whitespace : Parser Char
whitespace = do
result <-
parseChar (\x => any (== x) $ the (List _) [' ', '\n', '\r', '\t']) id
case result of
GotChar char => pure char
GotError err => throwParseError "Expected whitespace, got: \{show err}"
EndOfInput => throwParseError "End of Input"
```
Convenience function
```idris
dpairize : {t : JSONType} ->
Parser (JSONValue t) -> Parser (t' : JSONType ** JSONValue t')
dpairize x = do
x <- x
pure (_ ** x)
```
Top level json value parser
```idris
export
value : Parser (t : JSONType ** JSONValue t)
value = do
surround whitespace $ oneOfE
"Expected JSON Value"
[
dpairize object
, dpairize array
, dpairize string
, dpairize number
, dpairize bool
, dpairize null
]
```
Now go through our json value types
```idris
object = do
oneOfE
"Expected Object"
[emptyObject, occupiedObject]
where
emptyObject : Parser (JSONValue TObject)
emptyObject = do
delimited '{' '}' (nom whitespace)
pure $ VObject Nil
keyValue : Parser (t : JSONType ** (String, JSONValue t))
keyValue = do
VString key <- surround whitespace string
_ <- parseExactChar ':'
(typ ** val) <- value
pure (typ ** (key, val))
restKeyValue : Parser (t : JSONType ** (String, JSONValue t))
restKeyValue = do
_ <- parseExactChar ','
keyValue
pairs : Parser (List1 (t : JSONType ** (String, JSONValue t)))
pairs = do
first <- keyValue
rest <- many restKeyValue
pure $ first ::: rest
occupiedObject : Parser (JSONValue TObject)
occupiedObject = do
val <- delimited '{' '}' pairs
let (types ** xs) = DList.fromList (forget val)
pure $ VObject xs
```
```idris
array = do
oneOfE
"Expected Array"
[emptyArray, occupiedArray]
where
emptyArray : Parser (JSONValue TArray)
emptyArray = do
delimited '[' ']' (nom whitespace)
pure $ VArray Nil
restValue : Parser (t : JSONType ** JSONValue t)
restValue = do
_ <- parseExactChar ','
value
values : Parser (List1 (t : JSONType ** JSONValue t))
values = do
first <- value
rest <- many restValue
pure $ first ::: rest
occupiedArray : Parser (JSONValue TArray)
occupiedArray = do
_ <- parseExactChar '['
xs <- values
_ <- parseExactChar ']'
-- TODO: Why is this busted?
-- xs <- delimited '[' ']' values
let (types ** xs) = DList.fromList (forget xs)
pure $ VArray xs
```
```idris
string = do
str <- parseString $ delimited '"' '"' (many stringCharacter)
pure $ VString str
where
-- TODO: Handle control characters properly
stringCharacter : Parser Char
stringCharacter = do
e1 <- parseError "Expected non-quote, got quote"
e2 <- parseError "End of input"
parseCharE (not . (== '"')) (\_ => e1) e2
```
```idris
number = do
d <- double
pure $ VNumber d
```
```idris
bool = do
oneOfE
"Expected Bool"
[true, false]
where
true : Parser (JSONValue TBool)
true = do
_ <- exactString "true"
pure $ VBool True
false : Parser (JSONValue TBool)
false = do
_ <- exactString "false"
pure $ VBool False
```
```idris
null = do
_ <- exactString "null"
pure VNull
```
## Unit tests
Quick smoke test
```idris
-- @@test JSON Quick Smoke
quickSmoke : IO Bool
quickSmoke = do
let input = "{\"string\":\"string\",\"number\":5,\"true\":true,\"false\":false,\"null\":null,\"array\":[1,2,3]}"
putStrLn input
Right (type ** parsed) <- runFirstIO value input
| Left err => do
printLn err
pure False
putStrLn "Input: \{input}\nOutput: \{show type} -> \{show parsed}"
let reference_object =
VObject [
("string", VString "string")
, ("number", VNumber 5.0)
, ("true", VBool True)
, ("false", VBool False)
, ("null", VNull)
, ("array", VArray [
VNumber 1.0
, VNumber 2.0
, VNumber 3.0
])
]
case type of
TObject => pure $ parsed == reference_object
_ => pure False
```

257
src/Parser/Numbers.md Normal file
View file

@ -0,0 +1,257 @@
# Numerical Parsers
```idris
module Parser.Numbers
import public Parser
import Data.Vect
import Control.Eff
```
<!-- idris
import System
-->
## Base Abstraction
```idris
public export
record Base where
constructor MkBase
base : Nat
digits : Vect base Char
%name Base b
export
hasDigit : Base -> Char -> Bool
hasDigit (MkBase base digits) c = any (== c) digits
export
digitValue : Base -> Char -> Maybe Nat
digitValue (MkBase base digits) c = digitValue' digits 0
where
digitValue' : Vect n Char -> (idx : Nat) -> Maybe Nat
digitValue' [] idx = Nothing
digitValue' (x :: xs) idx =
if x == c
then Just idx
else digitValue' xs (S idx)
public export
base10 : Base
base10 = MkBase 10
['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
public export
hex : Base
hex = MkBase 16
['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f']
```
## Parsers
### Nat
```idris
export
nat : Base -> Parser Nat
nat b = do
error <- replaceError "Expected digit"
(first ::: rest) <- atLeastOne error parseDigit
pure $ foldl (\acc, e => 10 * acc + e) first rest
where
parseDigit : Parser Nat
parseDigit = do
GotChar char <- parseChar (hasDigit b) id
| GotError e => throwParseError "\{show e} is not a digit"
| EndOfInput => throwParseError "End Of Input"
case digitValue b char of
Nothing =>
throwParseError "Failed to parse as base \{show b.base}: \{show char}"
Just x => pure x
export
natBase10 : Parser Nat
natBase10 = nat base10
```
### Integer
```idris
export
integer : Base -> Parser Integer
integer b = do
negative <- map isJust . tryMaybe $ parseExactChar '-'
value <- map natToInteger $ nat b
if negative
then pure $ negate value
else pure $ value
export
integerBase10 : Parser Integer
integerBase10 = integer base10
```
### Double
```idris
-- TODO: Replicate `parseDouble` logic and make this base-generic
export
double : Parser Double
double = do
starting_state <- save
integer <- integer
fraction <- tryMaybe fraction
exponent <- tryMaybe exponent
let str = case (fraction, exponent) of
(Nothing, Nothing) =>
integer
(Nothing, (Just exponent)) =>
"\{integer}e\{exponent}"
((Just fraction), Nothing) =>
"\{integer}.\{fraction}"
((Just fraction), (Just exponent)) =>
"\{integer}.\{fraction}e\{exponent}"
Just out <- pure $ parseDouble str
| _ =>
throw $ MkParseError starting_state "Std failed to parse as double: \{str}"
pure out
where
parseDigit : Parser Char
parseDigit = do
GotChar char <- parseChar (hasDigit base10) id
| GotError e => throwParseError "\{show e} is not a digit"
| EndOfInput => throwParseError "End Of Input"
pure char
integer : Parser String
integer = do
sign <- tryMaybe $ parseExactChar '-'
error <- replaceError "Expected digit"
digits <- map forget $ atLeastOne error parseDigit
case sign of
Nothing => pure $ pack digits
Just x => pure $ pack (x :: digits)
fraction : Parser String
fraction = do
decimal <- parseExactChar '.'
error <- replaceError "Expected digit"
digits <- map forget $ atLeastOne error parseDigit
pure $ pack digits
exponent : Parser String
exponent = do
e <- parseTheseChars ['e', 'E']
sign <- parseTheseChars ['+', '-']
error <- replaceError "Expected digit"
digits <- map forget $ atLeastOne error parseDigit
pure . pack $ sign :: digits
```
## Unit tests
Test roundtripping a value through the provided parser
```idris
roundtrip : Eq a => Show a => a -> (p : Parser a) -> IO Bool
roundtrip x p = do
let string = show x
putStrLn "Roundtripping \{string}"
Just state <- newInternalIO string
| _ => do
putStrLn "Failed to produce parser for \{string}"
pure False
Right result <- runEff (rundownFirst p) [handleParserStateIO state] {m = IO}
| Left err => do
printLn err
pure False
putStrLn "Input: \{string} Output: \{show result}"
pure $ x == result
```
Do some roundtrip tests with the nat parser
```idris
-- @@test Nat round trip
natRoundTrip : IO Bool
natRoundTrip = pure $
!(roundtrip 0 natBase10)
&& !(roundtrip 1 natBase10)
&& !(roundtrip 100 natBase10)
&& !(roundtrip 1234 natBase10)
&& !(roundtrip 1234567890 natBase10)
&& !(roundtrip 1234567890000 natBase10)
&& !(roundtrip 12345678901234567890 natBase10)
```
```idris
-- @@test Integer round trip
integerRoundTrip : IO Bool
integerRoundTrip = pure $
!(roundtrip 0 integerBase10)
&& !(roundtrip 1 integerBase10)
&& !(roundtrip 100 integerBase10)
&& !(roundtrip 1234 integerBase10)
&& !(roundtrip 1234567890 integerBase10)
&& !(roundtrip 1234567890000 integerBase10)
&& !(roundtrip 12345678901234567890 integerBase10)
&& !(roundtrip (-1) integerBase10)
&& !(roundtrip (-100) integerBase10)
&& !(roundtrip (-1234) integerBase10)
&& !(roundtrip (-1234567890) integerBase10)
&& !(roundtrip (-1234567890000) integerBase10)
&& !(roundtrip (-12345678901234567890) integerBase10)
```
Compare our parsing of a double to the standard library's
```idris
compareDouble : String -> IO Bool
compareDouble string = do
Just state <- newInternalIO string
| _ => do
putStrLn "Failed to produce parser for \{string}"
pure False
Right result <-
runEff (rundownFirst double) [handleParserStateIO state] {m = IO}
| Left err => do
printLn err
pure False
putStrLn "Input: \{string} Output: \{show result}"
Just double' <- pure $ parseDouble string
| _ => do
printLn "Std failed to parse as double: \{string}"
pure False
pure $ result == double'
```
```idris
-- @@test Double Std Comparison
doubleRoundTrip : IO Bool
doubleRoundTrip = pure $
!(compareDouble "0")
&& !(compareDouble "1")
&& !(compareDouble "100")
&& !(compareDouble "1234")
&& !(compareDouble "1234567890")
&& !(compareDouble "1234567890000")
&& !(compareDouble "12345678901234567890")
&& !(compareDouble "-1")
&& !(compareDouble "-100")
&& !(compareDouble "-1234")
&& !(compareDouble "-1234567890")
&& !(compareDouble "-1234567890000")
&& !(compareDouble "-12345678901234567890")
&& !(compareDouble "0.0")
&& !(compareDouble "1.0")
&& !(compareDouble "-1.0")
&& !(compareDouble "-0.0")
&& !(compareDouble "-0.0")
&& !(compareDouble "0.1234")
&& !(compareDouble "0.01234")
&& !(compareDouble "-0.1234")
&& !(compareDouble "-0.01234")
&& !(compareDouble "1.234e+5")
&& !(compareDouble "1.234e-5")
&& !(compareDouble "-1.234e+5")
&& !(compareDouble "-1.234e-5")
```