core: Beginnings of parser module
This commit is contained in:
parent
6d130cdc3b
commit
222ae17180
5 changed files with 481 additions and 0 deletions
12
README.md
12
README.md
|
@ -56,6 +56,18 @@ solution.
|
|||
Provides wrappers over the standard library `IOArray` type to make them more
|
||||
ergonomic to use.
|
||||
|
||||
- [Parser](src/Parser.md)
|
||||
|
||||
Effectful parser mini-library
|
||||
|
||||
- [Interface](src/Parser/Interface.md)
|
||||
|
||||
Effectful parser API
|
||||
|
||||
- [ParserState](src/Parser/ParserState.md)
|
||||
|
||||
Internal state of a parser
|
||||
|
||||
## Index of years and days
|
||||
|
||||
- 2015
|
||||
|
|
|
@ -30,6 +30,8 @@ modules = Runner
|
|||
, Util.Eff
|
||||
, Util.Digits
|
||||
, Array
|
||||
, Parser
|
||||
, Parser.Interface
|
||||
|
||||
-- main file (i.e. file to load at REPL)
|
||||
main = Main
|
||||
|
|
8
src/Parser.md
Normal file
8
src/Parser.md
Normal file
|
@ -0,0 +1,8 @@
|
|||
# Parsing Utilities
|
||||
|
||||
```idris
|
||||
module Parser
|
||||
|
||||
import public Parser.Interface as Parser
|
||||
import public Parser.ParserState as Parser
|
||||
```
|
186
src/Parser/Interface.md
Normal file
186
src/Parser/Interface.md
Normal file
|
@ -0,0 +1,186 @@
|
|||
# The interface of a `Parser`
|
||||
|
||||
```idris
|
||||
module Parser.Interface
|
||||
|
||||
import public Data.List1
|
||||
|
||||
import public Parser.ParserState
|
||||
|
||||
import public Control.Eff
|
||||
|
||||
export infixr 4 >|
|
||||
export infixr 5 >&
|
||||
```
|
||||
|
||||
## Parser Errors
|
||||
|
||||
Combine the parser state at time of error with an error message.
|
||||
|
||||
```idris
|
||||
||| An error produced while parsing: a snapshot of the parser state paired with
||| a human-readable message.
public export
record ParseError where
  constructor MkParseError
  ||| The parser state stored alongside the error
  state : ParserInternal Id
  ||| Description of what went wrong
  message : String
|
||||
```
|
||||
|
||||
## Type Alias
|
||||
|
||||
```idris
|
||||
||| The effect stack parsers run in: `ParserState` for access to the input,
||| `Except ParseError` for throwing parse errors, and `Choose` for
||| alternatives (`alt` / `empty`).
public export
Parser : Type -> Type
Parser a = Eff [ParserState, Except ParseError, Choose] a
|
||||
```
|
||||
|
||||
## Error Generation
|
||||
|
||||
Provide a few effectful actions to generate an error from an error message, and
|
||||
either return it or throw it.
|
||||
|
||||
```idris
|
||||
||| Snapshot the current parser state and bundle it with `message` into a
||| `ParseError`. The error is returned as a value, not thrown.
export
parseError : Has ParserState fs => (message : String) -> Eff fs ParseError
parseError message = map (\snapshot => MkParseError snapshot message) save
|
||||
|
||||
||| Build a `ParseError` from `message` at the current parser state and throw
||| it via the `Except ParseError` effect.
export
throwParseError : Has ParserState fs => Has (Except ParseError) fs =>
                  (message : String) -> Eff fs a
throwParseError message = parseError message >>= throw
|
||||
|
||||
||| Run `x` and unwrap its `Just` result. If it produces `Nothing`, throw a
||| `ParseError` carrying `message` instead.
export
guardMaybe : Has ParserState fs => Has (Except ParseError) fs =>
             (message : String) -> Eff fs (Maybe a) -> Eff fs a
guardMaybe message x = do
  outcome <- x
  case outcome of
    Nothing => throwParseError message
    Just value => pure value
|
||||
```
|
||||
|
||||
## Running a parser
|
||||
|
||||
We will use the phrasing "rundown" to refer to running all the effects in the
|
||||
parser effect stack except `ParserState`, which is left in the effect stack to
|
||||
facilitate handling in the context of another monad or effect stack, since it
|
||||
benefits from mutability.
|
||||
|
||||
Rundown a parser, accepting the first returning parse, which may be failing or
|
||||
succeeding, and automatically generating a "No returning parses" error in the event
|
||||
no paths in the `Choose` effect produce a returning parse.
|
||||
|
||||
```idris
|
||||
||| Run the `Choose` and `Except` layers of a parser, leaving only
||| `ParserState` in the stack. `Choose` is collapsed with `Maybe`; if it
||| yields `Nothing`, a "No returning parses" error is thrown before `Except`
||| is handled, so the result is always an `Either ParseError a`.
export
rundownFirst : (f : Parser a) -> Eff [ParserState] (Either ParseError a)
rundownFirst f =
  runExcept $ guardMaybe "No returning parses" $ runChoose {f = Maybe} f
|
||||
```
|
||||
|
||||
## Utility functionality
|
||||
|
||||
### Parser combinators
|
||||
|
||||
Try to run a computation in the context of the `Parser` effect stack, if it
|
||||
fails (via `Except`), reset the state and resort to the supplied callback.
|
||||
|
||||
Also supply a version specialized to ignore the error value, returning `Just a`
|
||||
if the parse succeeds, and `Nothing` if it fails.
|
||||
|
||||
```idris
|
||||
||| Run `f`; if it throws a `ParseError`, restore the parser state saved
||| before `f` ran and continue with `err` applied to that error. On success
||| the result of `f` is returned and the state is left as `f` left it.
export
try : (f : Parser a) -> (err : ParseError -> Parser a) -> Parser a
try f err = do
  checkpoint <- save
  -- Handle `Except` locally, then lift back into the full parser stack
  Right value <- lift . runExcept $ f
    | Left failure => do
        load checkpoint
        err failure
  pure value
|
||||
|
||||
||| Like `try`, but discards the error: `Just` the result when `f` succeeds,
||| `Nothing` (with the state reset by `try`) when it throws.
export
tryMaybe : (f : Parser a) -> Parser (Maybe a)
tryMaybe f = try (Just <$> f) (\_ => pure Nothing)
|
||||
|
||||
||| Like `try`, but returns the outcome as a value: `Right` the result when
||| `f` succeeds, `Left` the error (with the state reset by `try`) when it
||| throws.
export
tryEither : (f : Parser a) -> Parser (Either ParseError a)
tryEither f = try (Right <$> f) (\failure => pure (Left failure))
|
||||
|
||||
||| Turns any error thrown by `f` into `empty`, i.e. silent backtracking within
||| `Choose`, after `try` has reset the parser state.
export
tryEmpty : (f : Parser a) -> Parser a
tryEmpty f = try f backtrack
  where
    backtrack : ParseError -> Parser a
    backtrack _ = empty
|
||||
```
|
||||
|
||||
Attempt to parse one of the given input parsers, in the provided order, invoking
|
||||
the provided error action on failure. This will suppress any errors returned by
|
||||
the input parsers by mapping them to `empty`.
|
||||
|
||||
The state will not be modified when an input parser fails.
|
||||
|
||||
```idris
|
||||
||| Combine the parsers in `xs`, in order, into a chain of `alt` alternatives.
||| Each parser is wrapped in `tryEmpty`, so its errors become `empty`; `err`
||| is the final alternative at the end of the chain.
export
oneOfE : Foldable f =>
         (err : Parser a) -> f (Parser a) -> Parser a
oneOfE err xs = foldr attempt err xs
  where
    -- One link of the chain: the candidate first, then everything after it
    attempt : Parser a -> Parser a -> Parser a
    attempt candidate fallback = alt (tryEmpty candidate) fallback
|
||||
```
|
||||
|
||||
Attempt to parse 0+ of an item
|
||||
|
||||
```idris
|
||||
||| Run `f` repeatedly, collecting its results in order, for as long as it
||| succeeds. The first failure of `f` ends the list rather than propagating,
||| so the result may be empty.
export
many : (f : Parser a) -> Parser (List a)
many f = do
  attempt <- tryMaybe f
  case attempt of
    Nothing => pure []
    Just first => (first ::) <$> many f
|
||||
```
|
||||
|
||||
Attempt to parse 1+ of an item, invoking the supplied error action on failure
|
||||
|
||||
```idris
|
||||
||| Parse one or more items with `f`. If the very first attempt fails, the
||| error is handed to `err`; otherwise the remaining items are gathered with
||| `many`.
export
atLeastOne : (err : ParseError -> Parser (List1 a)) -> (f : Parser a)
          -> Parser (List1 a)
atLeastOne err f = do
  outcome <- tryEither f
  case outcome of
    Left failure => err failure
    Right first => (first :::) <$> many f
|
||||
```
|
||||
|
||||
Lift a parser producing a `List` or `List1` of `Char` into a parser producing a
|
||||
`String`
|
||||
|
||||
```idris
|
||||
||| Run a parser producing a list of characters and `pack` the result into a
||| `String`.
export
parseString : Parser (List Char) -> Parser String
parseString x = pack <$> x
|
||||
|
||||
||| `parseString` for parsers producing a non-empty `List1` of characters.
export
parseString' : Parser (List1 Char) -> Parser String
parseString' x = parseString (forget <$> x)
|
||||
```
|
||||
|
||||
### Composition of boolean functions
|
||||
|
||||
```idris
|
||||
||| Conjunction of two predicates: true only when both `a` and `b` accept the
||| argument. `b` is only consulted when `a` accepts it.
public export
(>&) : (a : e -> Bool) -> (b : e -> Bool) -> (e -> Bool)
(>&) a b x = if a x then b x else False
|
||||
```
|
||||
|
||||
```idris
|
||||
||| Disjunction of two predicates: true when either `a` or `b` accepts the
||| argument. `b` is only consulted when `a` rejects it.
public export
(>|) : (a : e -> Bool) -> (b : e -> Bool) -> (e -> Bool)
(>|) a b x = if a x then True else b x
|
||||
```
|
273
src/Parser/ParserState.md
Normal file
273
src/Parser/ParserState.md
Normal file
|
@ -0,0 +1,273 @@
|
|||
# Parser State
|
||||
|
||||
An effectful description of the text a parser consumes
|
||||
|
||||
```idris
|
||||
module Parser.ParserState
|
||||
|
||||
import public Data.String
|
||||
import public Data.DPair
|
||||
import public Data.Refined
|
||||
import public Data.Refined.Int64
|
||||
import public Data.SortedMap
|
||||
import public Data.IORef
|
||||
|
||||
import public Control.Eff
|
||||
```
|
||||
|
||||
## Barbie Basics
|
||||
|
||||
Barbies are types that can "change their clothes". In Idris, this manifests as a
|
||||
type indexed by a type-level function that affects the types of the fields.
|
||||
|
||||
Since we know our usage here is going to be quite simple, and not even really
|
||||
making use of dependently typed fun, we are going to implement all the barbie
|
||||
functionality we need by hand, but if you feel like barbies might be a good fit
|
||||
for your problem, or you simply want to learn more, please check out a library
|
||||
like `barbies`[^1]
|
||||
|
||||
```idris
|
||||
||| The identity type-level function. Used as the index of `ParserInternal`
||| when its `f`-wrapped fields should hold plain values.
public export
Id : Type -> Type
Id x = x
|
||||
```
|
||||
|
||||
## Internal State of a Parser
|
||||
|
||||
Type alias for our refined `Int64`s
|
||||
|
||||
```idris
|
||||
||| Predicate on an `Int64` describing a valid index into an input of the given
||| `length`: it must satisfy both `From 0` and `LessThan length`.
public export
0 IsIndex : (length : Int64) -> Int64 -> Type
IsIndex length = From 0 && LessThan length
|
||||
|
||||
||| An `Int64` packaged with an erased proof that it satisfies `IsIndex length`.
public export
record Index (length : Int64) where
  constructor MkIndex
  ||| The raw index value
  index : Int64
  -- Erased, automatically searched proof that `index` satisfies `IsIndex length`
  {auto 0 prf : IsIndex length index}
|
||||
```
|
||||
|
||||
<!-- idris
|
||||
-- Indices are compared by their raw `Int64` value only; the erased bounds
-- proof plays no part.
Eq (Index i) where
  x == y = x.index == y.index

-- Indices are ordered by their raw `Int64` value.
Ord (Index i) where
  compare x y = compare x.index y.index
|
||||
-->
|
||||
|
||||
Stores the location we are currently at in the string, and metadata about it for
|
||||
providing good error messages. Parsing an empty input isn't very interesting, so
|
||||
we exclude inputs of length zero, since that will make other things easier.
|
||||
|
||||
```idris
|
||||
||| State representing a parser's position in the text. The `position` and
||| `end_of_input` fields are wrapped in `f`, so the same record can hold plain
||| values (`Id`) or mutable references (`IORef`).
public export
record ParserInternal (f : Type -> Type) where
  constructor MkInternal
  -- IDEA: Maybe go full barbie and have this be a field, so that we can, say,
  -- read directly from a file instead of from an already loaded string using the
  -- same parser
  ||| The input string
  input : String
  ||| The length of the input string
  length : Int64
  -- Erased proof that `length` really is the length of `input`
  {auto 0 len_prf : length = cast (strLength input)}
  ||| A sorted map from the position of the start of each line to that line's
  ||| zero-indexed line number
  line_starts : SortedMap (Index length) Nat
  ||| The position of the next character to read in the input
  position : f (Index length)
  ||| True if we have hit the end of input
  end_of_input : f Bool
%name ParserInternal pi, pj, pk
|
||||
```
|
||||
|
||||
### ParserInternal Methods
|
||||
|
||||
Construct a `ParserInternal` from an input string. Will fail if the input is
|
||||
empty, because then we can't index it.
|
||||
|
||||
```idris
|
||||
||| Build the initial parser state for `input`, positioned at index 0 with the
||| end-of-input flag cleared and the line-start table precomputed.
|||
||| Returns `Nothing` when 0 is not a valid index into `input`, i.e. when the
||| input is empty.
export
newInternal : (input : String) -> Maybe (ParserInternal Id)
newInternal input =
  -- Check if we have at least one character in the input
  case refine0 0 {p = IsIndex (cast (strLength input))} of
    Nothing => Nothing
    Just (Element position _) => Just $
      MkInternal input
                 (cast (strLength input))
                 (mkStarts' input (MkIndex position))
                 (MkIndex position)
                 False
  where
    -- Walk `str` from `idx`, accumulating (line start, line number) pairs in
    -- `acc`. `next` is True when `idx` itself starts a new line and still has
    -- to be recorded; `count` is the number to give the next recorded line.
    -- Declared `partial`; the caller below wraps it in `assert_total`.
    partial
    mkStarts :
      (str : String) -> (acc : List (Index (cast (strLength str)), Nat))
      -> (idx : Index (cast (strLength str))) -> (count : Nat) -> (next : Bool)
      -> List (Index (cast (strLength str)), Nat)
    -- `idx` is a line start: record it, bump the line counter, keep scanning
    mkStarts str acc idx count True =
      mkStarts str ((idx, count) :: acc) idx (S count) False
    mkStarts str acc idx count False =
      -- Stop once there is no valid index after `idx`
      case refine0 (idx.index + 1) {p = IsIndex (cast (strLength str))} of
        Nothing => acc
        Just (Element next _) =>
          -- A newline at `idx` means the following index starts a new line
          if strIndex str (cast idx.index) == '\n'
            then mkStarts str acc (MkIndex next) count True
            else mkStarts str acc (MkIndex next) count False
    -- Collect the line starts of `str` beginning at `start` (treated as the
    -- start of line 0) and load them into a `SortedMap`
    mkStarts' : (str : String) -> (start : Index (cast (strLength str)))
      -> SortedMap (Index (cast (strLength str))) Nat
    mkStarts' str start =
      let
        pairs = assert_total $
          mkStarts str [] start 0 True
      in fromList pairs
|
||||
```
|
||||
|
||||
Get the current line and column number
|
||||
|
||||
```idris
|
||||
||| Returns the current position of the parser cursor as a zero-indexed
||| (line, column) pair
export
positionPair : (pi : ParserInternal Id) -> (Nat, Nat)
positionPair pi =
  case lookup pi.position pi.line_starts of
    -- The cursor sits exactly on a line start, so the column is 0
    Just line => (line, 0)
    Nothing =>
      -- Otherwise find the closest line start before the cursor
      case fst (lookupBetween pi.position pi.line_starts) of
        -- There is always at least one line start, and being on the first
        -- one is handled above, so with no preceding start we can safely
        -- return a nonsense value
        Nothing => (0, 0)
        -- The cursor is always after the start of its line, so the
        -- difference is non-negative and the cast to `Nat` is safe
        Just (start, linum) =>
          (linum, cast {to = Nat} (pi.position.index - start.index))
|
||||
```
|
||||
|
||||
### More Barbie Functionality
|
||||
|
||||
Provide the barbie analogs of `map` and `traverse` for our `ParserInternal`
|
||||
type, allowing us to change the type of the values in a `ParserInternal` by mapping
|
||||
over those values.
|
||||
|
||||
```idris
|
||||
||| Barbie analog of `map`: apply `fun` to each `f`-wrapped field (`position`
||| and `end_of_input`), leaving the other fields untouched.
export
bmap : ({0 a : Type} -> f a -> g a) -> ParserInternal f -> ParserInternal g
bmap fun (MkInternal input length line_starts position end_of_input) =
  MkInternal input length line_starts (fun position) (fun end_of_input)
|
||||
|
||||
||| Barbie analog of `traverse`: run the effectful `fun` on each `f`-wrapped
||| field (`position` first, then `end_of_input`) and rebuild the record from
||| the results.
export
btraverse : Applicative e => ({0 a : Type} -> f a -> e (g a))
          -> ParserInternal f -> e (ParserInternal g)
btraverse fun (MkInternal input length line_starts position end_of_input) =
  MkInternal input length line_starts <$> fun position <*> fun end_of_input
|
||||
```
|
||||
|
||||
## Three way result
|
||||
|
||||
```idris
|
||||
||| Three way result returned from attempting to parse a single char
public export
data ParseCharResult : Type -> Type where
  ||| A character was read and accepted
  GotChar : (char : Char) -> ParseCharResult e
  ||| A character was read but rejected; carries the caller-built error
  GotError : (err : e) -> ParseCharResult e
  ||| No character was available because the end of input was already reached
  EndOfInput : ParseCharResult e
|
||||
```
|
||||
|
||||
## The Effect Type
|
||||
|
||||
```idris
|
||||
||| The operations of the parser state effect; the type index is the result
||| type of each operation.
export
data ParserState : Type -> Type where
  ||| Retrieve the current state as a plain (`Id`) snapshot
  Save : ParserState (ParserInternal Id)
  ||| Replace the current state with the given snapshot
  Load : (ParserInternal Id) -> ParserState ()
  -- TODO: Maybe add a ParseString that parses a string of characters as a
  -- string using efficient slicing?
  ||| Read one character and test it against `predicate`; `err` builds the
  ||| error value from the offending character when the test fails
  ParseChar : (predicate : Char -> Bool) -> (err : Char -> e)
    -> ParserState (ParseCharResult e)
  ||| Query whether the end of input has been reached
  ParseEoF : ParserState Bool
|
||||
```
|
||||
|
||||
### Actions
|
||||
|
||||
```idris
|
||||
||| Return the current state, for potential later reloading with `load`.
||| Sends the `Save` operation of the `ParserState` effect.
export
save : Has ParserState fs => Eff fs (ParserInternal Id)
save = send Save
|
||||
|
||||
||| Replace the current parser state with a previously obtained snapshot
export
load : Has ParserState fs => ParserInternal Id -> Eff fs ()
load snapshot = send (Load snapshot)
|
||||
|
||||
||| Try to read one character and check it against `predicate`. The state
||| advances only when the character is accepted; on rejection `err` builds
||| the error value from the offending character and the state is unchanged.
export
parseChar : Has ParserState fs => (predicate : Char -> Bool) -> (err : Char -> e)
          -> Eff fs (ParseCharResult e)
parseChar predicate err = send (ParseChar predicate err)
|
||||
|
||||
||| "Parse" the end of input, returning `True` if the parser state is currently
||| at the end of the input. Sends the `ParseEoF` operation of the
||| `ParserState` effect.
export
parseEoF : Has ParserState fs => Eff fs Bool
parseEoF = send ParseEoF
|
||||
```
|
||||
|
||||
## Handling a ParserState
|
||||
|
||||
### IO Context
|
||||
|
||||
```idris
|
||||
||| Interpret a single `ParserState` operation in any `HasIO` monad, using a
||| `ParserInternal IORef` (itself held in an `IORef`) as the mutable backing
||| state.
export
handleParserStateIO : HasIO io =>
  IORef (ParserInternal IORef) -> ParserState t -> io t
-- Save: read every mutable field out into a plain snapshot
handleParserStateIO ref Save = do
  current <- readIORef ref
  btraverse readIORef current
-- Load: allocate fresh references for the snapshot's fields and swap them in
handleParserStateIO ref (Load snapshot) = do
  fresh <- btraverse newIORef snapshot
  writeIORef ref fresh
handleParserStateIO ref (ParseChar predicate err) = do
  current <- readIORef ref
  -- Nothing left to read once the end-of-input flag has been set
  False <- readIORef current.end_of_input
    | _ => pure EndOfInput
  here <- readIORef current.position
  let char = assert_total $ strIndex current.input (cast here.index)
  -- A rejected character leaves the state untouched
  True <- pure $ predicate char
    | _ => pure (GotError (err char))
  -- The refinement type on the position forces us to check that the
  -- incremented position is still in bounds; if it is not, set the
  -- end_of_input flag instead of advancing
  case refine0 (here.index + 1) {p = IsIndex current.length} of
    Nothing => do
      writeIORef current.end_of_input True
      pure (GotChar char)
    Just (Element next _) => do
      writeIORef current.position $ MkIndex next
      pure (GotChar char)
-- ParseEoF: report the end-of-input flag
handleParserStateIO ref ParseEoF = do
  current <- readIORef ref
  readIORef current.end_of_input
|
||||
|
||||
||| Build a mutable parser state for `str`: construct the pure state with
||| `newInternal`, move its `f`-wrapped fields into fresh `IORef`s, and wrap
||| the whole record in an `IORef`. Yields `Nothing` when `newInternal` does.
export
newInternalIO : HasIO io => String -> io $ Maybe (IORef (ParserInternal IORef))
newInternalIO str =
  case newInternal str of
    Nothing => pure Nothing
    Just pureState => do
      mutableState <- btraverse newIORef pureState
      Just <$> newIORef mutableState
|
||||
```
|
||||
|
||||
## Footnotes
|
||||
|
||||
[^1]: https://github.com/stefan-hoeck/idris2-barbies
|
Loading…
Add table
Reference in a new issue