Basic Parsing Interface

This commit is contained in:
Nathan McCarty 2025-01-24 01:45:02 -05:00
parent 24285db686
commit 5a2ffc1058
5 changed files with 723 additions and 0 deletions

View file

@ -59,6 +59,18 @@ solution.
Provider wrappers over the standard library `IOArray` type to make them more Provider wrappers over the standard library `IOArray` type to make them more
ergonomic to use. ergonomic to use.
- [Parser](src/Parser.md)
Effectful parser mini-library
- [Interface](src/Parser/Interface.md)
Effectful parser API
- [ParserState](src/Parser/ParserState.md)
Internal state of a parser
## Index of years and days ## Index of years and days
- 2015 - 2015

View file

@ -30,6 +30,8 @@ modules = Runner
, Util.Eff , Util.Eff
, Util.Digits , Util.Digits
, Array , Array
, Parser
, Parser.Interface
-- main file (i.e. file to load at REPL) -- main file (i.e. file to load at REPL)
main = Main main = Main

8
src/Parser.md Normal file
View file

@ -0,0 +1,8 @@
# Parsing Utilties
```idris
module Parser
import public Parser.Interface as Parser
import public Parser.ParserState as Parser
```

332
src/Parser/Interface.md Normal file
View file

@ -0,0 +1,332 @@
# The interface of a `Parser`
```idris
module Parser.Interface
import public Data.List1
import public Parser.ParserState
import public Control.Eff
export infixr 4 >|
export infixr 5 >&
```
## Parser Errors
Combine the parser state at time of error with an error message.
```idris
public export
data ParseError : Type where
-- TODO: Rename this constructor
MkParseError : (state : ParserInternal Id) -> (message : String) -> ParseError
BeforeParse : (message : String) -> ParseError
NestedErrors : (state : ParserInternal Id) -> (message : String)
-> (rest : List ParseError) -> ParseError
```
```idris hide
export
Show ParseError where
show (MkParseError state message) =
let (line, col) = positionPair state
(line, col) = (show line, show col)
position = show state.position.index
in "Error at line \{line}, column \{col} (\{position}): \{message}"
show (BeforeParse message) =
"Error before parsing: \{message}"
show (NestedErrors state message rest) =
let rest = assert_total $joinBy "\n" . map ((" " ++) . show) $ rest
(line, col) = positionPair state
(line, col) = (show line, show col)
position = show state.position.index
first = "Error at line \{line}, column \{col} (\{position}): \{message}"
in "\{first}\n\{rest}"
```
## Type Alias
```idris
public export
Parser : Type -> Type
Parser a = Eff [ParserState, Except ParseError] a
```
## Error Generation
Provide a few effectful actions to generate an error from an error message, and
either return it or throw it.
```idris
export
parseError : Has ParserState fs => (message : String) -> Eff fs ParseError
parseError message = do
state <- save
pure $ MkParseError state message
export
throwParseError : Has ParserState fs => Has (Except ParseError) fs =>
(message : String) -> Eff fs a
throwParseError message = do
err <- parseError message
throw err
export
guardMaybe : Has ParserState fs => Has (Except ParseError) fs =>
(message : String) -> Eff fs (Maybe a) -> Eff fs a
guardMaybe message x = do
Just x <- x
| _ => throwParseError message
pure x
export
replaceError : (message : String) -> Parser (a -> Parser b)
replaceError message = do
state <- save
pure (\_ => throw $ MkParseError state message)
```
## Running a parser
We will use the phrasing "rundown" to refer to running all the effects in the
parser effect stack except `ParserState`, which is left in the effect stack to
facilitate handling in the context of another monad or effect stack, since it
benefits from mutability.
Rundown a parser, accepting the first returning parse, which may be failing or
succeding, and automatically generating a "no valid parses" message in the event
no paths in the `Choice` effect produce a returning parse.
```idris
export
rundownFirst : (f : Parser a) -> Eff [ParserState] (Either ParseError a)
rundownFirst f =
runExcept f
```
Provide wrappers for `rundownFirst` for evaluating it in various contexts.
```idris
export
runFirstIO : (f : Parser a) -> String -> IO (Either ParseError a)
runFirstIO f str = do
Just state <- newInternalIO str
| _ => pure . Left $ BeforeParse "Empty input"
runEff (rundownFirst f) [handleParserStateIO state]
export
runFirstIODebug : (f : Parser a) -> String -> IO (Either ParseError a)
runFirstIODebug f str = do
Just state <- newInternalIO str
| _ => pure . Left $ BeforeParse "Empty input"
runEff (rundownFirst f) [handleParserStateIODebug state]
export
runFirst : (f : Parser a) -> String -> Eff fs (Either ParseError a)
runFirst f str = do
Just state <- pure $ newInternal str
| _ => pure . Left $ BeforeParse "Empty input"
(result, _) <- lift . runParserState state . rundownFirst $ f
pure result
export
runFirst' : (f : Parser a) -> String -> Either ParseError a
runFirst' f str = extract $ runFirst f str {fs = []}
```
## Utility functionality
### Parser combinators
Try to run a computation in the context of the `Parser` effect stack, if it
fails (via `Except`), reset the state and resort to the supplied callback
Also supply a version specialized to ignore the error value, returning `Just a`
if the parse succeeds, and `Nothing` if it fails.
```idris
export
try : (f : Parser a) -> (err : ParseError -> Parser a) -> Parser a
try f err = do
starting_state <- save
result <- lift . runExcept $ f
case result of
Left error => do
load starting_state
err error
Right result => pure result
export
tryMaybe : (f : Parser a) -> Parser (Maybe a)
tryMaybe f = try (map Just f) (\_ => pure Nothing)
export
tryEither : (f : Parser a) -> Parser (Either ParseError a)
tryEither f = try (map Right f) (pure . Left)
```
Attempt to parse one of the given input parsers, in the provided order, invoking
the provided error action on failure.
The state will not be modified when an input parser fails
```idris
export
oneOfE : (err : String) -> List (Parser a) -> Parser a
oneOfE err xs = do
start <- save
oneOfE' err start [] xs
where
oneOfE' : (err : String) -> (start : ParserInternal Id)
-> (errs : List ParseError) -> List (Parser a) -> Parser a
oneOfE' err start errs [] = do
throw $ NestedErrors start err (reverse errs)
oneOfE' err start errs (x :: xs) = do
x <- tryEither x
case x of
Right val => pure val
Left error => oneOfE' err start (error :: errs) xs
```
Attempt to parse 0+ of an item
```idris
export
many : (f : Parser a) -> Parser (List a)
many f = do
Just next <- tryMaybe f
| _ => pure []
map (next ::) $ many f
```
Attempt to parse 1+ of an item, invoking the supplied error action on failure
```idris
export
atLeastOne : (err : ParseError -> Parser (List1 a)) -> (f : Parser a)
-> Parser (List1 a)
atLeastOne err f = do
Right next <- tryEither f
| Left e => err e
map (next :::) $ many f
```
Lift a parser producing a `List` or `List1` of `Char` into a parser producing a
`String`
```idris
-- TODO: Rename these
export
liftString : Parser (List Char) -> Parser String
liftString x = do
xs <- x
pure $ pack xs
export
liftString' : Parser (List1 Char) -> Parser String
liftString' x = liftString $ map forget x
```
Attempt to parse a specified character
```idris
export
charExact : Char -> Parser Char
charExact c = do
result <- charExact' c
case result of
GotChar char => pure char
GotError err => throwParseError "Got \{show err} Expected \{show c}"
EndOfInput => throwParseError "End of input"
```
Attempt to parse one of a list of chars
```idris
export
theseChars : List Char -> Parser Char
theseChars cs = do
pnote "Parsing one of: \{show cs}"
result <- charPredicate (\x => any (== x) cs)
case result of
GotChar char => pure char
GotError err => throwParseError "Got \{show err} Expected one of \{show cs}"
EndOfInput => throwParseError "End of input"
```
Attempt to parse an exact string
```idris
export
exactString : String -> Parser String
exactString str with (asList str)
exactString "" | [] = do
pnote "Parsing the empty string"
pure ""
exactString input@(strCons c str) | (c :: x) = do
pnote "Parsing exact string \{show input}"
GotChar next <- charPredicate (== c)
| GotError err => throwParseError "Got \{show err} expected \{show c}"
| EndOfInput => throwParseError "End of input"
rest <- exactString str | x
pure input
```
Wrap a parser in delimiter characters, discarding the value of the delimiters
```idris
export
delimited : (before, after : Char) -> Parser a -> Parser a
delimited before after x = do
pnote "Parsing delimited by \{show before} \{show after}"
starting_state <- save
_ <- charExact before
Right val <- tryEither x
| Left err => do
load starting_state
throw err
Just _ <- tryMaybe $ charExact after
| _ => do
load starting_state
throw $ MkParseError starting_state "Unmatched delimiter \{show before}"
pure val
```
Consume any number of characters of the provided character class and discard the
result. Also a version for doing so on both sides of a provided parser
```idris
export
nom : Parser Char -> Parser ()
nom x = do
pnote "Nomming"
_ <- many x
pure ()
export
surround : (around : Parser Char) -> (item : Parser a) -> Parser a
surround around item = do
pnote "Surrounding"
nom around
val <- item
nom around
pure val
```
### Composition of boolean functions
```idris
||| Return true if both of the predicates evaluate to true
public export
(>&) : (a : e -> Bool) -> (b : e -> Bool) -> (e -> Bool)
(>&) a b x = a x && b x
```
```idris
||| Return true if either of the predicates evaulates to true
public export
(>|) : (a : e -> Bool) -> (b : e -> Bool) -> (e -> Bool)
(>|) a b x = a x || b x
```

369
src/Parser/ParserState.md Normal file
View file

@ -0,0 +1,369 @@
# Parser State
An effectful description of the text a parser consumes
```idris
module Parser.ParserState
import public Data.String
import public Data.DPair
import public Data.Refined
import public Data.Refined.Int64
import public Data.SortedMap
import public Data.IORef
import Data.Primitives.Interpolation
import System.File
import public Control.Eff
```
## Barbie Basics
Barbies are types that can "change their clothes", in Idris, this manifests as a
type indexed by a type-level function that affects the types of the fields.
Since we know our usage here is going to be quite simple, and not even really
making use of dependently typed fun, we are going to implement all the barbie
functionality we need by hand, but if you feel like barbies might be a good fit
for your problem, or you simply want to learn more, please check out a library
like `barbies`[^1]
```idris
public export
Id : Type -> Type
Id x = x
```
## Internal State of a Parser
Type alias for our refined `Int64`s
```idris
public export
0 IsIndex : (length : Int64) -> Int64 -> Type
IsIndex length = From 0 && LessThan length
public export
record Index (length : Int64) where
constructor MkIndex
index : Int64
{auto 0 prf : IsIndex length index}
```
```idris hide
export
Eq (Index i) where
x == y = x.index == y.index
export
Ord (Index i) where
compare x y = compare x.index y.index
export
Show (Index i) where
show (MkIndex index) = show index
```
Stores the location we are currently at in the string, and metadata about it for
providing good error messages. Parsing an empty input isn't very interesting, so
we exclude inputs of length zero, since that will make other things easier.
```idris
||| State representing a parser's position in the text
public export
record ParserInternal (f : Type -> Type) where
constructor MkInternal
-- IDEA: Maybe go full barbie and have this be a field, so that we can, say,
-- read directly from a file instead of from an already loaded string using the
-- same parser
||| The input string
input : String
||| The length of the input string
length : Int64
{auto 0 len_prf : length = cast (strLength input)}
||| A sorted set containing the positions of the start of each line
line_starts : SortedMap (Index length) Nat
||| The position of the next character to read in the input
position : f (Index length)
||| True if we have hit the end of input
end_of_input : f Bool
%name ParserInternal pi, pj, pk
```
### ParserInternal Methods
Construct a `ParserInternal` from an input string. Will fail if the input is
empty, because then we can't index it.
```idris
export
newInternal : (input : String) -> Maybe (ParserInternal Id)
newInternal input =
-- Check if we have at least one character in the input
case refine0 0 {p = IsIndex (cast (strLength input))} of
Nothing => Nothing
Just (Element position _) => Just $
MkInternal input
(cast (strLength input))
(mkStarts' input (MkIndex position))
(MkIndex position)
False
where
partial
mkStarts :
(str : String) -> (acc : List (Index (cast (strLength str)), Nat))
-> (idx : Index (cast (strLength str))) -> (count : Nat) -> (next : Bool)
-> List (Index (cast (strLength str)), Nat)
mkStarts str acc idx count True =
mkStarts str ((idx, count) :: acc) idx (S count) False
mkStarts str acc idx count False =
case refine0 (idx.index + 1) {p = IsIndex (cast (strLength str))} of
Nothing => acc
Just (Element next _) =>
if strIndex str (cast idx.index) == '\n'
then mkStarts str acc (MkIndex next) count True
else mkStarts str acc (MkIndex next) count False
mkStarts' : (str : String) -> (start : Index (cast (strLength str)))
-> SortedMap (Index (cast (strLength str))) Nat
mkStarts' str start =
let
pairs = assert_total $
mkStarts str [] start 0 True
in fromList pairs
```
Get the current line and column number
```idris
||| Returns the current position of the parser cursor in, zero indexed, (line,
||| column) form
export
positionPair : (pi : ParserInternal Id) -> (Nat, Nat)
positionPair pi =
case lookup pi.position pi.line_starts of
Just line => (line, 0)
Nothing =>
case lookupBetween pi.position pi.line_starts of
-- There will always be at least one line start, and we would have hit
-- the previous case if we were at the start of the first one, so if
-- there isn't a before, we can return a nonsense value safely
(Nothing, _) => (0, 0)
(Just (start, linum), after) =>
-- Our index will always be after the start of the line, for previously
-- mentioned reasons, so this cast is safe
let col = cast {to = Nat} $ pi.position.index - start.index
in (linum, col)
```
```idris hide
export
Show (ParserInternal Id) where
show pi =
let (line, col) = positionPair pi
current = assert_total $ strIndex pi.input (cast pi.position.index)
pos = pi.position.index
eof = show pi.end_of_input
in "Position: \{pos}(\{line}, \{col}}) Value: \{show current} EoF: \{eof}"
```
### More Barbie Functionality
Provide the barbie analogs of `map` and `traverse` for our `ParserInternal`
type, allowing us to change the type the values in a `ParserInternal` by mapping
over those values.
```idris
export
bmap : ({0 a : Type} -> f a -> g a) -> ParserInternal f -> ParserInternal g
-- bmap f = bmap_ (\_ => f)
bmap fun (MkInternal input length line_starts position end_of_input) =
let position' = fun position
end_of_input' = fun end_of_input
in MkInternal input length line_starts position' end_of_input'
export
btraverse : Applicative e => ({0 a : Type} -> f a -> e (g a))
-> ParserInternal f -> e (ParserInternal g)
btraverse fun (MkInternal input length line_starts position end_of_input) =
let pures = (MkInternal input length line_starts)
in [| pures (fun position) (fun end_of_input)|]
```
## Three way result
```idris
||| Three way result returned from attempting to parse a single char
public export
data ParseCharResult : Type where
GotChar : (char : Char) -> ParseCharResult
GotError : (err : Char) -> ParseCharResult
EndOfInput : ParseCharResult
```
## The Effect Type
```idris
export
data ParserState : Type -> Type where
Save : ParserState (ParserInternal Id)
Load : (ParserInternal Id) -> ParserState ()
-- TODO: Maybe add a ParseString that parses a string of characters as a
-- string using efficent slicing?
ParseChar : (predicate : Char -> Bool) -> ParserState ParseCharResult
ParseExactChar : (char : Char) -> ParserState ParseCharResult
ParseEoF : ParserState Bool
Note : Lazy String -> ParserState ()
```
```idris hide
Show (ParserState t) where
show Save = "Saving state"
show (Load pi) = "Loading state \{show pi}"
show (ParseChar predicate) = "Parsing char"
show (ParseExactChar char) = "Parsing char \{show char}"
show ParseEoF = "Parsing EoF"
show (Note _) = "Note"
```
### Actions
```idris
||| Return the current state, for potential later reloading
export
save : Has ParserState fs => Eff fs (ParserInternal Id)
save = send Save
||| Reset to the provided state
export
load : Has ParserState fs => ParserInternal Id -> Eff fs ()
load pi = send $ Load pi
||| Attempt to parse a char, checking to see if it complies with the supplied
||| predicate, updates the state if parsing succeeds, does not alter it in an
||| error condition.
export
charPredicate : Has ParserState fs => (predicate : Char -> Bool)
-> Eff fs ParseCharResult
charPredicate predicate = send $ ParseChar predicate
||| Parse a char by exact value
export
charExact' : Has ParserState fs => (chr : Char) -> Eff fs ParseCharResult
charExact' chr = send $ ParseExactChar chr
||| "Parse" the end of input, returning `True` if the parser state is currently
||| at the end of the input
export
parseEoF : Has ParserState fs => Eff fs Bool
parseEoF = send ParseEoF
||| Make a note to be output when running with a debug handler
export
pnote : Has ParserState fs => Lazy String -> Eff fs ()
pnote x = send $ Note x
```
## Handling a ParserState
### IO Context
```idris
export
handleParserStateIO : HasIO io =>
IORef (ParserInternal IORef) -> ParserState t -> io t
handleParserStateIO pi Save = do
pi <- readIORef pi
btraverse readIORef pi
handleParserStateIO pi (Load pj) = do
pj <- btraverse newIORef pj
writeIORef pi pj
handleParserStateIO pi (ParseChar predicate) = do
pi <- readIORef pi
False <- readIORef pi.end_of_input
| _ => pure EndOfInput
position <- readIORef pi.position
let char = assert_total $ strIndex pi.input (cast position.index)
True <- pure $ predicate char
| _ => pure $ GotError char
-- Our refinement type on the position forces us to check that the length is
-- in bounds after incrementing it, if its out of bounds, set the end_of_input
-- flag
case refine0 (position.index + 1) {p = IsIndex pi.length} of
Nothing => do
writeIORef pi.end_of_input True
pure $ GotChar char
Just (Element next _) => do
writeIORef pi.position $ MkIndex next
pure $ GotChar char
handleParserStateIO pi (ParseExactChar chr) = do
-- TODO: do this directly?
handleParserStateIO pi (ParseChar (== chr))
handleParserStateIO pi ParseEoF = do
pi <- readIORef pi
readIORef pi.end_of_input
-- We ignore notes in non-debug mode
handleParserStateIO pi (Note _) = pure ()
export
newInternalIO : HasIO io => String -> io $ Maybe (IORef (ParserInternal IORef))
newInternalIO str = do
Just internal <- pure $ newInternal str
| _ => pure Nothing
internal <- btraverse newIORef internal
map Just $ newIORef internal
```
Wrapper with debugging output
```idris
export
handleParserStateIODebug : HasIO io =>
IORef (ParserInternal IORef) -> ParserState t -> io t
handleParserStateIODebug x (Note note) = do
state <- readIORef x
state <- btraverse readIORef state
_ <- fPutStrLn stderr "Note \{note} -> \{show state}"
pure ()
handleParserStateIODebug x y = do
state <- readIORef x
state <- btraverse readIORef state
_ <- fPutStrLn stderr "\{show y} -> \{show state}"
handleParserStateIO x y
```
### State context
```idris
unPS : ParserInternal Id -> ParserState a -> (a, ParserInternal Id)
unPS pi Save = (pi, pi)
unPS pi (Load pj) = ((), pi)
unPS pi (ParseChar predicate) =
if pi.end_of_input
then (EndOfInput, pi)
else let
char = assert_total $ strIndex pi.input (cast pi.position.index)
in if predicate char
then case refine0 (pi.position.index + 1) {p = IsIndex pi.length} of
Nothing =>
(GotChar char, {end_of_input := True} pi)
Just (Element next _) =>
(GotChar char, {position := MkIndex next} pi)
else (GotError char, pi)
unPS pi (ParseExactChar chr) = unPS pi (ParseChar (== chr))
unPS pi ParseEoF = (pi.end_of_input, pi)
unPS pi (Note _) = ((), pi)
export
runParserState : Has ParserState fs =>
(s : ParserInternal Id) -> Eff fs t
-> Eff (fs - ParserState) (t, ParserInternal Id)
runParserState s =
handleRelayS s (\x, y => pure (y, x)) $ \s2,ps,f =>
let (a, st) = unPS s2 ps
in f st a
```
## Footnotes
[^1]: https://github.com/stefan-hoeck/idris2-barbies