369 lines
11 KiB
Markdown
369 lines
11 KiB
Markdown
# Parser State
|
|
|
|
An effectful description of the text a parser consumes
|
|
|
|
```idris
|
|
module Parser.ParserState
|
|
|
|
import public Data.String
|
|
import public Data.DPair
|
|
import public Data.Refined
|
|
import public Data.Refined.Int64
|
|
import public Data.SortedMap
|
|
import public Data.IORef
|
|
|
|
import Data.Primitives.Interpolation
|
|
import System.File
|
|
|
|
import public Control.Eff
|
|
```
|
|
|
|
## Barbie Basics
|
|
|
|
Barbies are types that can "change their clothes", in Idris, this manifests as a
|
|
type indexed by a type-level function that affects the types of the fields.
|
|
|
|
Since we know our usage here is going to be quite simple, and not even really
|
|
making use of dependently typed fun, we are going to implement all the barbie
|
|
functionality we need by hand, but if you feel like barbies might be a good fit
|
|
for your problem, or you simply want to learn more, please check out a library
|
|
like `barbies`[^1]
|
|
|
|
```idris
|
|
public export
|
|
Id : Type -> Type
|
|
Id x = x
|
|
```
|
|
|
|
## Internal State of a Parser
|
|
|
|
Type alias for our refined `Int64`s
|
|
|
|
```idris
|
|
public export
|
|
0 IsIndex : (length : Int64) -> Int64 -> Type
|
|
IsIndex length = From 0 && LessThan length
|
|
|
|
public export
|
|
record Index (length : Int64) where
|
|
constructor MkIndex
|
|
index : Int64
|
|
{auto 0 prf : IsIndex length index}
|
|
```
|
|
|
|
```idris hide
|
|
export
|
|
Eq (Index i) where
|
|
x == y = x.index == y.index
|
|
|
|
export
|
|
Ord (Index i) where
|
|
compare x y = compare x.index y.index
|
|
|
|
export
|
|
Show (Index i) where
|
|
show (MkIndex index) = show index
|
|
```
|
|
|
|
Stores the location we are currently at in the string, and metadata about it for
|
|
providing good error messages. Parsing an empty input isn't very interesting, so
|
|
we exclude inputs of length zero, since that will make other things easier.
|
|
|
|
```idris
|
|
||| State representing a parser's position in the text
|
|
public export
|
|
record ParserInternal (f : Type -> Type) where
|
|
constructor MkInternal
|
|
-- IDEA: Maybe go full barbie and have this be a field, so that we can, say,
|
|
-- read directly from a file instead of from an already loaded string using the
|
|
-- same parser
|
|
||| The input string
|
|
input : String
|
|
||| The length of the input string
|
|
length : Int64
|
|
{auto 0 len_prf : length = cast (strLength input)}
|
|
||| A sorted set containing the positions of the start of each line
|
|
line_starts : SortedMap (Index length) Nat
|
|
||| The position of the next character to read in the input
|
|
position : f (Index length)
|
|
||| True if we have hit the end of input
|
|
end_of_input : f Bool
|
|
%name ParserInternal pi, pj, pk
|
|
```
|
|
|
|
### ParserInternal Methods
|
|
|
|
Construct a `ParserInternal` from an input string. Will fail if the input is
|
|
empty, because then we can't index it.
|
|
|
|
```idris
|
|
export
|
|
newInternal : (input : String) -> Maybe (ParserInternal Id)
|
|
newInternal input =
|
|
-- Check if we have at least one character in the input
|
|
case refine0 0 {p = IsIndex (cast (strLength input))} of
|
|
Nothing => Nothing
|
|
Just (Element position _) => Just $
|
|
MkInternal input
|
|
(cast (strLength input))
|
|
(mkStarts' input (MkIndex position))
|
|
(MkIndex position)
|
|
False
|
|
where
|
|
partial
|
|
mkStarts :
|
|
(str : String) -> (acc : List (Index (cast (strLength str)), Nat))
|
|
-> (idx : Index (cast (strLength str))) -> (count : Nat) -> (next : Bool)
|
|
-> List (Index (cast (strLength str)), Nat)
|
|
mkStarts str acc idx count True =
|
|
mkStarts str ((idx, count) :: acc) idx (S count) False
|
|
mkStarts str acc idx count False =
|
|
case refine0 (idx.index + 1) {p = IsIndex (cast (strLength str))} of
|
|
Nothing => acc
|
|
Just (Element next _) =>
|
|
if strIndex str (cast idx.index) == '\n'
|
|
then mkStarts str acc (MkIndex next) count True
|
|
else mkStarts str acc (MkIndex next) count False
|
|
mkStarts' : (str : String) -> (start : Index (cast (strLength str)))
|
|
-> SortedMap (Index (cast (strLength str))) Nat
|
|
mkStarts' str start =
|
|
let
|
|
pairs = assert_total $
|
|
mkStarts str [] start 0 True
|
|
in fromList pairs
|
|
```
|
|
|
|
Get the current line and column number
|
|
|
|
```idris
|
|
||| Returns the current position of the parser cursor in, zero indexed, (line,
|
|
||| column) form
|
|
export
|
|
positionPair : (pi : ParserInternal Id) -> (Nat, Nat)
|
|
positionPair pi =
|
|
case lookup pi.position pi.line_starts of
|
|
Just line => (line, 0)
|
|
Nothing =>
|
|
case lookupBetween pi.position pi.line_starts of
|
|
-- There will always be at least one line start, and we would have hit
|
|
-- the previous case if we were at the start of the first one, so if
|
|
-- there isn't a before, we can return a nonsense value safely
|
|
(Nothing, _) => (0, 0)
|
|
(Just (start, linum), after) =>
|
|
-- Our index will always be after the start of the line, for previously
|
|
-- mentioned reasons, so this cast is safe
|
|
let col = cast {to = Nat} $ pi.position.index - start.index
|
|
in (linum, col)
|
|
```
|
|
|
|
```idris hide
|
|
export
|
|
Show (ParserInternal Id) where
|
|
show pi =
|
|
let (line, col) = positionPair pi
|
|
current = assert_total $ strIndex pi.input (cast pi.position.index)
|
|
pos = pi.position.index
|
|
eof = show pi.end_of_input
|
|
in "Position: \{pos}(\{line}, \{col}}) Value: \{show current} EoF: \{eof}"
|
|
```
|
|
|
|
### More Barbie Functionality
|
|
|
|
Provide the barbie analogs of `map` and `traverse` for our `ParserInternal`
|
|
type, allowing us to change the type the values in a `ParserInternal` by mapping
|
|
over those values.
|
|
|
|
```idris
|
|
export
|
|
bmap : ({0 a : Type} -> f a -> g a) -> ParserInternal f -> ParserInternal g
|
|
-- bmap f = bmap_ (\_ => f)
|
|
bmap fun (MkInternal input length line_starts position end_of_input) =
|
|
let position' = fun position
|
|
end_of_input' = fun end_of_input
|
|
in MkInternal input length line_starts position' end_of_input'
|
|
|
|
export
|
|
btraverse : Applicative e => ({0 a : Type} -> f a -> e (g a))
|
|
-> ParserInternal f -> e (ParserInternal g)
|
|
btraverse fun (MkInternal input length line_starts position end_of_input) =
|
|
let pures = (MkInternal input length line_starts)
|
|
in [| pures (fun position) (fun end_of_input)|]
|
|
```
|
|
|
|
## Three way result
|
|
|
|
```idris
|
|
||| Three way result returned from attempting to parse a single char
|
|
public export
|
|
data ParseCharResult : Type where
|
|
GotChar : (char : Char) -> ParseCharResult
|
|
GotError : (err : Char) -> ParseCharResult
|
|
EndOfInput : ParseCharResult
|
|
```
|
|
|
|
## The Effect Type
|
|
|
|
```idris
|
|
export
|
|
data ParserState : Type -> Type where
|
|
Save : ParserState (ParserInternal Id)
|
|
Load : (ParserInternal Id) -> ParserState ()
|
|
-- TODO: Maybe add a ParseString that parses a string of characters as a
|
|
-- string using efficent slicing?
|
|
ParseChar : (predicate : Char -> Bool) -> ParserState ParseCharResult
|
|
ParseExactChar : (char : Char) -> ParserState ParseCharResult
|
|
ParseEoF : ParserState Bool
|
|
Note : Lazy String -> ParserState ()
|
|
```
|
|
|
|
```idris hide
|
|
Show (ParserState t) where
|
|
show Save = "Saving state"
|
|
show (Load pi) = "Loading state \{show pi}"
|
|
show (ParseChar predicate) = "Parsing char"
|
|
show (ParseExactChar char) = "Parsing char \{show char}"
|
|
show ParseEoF = "Parsing EoF"
|
|
show (Note _) = "Note"
|
|
```
|
|
|
|
### Actions
|
|
|
|
```idris
|
|
||| Return the current state, for potential later reloading
|
|
export
|
|
save : Has ParserState fs => Eff fs (ParserInternal Id)
|
|
save = send Save
|
|
|
|
||| Reset to the provided state
|
|
export
|
|
load : Has ParserState fs => ParserInternal Id -> Eff fs ()
|
|
load pi = send $ Load pi
|
|
|
|
||| Attempt to parse a char, checking to see if it complies with the supplied
|
|
||| predicate, updates the state if parsing succeeds, does not alter it in an
|
|
||| error condition.
|
|
export
|
|
charPredicate : Has ParserState fs => (predicate : Char -> Bool)
|
|
-> Eff fs ParseCharResult
|
|
charPredicate predicate = send $ ParseChar predicate
|
|
|
|
||| Parse a char by exact value
|
|
export
|
|
charExact' : Has ParserState fs => (chr : Char) -> Eff fs ParseCharResult
|
|
charExact' chr = send $ ParseExactChar chr
|
|
|
|
||| "Parse" the end of input, returning `True` if the parser state is currently
|
|
||| at the end of the input
|
|
export
|
|
parseEoF : Has ParserState fs => Eff fs Bool
|
|
parseEoF = send ParseEoF
|
|
|
|
||| Make a note to be output when running with a debug handler
|
|
export
|
|
pnote : Has ParserState fs => Lazy String -> Eff fs ()
|
|
pnote x = send $ Note x
|
|
```
|
|
|
|
## Handling a ParserState
|
|
|
|
### IO Context
|
|
|
|
```idris
|
|
export
|
|
handleParserStateIO : HasIO io =>
|
|
IORef (ParserInternal IORef) -> ParserState t -> io t
|
|
handleParserStateIO pi Save = do
|
|
pi <- readIORef pi
|
|
btraverse readIORef pi
|
|
handleParserStateIO pi (Load pj) = do
|
|
pj <- btraverse newIORef pj
|
|
writeIORef pi pj
|
|
handleParserStateIO pi (ParseChar predicate) = do
|
|
pi <- readIORef pi
|
|
False <- readIORef pi.end_of_input
|
|
| _ => pure EndOfInput
|
|
position <- readIORef pi.position
|
|
let char = assert_total $ strIndex pi.input (cast position.index)
|
|
True <- pure $ predicate char
|
|
| _ => pure $ GotError char
|
|
-- Our refinement type on the position forces us to check that the length is
|
|
-- in bounds after incrementing it, if its out of bounds, set the end_of_input
|
|
-- flag
|
|
case refine0 (position.index + 1) {p = IsIndex pi.length} of
|
|
Nothing => do
|
|
writeIORef pi.end_of_input True
|
|
pure $ GotChar char
|
|
Just (Element next _) => do
|
|
writeIORef pi.position $ MkIndex next
|
|
pure $ GotChar char
|
|
handleParserStateIO pi (ParseExactChar chr) = do
|
|
-- TODO: do this directly?
|
|
handleParserStateIO pi (ParseChar (== chr))
|
|
handleParserStateIO pi ParseEoF = do
|
|
pi <- readIORef pi
|
|
readIORef pi.end_of_input
|
|
-- We ignore notes in non-debug mode
|
|
handleParserStateIO pi (Note _) = pure ()
|
|
|
|
export
|
|
newInternalIO : HasIO io => String -> io $ Maybe (IORef (ParserInternal IORef))
|
|
newInternalIO str = do
|
|
Just internal <- pure $ newInternal str
|
|
| _ => pure Nothing
|
|
internal <- btraverse newIORef internal
|
|
map Just $ newIORef internal
|
|
```
|
|
|
|
Wrapper with debugging output
|
|
|
|
```idris
|
|
export
|
|
handleParserStateIODebug : HasIO io =>
|
|
IORef (ParserInternal IORef) -> ParserState t -> io t
|
|
handleParserStateIODebug x (Note note) = do
|
|
state <- readIORef x
|
|
state <- btraverse readIORef state
|
|
_ <- fPutStrLn stderr "Note \{note} -> \{show state}"
|
|
pure ()
|
|
handleParserStateIODebug x y = do
|
|
state <- readIORef x
|
|
state <- btraverse readIORef state
|
|
_ <- fPutStrLn stderr "\{show y} -> \{show state}"
|
|
handleParserStateIO x y
|
|
```
|
|
|
|
### State context
|
|
|
|
```idris
|
|
unPS : ParserInternal Id -> ParserState a -> (a, ParserInternal Id)
|
|
unPS pi Save = (pi, pi)
|
|
unPS pi (Load pj) = ((), pi)
|
|
unPS pi (ParseChar predicate) =
|
|
if pi.end_of_input
|
|
then (EndOfInput, pi)
|
|
else let
|
|
char = assert_total $ strIndex pi.input (cast pi.position.index)
|
|
in if predicate char
|
|
then case refine0 (pi.position.index + 1) {p = IsIndex pi.length} of
|
|
Nothing =>
|
|
(GotChar char, {end_of_input := True} pi)
|
|
Just (Element next _) =>
|
|
(GotChar char, {position := MkIndex next} pi)
|
|
else (GotError char, pi)
|
|
unPS pi (ParseExactChar chr) = unPS pi (ParseChar (== chr))
|
|
unPS pi ParseEoF = (pi.end_of_input, pi)
|
|
unPS pi (Note _) = ((), pi)
|
|
|
|
export
|
|
runParserState : Has ParserState fs =>
|
|
(s : ParserInternal Id) -> Eff fs t
|
|
-> Eff (fs - ParserState) (t, ParserInternal Id)
|
|
runParserState s =
|
|
handleRelayS s (\x, y => pure (y, x)) $ \s2,ps,f =>
|
|
let (a, st) = unPS s2 ps
|
|
in f st a
|
|
```
|
|
|
|
## Footnotes
|
|
|
|
[^1]: <https://github.com/stefan-hoeck/idris2-barbies>
|