Refactor Djot into modules

This commit is contained in:
Nathan McCarty 2025-02-21 04:16:28 -05:00
parent 79c74426c1
commit 6c36d6e62a
5 changed files with 470 additions and 405 deletions

View file

@ -22,6 +22,9 @@ depends = structures
modules = SSG.Parser.Core
, SSG.Parser.Util
, SSG.Djot
, SSG.Djot.Common
, SSG.Djot.Inline
, SSG.Djot.Block
, SSG.HTML
, SSG.HTML.ElementTypes

View file

@ -1,407 +1,4 @@
module SSG.Djot
import SSG.Parser.Core
import SSG.Parser.Util
import Data.List1
import Data.Vect
import Data.Nat
import Data.String
import Control.Eff
import Derive.Prelude
-- For iutils unit tests
import System
%language ElabReflection
--*****************************************
--* Data Types *
--*****************************************
public export
data Inline : Type where
HardLineBreak : Inline
SoftLineBreak : Inline
NonBreakingSpace : Inline
Text : (c : Char) -> Inline
%runElab derive "Inline" [Show, Eq]
public export
data HeaderLevel : Type where
H1 : HeaderLevel
H2 : HeaderLevel
H3 : HeaderLevel
H4 : HeaderLevel
H5 : HeaderLevel
H6 : HeaderLevel
%runElab derive "HeaderLevel" [Eq]
export
Show HeaderLevel where
show H1 = "1"
show H2 = "2"
show H3 = "3"
show H4 = "4"
show H5 = "5"
show H6 = "6"
namespace HeaderLevel
public export
integerToHeader : Integer -> Maybe HeaderLevel
integerToHeader 1 = Just H1
integerToHeader 2 = Just H2
integerToHeader 3 = Just H3
integerToHeader 4 = Just H4
integerToHeader 5 = Just H5
integerToHeader 6 = Just H6
integerToHeader i = Nothing
export
fromInteger : (x : Integer) -> {auto prf : IsJust (integerToHeader x)} -> HeaderLevel
fromInteger x = fromJust $ integerToHeader x
public export
data Block : Type where
Paragraph : (contents : List1 Inline) -> Block
Heading : (level : HeaderLevel) -> (contents : List1 Inline) -> Block
%runElab derive "Block" [Show, Eq]
--*****************************************
--* Character Classes and String Escaping *
--*****************************************
-----------------------
-- Character classes --
-----------------------
-- Class contents
punctuationChars : List Char
punctuationChars =
[
'!', '"', '#', '$', '%', '&', '\''
, '(' , ')' , '*' , '+' , ',' , '-'
, '.' , '/' , ':' , ';' , '<' , '='
, '>' , '?' , '@' , '[' , ']' , '^'
, '_' , '`' , '{' , '|' , '}' , '~'
]
horizontalWhitespaceChars : List Char
horizontalWhitespaceChars =
[' ', '\t']
verticalWhitespaceChars : List Char
verticalWhitespaceChars =
['\n', '\r']
-- Class parsers
punctuation : PS Char
punctuation = theseChars punctuationChars
--------------
-- Escaping --
--------------
escapedChar : PS Char
escapedChar = do
_ <- thisString "\\"
oneOfE "Expected an escapable code"
[ punctuation
, exactReplace "n" '\n'
, exactReplace "t" '\t'
, exactReplace "r" '\r'
]
------------------------------------
-- Line Qualifying And Whitespace --
------------------------------------
space : PS Char
space = theseChars horizontalWhitespaceChars
spaces : PS Nat
spaces = do
xs <- many space
pure $ length xs
nonTerminal : PS Char
nonTerminal = notTheseChars verticalWhitespaceChars
lineEnding : PS String
lineEnding = theseStrings ["\r\n", "\n", "\r"]
terminal : PS ()
terminal = do
Nothing <- tryMaybe lineEnding
| _ => pure ()
test <- parseEoF
case test of
False => throw "Expected line terminal"
True => pure ()
line : PS (List Char)
line = do
cs <- many nonTerminal
_ <- lineEnding
pure cs
isHorizontalWhitespace : Char -> Bool
isHorizontalWhitespace c = any (== c) horizontalWhitespaceChars
blankLine : PS (List Char)
blankLine = do
cs <- line
case all isHorizontalWhitespace cs of
False => throw "nonblank line"
True => pure cs
blankLineOrEnd : PS ()
blankLineOrEnd = do
Nothing <- tryMaybe blankLine
| Just _ => pure ()
eof <- parseEoF
case eof of
False => throw "Expected newline or end of document"
True => pure ()
blankLines : PS ()
blankLines = do
xs <- many blankLine
if length xs > 0
then pure ()
else blankLineOrEnd
--*****************************************
--* Inline syntax *
--*****************************************
------------------------
-- Escaped Whitespace --
------------------------
hardLineBreak : PS Inline
hardLineBreak = do
_ <- thisString "\\"
_ <- spaces
_ <- lineEnding
pure $ HardLineBreak
nbsp : PS Inline
nbsp = do
_ <- thisString "\\ "
pure $ NonBreakingSpace
softLineBreak : PS Inline
softLineBreak = do
_ <- lineEnding
-- Check to see if the next line is empty, if it is, we are at the end of the inline
-- content, go ahead and bail
state <- save
Nothing <- tryMaybe blankLine
| Just _ => throw "End of inline"
load state
pure $ SoftLineBreak
---------------------
-- Emphasis/Strong --
---------------------
----------
-- Text --
----------
-- Process escape codes before processing as text
escapedText : PS Inline
escapedText = do
c <- escapedChar
pure $ Text c
-- Any non-line-ending character can be part of regular text
text : PS Inline
text = do
c <- nonTerminal
pure $ Text c
---------------------------
-- Overall Inline Parser --
---------------------------
inlineElementsNoNewlines : PS Inline
inlineElementsNoNewlines = oneOfE "" [
nbsp
, escapedText
-- Text is last so that anything can superseed it
, text
]
inlineElement : PS Inline
inlineElement = oneOfE "" [
hardLineBreak
, softLineBreak
, inlineElementsNoNewlines
]
inline : PS (List1 Inline)
inline = atLeastOne "Expected Inline Content" inlineElement
--*****************************************
--* Block Syntax *
--*****************************************
---------------
-- Paragraph --
---------------
paragraph : PS Block
paragraph = do
inlines <- inline
blankLineOrEnd
pure $ Paragraph inlines
-------------
-- Heading --
-------------
heading : PS Block
heading = do
levelMarker <- atLeastOne "Expected Header Marker" $ thisString "#"
ws <- atLeastOne "Expected Whitespace after header marker" $ space
case integerToHeader (natToInteger $ length levelMarker) of
Nothing => throw "Invalid header length: \{show $ length levelMarker}"
Just level => do
inlines <- inline
pure $ Heading level inlines
--------------------------
-- Overall Block Parser --
--------------------------
block : PS Block
block = do
-- eat up any blank lines
_ <- many blankLine
oneOfE "" [
heading
, paragraph
]
blocks : PS (List Block)
blocks = many block
--*****************************************
--* Unit Tests *
--*****************************************
-------------------------------
-- Testing Utility Functions --
-------------------------------
-- Test a parser against a golden value with the supplied input
golden : Show a => Eq a =>
(input : String) -> (reference : a) -> (parser : PS a) -> IO Bool
golden input ref parser = do
putStrLn "Input: \{input}"
putStrLn "Expected: \{show ref}"
let got = runPS parser input
case got of
Left err => do
putStrLn "Failed with error: \{err}"
pure False
Right x => do
putStrLn "Output: \{show x}"
pure $ x == ref
inlineFromString : String -> List (Inline)
inlineFromString str with (asList str)
inlineFromString "" | [] = []
inlineFromString (strCons c str) | (c :: x) =
Text c :: inlineFromString str | x
inlineFromString' : String -> List1 (Inline)
inlineFromString' str =
case inlineFromString str of
[] => assert_total $ idris_crash "Bad unit test fromString"
(x :: xs) => x ::: xs
-------------------------
-- Inline Syntax Tests --
-------------------------
-- @@test Plain Text Hello World
inlineTextSmoke : IO Bool
inlineTextSmoke =
let input = "Hello World!" in
golden input (map Text . unpack $ input) (map forget inline)
-- @@test Escaped Text
inlineEscapedSmoke : IO Bool
inlineEscapedSmoke =
let input = "Hello\\n\\*World"
ref = inlineFromString "Hello" ++ [Text '\n', Text '*'] ++ inlineFromString "World"
in golden input ref (map forget inline)
-- @@test Hard Line Break
inlineHardBreakSmoke : IO Bool
inlineHardBreakSmoke =
let input = "Hello\\\nWorld"
ref = inlineFromString "Hello" ++ [HardLineBreak] ++ inlineFromString "World"
in golden input ref (map forget inline)
-- @@test Soft Line Break
inlineSoftBreakSmoke : IO Bool
inlineSoftBreakSmoke =
let input = "Hello\nWorld"
ref = inlineFromString "Hello" ++ [SoftLineBreak] ++ inlineFromString "World"
in golden input ref (map forget inline)
-- @@test Nonbreaking Space
inlineNbspSmoke : IO Bool
inlineNbspSmoke =
let input = "Hello\\ World"
ref = inlineFromString "Hello" ++ [NonBreakingSpace] ++ inlineFromString "World"
in golden input ref (map forget inline)
------------------------
-- Block Syntax Tests --
------------------------
-- @@test Paragraph
blockParagraphSmoke : IO Bool
blockParagraphSmoke =
let input = "Hello World"
ref = [Paragraph (inlineFromString' "Hello World")]
in golden input ref blocks
-- @@test Two Paragraph
blockTwoParagraphSmoke : IO Bool
blockTwoParagraphSmoke =
let input = "Hello World\n\nHello Again"
ref = [
Paragraph (inlineFromString' "Hello World")
, Paragraph (inlineFromString' "Hello Again")
]
in golden input ref blocks
-- @@test Level 1 heading
blockLevelOneHeadingSmoke : IO Bool
blockLevelOneHeadingSmoke =
let input = "# Level 1"
ref = [
Heading 1 (inlineFromString' "Level 1")
]
in golden input ref blocks
-- @@test Level 2 heading
blockLevelTwoHeadingSmoke : IO Bool
blockLevelTwoHeadingSmoke =
let input = "## Level 2"
ref = [
Heading 2 (inlineFromString' "Level 2")
]
in golden input ref blocks
import public SSG.Djot.Inline as SSG.Djot
import public SSG.Djot.Block as SSG.Djot

153
src/SSG/Djot/Block.idr Normal file
View file

@ -0,0 +1,153 @@
module SSG.Djot.Block
import SSG.Parser.Core
import SSG.Parser.Util
import SSG.Djot.Common
import SSG.Djot.Inline
import Data.List1
import Data.Vect
import Data.Nat
import Data.String
import Control.Eff
import Derive.Prelude
-- For iutils unit tests
import System
%language ElabReflection
--**************
--* Data Types *
--**************
public export
data HeaderLevel : Type where
H1 : HeaderLevel
H2 : HeaderLevel
H3 : HeaderLevel
H4 : HeaderLevel
H5 : HeaderLevel
H6 : HeaderLevel
%runElab derive "HeaderLevel" [Eq]
export
Show HeaderLevel where
show H1 = "1"
show H2 = "2"
show H3 = "3"
show H4 = "4"
show H5 = "5"
show H6 = "6"
namespace HeaderLevel
public export
integerToHeader : Integer -> Maybe HeaderLevel
integerToHeader 1 = Just H1
integerToHeader 2 = Just H2
integerToHeader 3 = Just H3
integerToHeader 4 = Just H4
integerToHeader 5 = Just H5
integerToHeader 6 = Just H6
integerToHeader i = Nothing
export
fromInteger : (x : Integer) -> {auto prf : IsJust (integerToHeader x)} -> HeaderLevel
fromInteger x = fromJust $ integerToHeader x
public export
data Block : Type where
Paragraph : (contents : List1 Inline) -> Block
Heading : (level : HeaderLevel) -> (contents : List1 Inline) -> Block
%runElab derive "Block" [Show, Eq]
--**************
--* Syntax *
--**************
---------------
-- Paragraph --
---------------
paragraph : PS Block
paragraph = do
inlines <- inline
blankLineOrEnd
pure $ Paragraph inlines
-------------
-- Heading --
-------------
heading : PS Block
heading = do
levelMarker <- atLeastOne "Expected Header Marker" $ thisString "#"
ws <- atLeastOne "Expected Whitespace after header marker" $ space
case integerToHeader (natToInteger $ length levelMarker) of
Nothing => throw "Invalid header length: \{show $ length levelMarker}"
Just level => do
inlines <- inline
pure $ Heading level inlines
--------------------------
-- Overall Block Parser --
--------------------------
block : PS Block
block = do
-- eat up any blank lines
_ <- many blankLine
oneOfE "" [
heading
, paragraph
]
blocks : PS (List Block)
blocks = many block
--**************
--* Unit Tests *
--**************
------------------------
-- Block Syntax Tests --
------------------------
-- @@test Paragraph
blockParagraphSmoke : IO Bool
blockParagraphSmoke =
let input = "Hello World"
ref = [Paragraph (inlineFromString' "Hello World")]
in golden input ref blocks
-- @@test Two Paragraph
blockTwoParagraphSmoke : IO Bool
blockTwoParagraphSmoke =
let input = "Hello World\n\nHello Again"
ref = [
Paragraph (inlineFromString' "Hello World")
, Paragraph (inlineFromString' "Hello Again")
]
in golden input ref blocks
-- @@test Level 1 heading
blockLevelOneHeadingSmoke : IO Bool
blockLevelOneHeadingSmoke =
let input = "# Level 1"
ref = [
Heading 1 (inlineFromString' "Level 1")
]
in golden input ref blocks
-- @@test Level 2 heading
blockLevelTwoHeadingSmoke : IO Bool
blockLevelTwoHeadingSmoke =
let input = "## Level 2"
ref = [
Heading 2 (inlineFromString' "Level 2")
]
in golden input ref blocks

151
src/SSG/Djot/Common.idr Normal file
View file

@ -0,0 +1,151 @@
module SSG.Djot.Common
import SSG.Parser.Core
import SSG.Parser.Util
import Control.Eff
--*****************************************
--* Character Classes and String Escaping *
--*****************************************
-----------------------
-- Character classes --
-----------------------
-- Class contents
export
punctuationChars : List Char
punctuationChars =
[
'!', '"', '#', '$', '%', '&', '\''
, '(' , ')' , '*' , '+' , ',' , '-'
, '.' , '/' , ':' , ';' , '<' , '='
, '>' , '?' , '@' , '[' , ']' , '^'
, '_' , '`' , '{' , '|' , '}' , '~'
]
export
horizontalWhitespaceChars : List Char
horizontalWhitespaceChars =
[' ', '\t']
export
verticalWhitespaceChars : List Char
verticalWhitespaceChars =
['\n', '\r']
-- Class parsers
export
punctuation : PS Char
punctuation = theseChars punctuationChars
--------------
-- Escaping --
--------------
export
escapedChar : PS Char
escapedChar = do
_ <- thisString "\\"
oneOfE "Expected an escapable code"
[ punctuation
, exactReplace "n" '\n'
, exactReplace "t" '\t'
, exactReplace "r" '\r'
]
------------------------------------
-- Line Qualifying And Whitespace --
------------------------------------
export
space : PS Char
space = theseChars horizontalWhitespaceChars
export
spaces : PS Nat
spaces = do
xs <- many space
pure $ length xs
export
nonTerminal : PS Char
nonTerminal = notTheseChars verticalWhitespaceChars
export
lineEnding : PS String
lineEnding = theseStrings ["\r\n", "\n", "\r"]
export
terminal : PS ()
terminal = do
Nothing <- tryMaybe lineEnding
| _ => pure ()
test <- parseEoF
case test of
False => throw "Expected line terminal"
True => pure ()
export
line : PS (List Char)
line = do
cs <- many nonTerminal
_ <- lineEnding
pure cs
export
isHorizontalWhitespace : Char -> Bool
isHorizontalWhitespace c = any (== c) horizontalWhitespaceChars
export
blankLine : PS (List Char)
blankLine = do
cs <- line
case all isHorizontalWhitespace cs of
False => throw "nonblank line"
True => pure cs
export
blankLineOrEnd : PS ()
blankLineOrEnd = do
Nothing <- tryMaybe blankLine
| Just _ => pure ()
eof <- parseEoF
case eof of
False => throw "Expected newline or end of document"
True => pure ()
export
blankLines : PS ()
blankLines = do
xs <- many blankLine
if length xs > 0
then pure ()
else blankLineOrEnd
--*****************************************
--* Unit Tests *
--*****************************************
-------------------------------
-- Testing Utility Functions --
-------------------------------
-- Test a parser against a golden value with the supplied input
export
golden : Show a => Eq a =>
(input : String) -> (reference : a) -> (parser : PS a) -> IO Bool
golden input ref parser = do
putStrLn "Input: \{input}"
putStrLn "Expected: \{show ref}"
let got = runPS parser input
case got of
Left err => do
putStrLn "Failed with error: \{err}"
pure False
Right x => do
putStrLn "Output: \{show x}"
pure $ x == ref

161
src/SSG/Djot/Inline.idr Normal file
View file

@ -0,0 +1,161 @@
module SSG.Djot.Inline
import SSG.Parser.Core
import SSG.Parser.Util
import SSG.Djot.Common
import Control.Eff
import Derive.Prelude
-- For iutils unit tests
import System
%language ElabReflection
--**************
--* Data Types *
--**************
public export
data Inline : Type where
HardLineBreak : Inline
SoftLineBreak : Inline
NonBreakingSpace : Inline
Text : (c : Char) -> Inline
%runElab derive "Inline" [Show, Eq]
--**************
--* Syntax *
--**************
------------------------
-- Escaped Whitespace --
------------------------
hardLineBreak : PS Inline
hardLineBreak = do
_ <- thisString "\\"
_ <- spaces
_ <- lineEnding
pure $ HardLineBreak
nbsp : PS Inline
nbsp = do
_ <- thisString "\\ "
pure $ NonBreakingSpace
softLineBreak : PS Inline
softLineBreak = do
_ <- lineEnding
-- Check to see if the next line is empty, if it is, we are at the end of the inline
-- content, go ahead and bail
state <- save
Nothing <- tryMaybe blankLine
| Just _ => throw "End of inline"
load state
pure $ SoftLineBreak
---------------------
-- Emphasis/Strong --
---------------------
----------
-- Text --
----------
-- Process escape codes before processing as text
escapedText : PS Inline
escapedText = do
c <- escapedChar
pure $ Text c
-- Any non-line-ending character can be part of regular text
text : PS Inline
text = do
c <- nonTerminal
pure $ Text c
---------------------------
-- Overall Inline Parser --
---------------------------
inlineElementsNoNewlines : PS Inline
inlineElementsNoNewlines = oneOfE "" [
nbsp
, escapedText
-- Text is last so that anything can superseed it
, text
]
inlineElement : PS Inline
inlineElement = oneOfE "" [
hardLineBreak
, softLineBreak
, inlineElementsNoNewlines
]
export
inline : PS (List1 Inline)
inline = atLeastOne "Expected Inline Content" inlineElement
--**************
--* Unit Tests *
--**************
-------------------------------
-- Testing Utility Functions --
-------------------------------
export
inlineFromString : String -> List (Inline)
inlineFromString str with (asList str)
inlineFromString "" | [] = []
inlineFromString (strCons c str) | (c :: x) =
Text c :: inlineFromString str | x
export
inlineFromString' : String -> List1 (Inline)
inlineFromString' str =
case inlineFromString str of
[] => assert_total $ idris_crash "Bad unit test fromString"
(x :: xs) => x ::: xs
-----------
-- Tests --
-----------
-- @@test Plain Text Hello World
inlineTextSmoke : IO Bool
inlineTextSmoke =
let input = "Hello World!" in
golden input (map Text . unpack $ input) (map forget inline)
-- @@test Escaped Text
inlineEscapedSmoke : IO Bool
inlineEscapedSmoke =
let input = "Hello\\n\\*World"
ref = inlineFromString "Hello" ++ [Text '\n', Text '*'] ++ inlineFromString "World"
in golden input ref (map forget inline)
-- @@test Hard Line Break
inlineHardBreakSmoke : IO Bool
inlineHardBreakSmoke =
let input = "Hello\\\nWorld"
ref = inlineFromString "Hello" ++ [HardLineBreak] ++ inlineFromString "World"
in golden input ref (map forget inline)
-- @@test Soft Line Break
inlineSoftBreakSmoke : IO Bool
inlineSoftBreakSmoke =
let input = "Hello\nWorld"
ref = inlineFromString "Hello" ++ [SoftLineBreak] ++ inlineFromString "World"
in golden input ref (map forget inline)
-- @@test Nonbreaking Space
inlineNbspSmoke : IO Bool
inlineNbspSmoke =
let input = "Hello\\ World"
ref = inlineFromString "Hello" ++ [NonBreakingSpace] ++ inlineFromString "World"
in golden input ref (map forget inline)