From 6c36d6e62a5aadad997ffad37f5759a437d163ea Mon Sep 17 00:00:00 2001 From: Nathan McCarty Date: Fri, 21 Feb 2025 04:16:28 -0500 Subject: [PATCH] Refactor Djot into modules --- SSG.ipkg | 3 + src/SSG/Djot.idr | 407 +--------------------------------------- src/SSG/Djot/Block.idr | 153 +++++++++++++++ src/SSG/Djot/Common.idr | 151 +++++++++++++++ src/SSG/Djot/Inline.idr | 161 ++++++++++++++++ 5 files changed, 470 insertions(+), 405 deletions(-) create mode 100644 src/SSG/Djot/Block.idr create mode 100644 src/SSG/Djot/Common.idr create mode 100644 src/SSG/Djot/Inline.idr diff --git a/SSG.ipkg b/SSG.ipkg index ef48458..adfdb9e 100644 --- a/SSG.ipkg +++ b/SSG.ipkg @@ -22,6 +22,9 @@ depends = structures modules = SSG.Parser.Core , SSG.Parser.Util , SSG.Djot + , SSG.Djot.Common + , SSG.Djot.Inline + , SSG.Djot.Block , SSG.HTML , SSG.HTML.ElementTypes diff --git a/src/SSG/Djot.idr b/src/SSG/Djot.idr index 41924ca..274ed75 100644 --- a/src/SSG/Djot.idr +++ b/src/SSG/Djot.idr @@ -1,407 +1,4 @@ module SSG.Djot -import SSG.Parser.Core -import SSG.Parser.Util - -import Data.List1 -import Data.Vect -import Data.Nat -import Data.String - -import Control.Eff -import Derive.Prelude - --- For iutils unit tests -import System - -%language ElabReflection - ---***************************************** ---* Data Types * ---***************************************** - -public export -data Inline : Type where - HardLineBreak : Inline - SoftLineBreak : Inline - NonBreakingSpace : Inline - Text : (c : Char) -> Inline - -%runElab derive "Inline" [Show, Eq] - -public export -data HeaderLevel : Type where - H1 : HeaderLevel - H2 : HeaderLevel - H3 : HeaderLevel - H4 : HeaderLevel - H5 : HeaderLevel - H6 : HeaderLevel - -%runElab derive "HeaderLevel" [Eq] - -export -Show HeaderLevel where - show H1 = "1" - show H2 = "2" - show H3 = "3" - show H4 = "4" - show H5 = "5" - show H6 = "6" - -namespace HeaderLevel - public export - integerToHeader : Integer -> Maybe HeaderLevel - integerToHeader 1 = Just H1 - integerToHeader 2 = Just H2 - integerToHeader 3 = Just H3 - integerToHeader 4 = Just H4 - integerToHeader 5 = Just H5 - integerToHeader 6 = Just H6 - integerToHeader i = Nothing - - export - fromInteger : (x : Integer) -> {auto prf : IsJust (integerToHeader x)} -> HeaderLevel - fromInteger x = fromJust $ integerToHeader x - -public export -data Block : Type where - Paragraph : (contents : List1 Inline) -> Block - Heading : (level : HeaderLevel) -> (contents : List1 Inline) -> Block - -%runElab derive "Block" [Show, Eq] - ---***************************************** ---* Character Classes and String Escaping * ---***************************************** - ------------------------ --- Character classes -- ------------------------ - --- Class contents - -punctuationChars : List Char -punctuationChars = - [ - '!', '"', '#', '$', '%', '&', '\'' - , '(' , ')' , '*' , '+' , ',' , '-' - , '.' , '/' , ':' , ';' , '<' , '=' - , '>' , '?' , '@' , '[' , ']' , '^' - , '_' , '`' , '{' , '|' , '}' , '~' - ] - -horizontalWhitespaceChars : List Char -horizontalWhitespaceChars = - [' ', '\t'] - -verticalWhitespaceChars : List Char -verticalWhitespaceChars = - ['\n', '\r'] - --- Class parsers - -punctuation : PS Char -punctuation = theseChars punctuationChars - --------------- --- Escaping -- --------------- - -escapedChar : PS Char -escapedChar = do - _ <- thisString "\\" - oneOfE "Expected an escapable code" - [ punctuation - , exactReplace "n" '\n' - , exactReplace "t" '\t' - , exactReplace "r" '\r' - ] - ------------------------------------- --- Line Qualifying And Whitespace -- ------------------------------------- - -space : PS Char -space = theseChars horizontalWhitespaceChars - -spaces : PS Nat -spaces = do - xs <- many space - pure $ length xs - -nonTerminal : PS Char -nonTerminal = notTheseChars verticalWhitespaceChars - -lineEnding : PS String -lineEnding = theseStrings ["\r\n", "\n", "\r"] - -terminal : PS () -terminal = do - Nothing <- tryMaybe lineEnding - | _ => pure () - test <- parseEoF - case test of - False => throw "Expected line terminal" - True => pure () - -line : PS (List Char) -line = do - cs <- many nonTerminal - _ <- lineEnding - pure cs - -isHorizontalWhitespace : Char -> Bool -isHorizontalWhitespace c = any (== c) horizontalWhitespaceChars - -blankLine : PS (List Char) -blankLine = do - cs <- line - case all isHorizontalWhitespace cs of - False => throw "nonblank line" - True => pure cs - -blankLineOrEnd : PS () -blankLineOrEnd = do - Nothing <- tryMaybe blankLine - | Just _ => pure () - eof <- parseEoF - case eof of - False => throw "Expected newline or end of document" - True => pure () - -blankLines : PS () -blankLines = do - xs <- many blankLine - if length xs > 0 - then pure () - else blankLineOrEnd - ---***************************************** ---* Inline syntax * ---***************************************** - ------------------------- --- Escaped Whitespace -- ------------------------- - -hardLineBreak : PS Inline -hardLineBreak = do - _ <- thisString "\\" - _ <- spaces - _ <- lineEnding - pure $ HardLineBreak - -nbsp : PS Inline -nbsp = do - _ <- thisString "\\ " - pure $ NonBreakingSpace - -softLineBreak : PS Inline -softLineBreak = do - _ <- lineEnding - -- Check to see if the next line is empty, if it is, we are at the end of the inline - -- content, go ahead and bail - state <- save - Nothing <- tryMaybe blankLine - | Just _ => throw "End of inline" - load state - pure $ SoftLineBreak - ---------------------- --- Emphasis/Strong -- ---------------------- - ----------- --- Text -- ----------- - --- Process escape codes before processing as text -escapedText : PS Inline -escapedText = do - c <- escapedChar - pure $ Text c - --- Any non-line-ending character can be part of regular text -text : PS Inline -text = do - c <- nonTerminal - pure $ Text c - ---------------------------- --- Overall Inline Parser -- ---------------------------- - -inlineElementsNoNewlines : PS Inline -inlineElementsNoNewlines = oneOfE "" [ - nbsp - , escapedText - -- Text is last so that anything can superseed it - , text - ] - -inlineElement : PS Inline -inlineElement = oneOfE "" [ - hardLineBreak - , softLineBreak - , inlineElementsNoNewlines - ] - -inline : PS (List1 Inline) -inline = atLeastOne "Expected Inline Content" inlineElement - ---***************************************** ---* Block Syntax * ---***************************************** - ---------------- --- Paragraph -- ---------------- - -paragraph : PS Block -paragraph = do - inlines <- inline - blankLineOrEnd - pure $ Paragraph inlines - -------------- --- Heading -- -------------- - -heading : PS Block -heading = do - levelMarker <- atLeastOne "Expected Header Marker" $ thisString "#" - ws <- atLeastOne "Expected Whitespace after header marker" $ space - case integerToHeader (natToInteger $ length levelMarker) of - Nothing => throw "Invalid header length: \{show $ length levelMarker}" - Just level => do - inlines <- inline - pure $ Heading level inlines - --------------------------- --- Overall Block Parser -- --------------------------- - -block : PS Block -block = do - -- eat up any blank lines - _ <- many blankLine - oneOfE "" [ - heading - , paragraph - ] - -blocks : PS (List Block) -blocks = many block - ---***************************************** ---* Unit Tests * ---***************************************** - -------------------------------- --- Testing Utility Functions -- -------------------------------- - --- Test a parser against a golden value with the supplied input -golden : Show a => Eq a => - (input : String) -> (reference : a) -> (parser : PS a) -> IO Bool -golden input ref parser = do - putStrLn "Input: \{input}" - putStrLn "Expected: \{show ref}" - let got = runPS parser input - case got of - Left err => do - putStrLn "Failed with error: \{err}" - pure False - Right x => do - putStrLn "Output: \{show x}" - pure $ x == ref - -inlineFromString : String -> List (Inline) -inlineFromString str with (asList str) - inlineFromString "" | [] = [] - inlineFromString (strCons c str) | (c :: x) = - Text c :: inlineFromString str | x - -inlineFromString' : String -> List1 (Inline) -inlineFromString' str = - case inlineFromString str of - [] => assert_total $ idris_crash "Bad unit test fromString" - (x :: xs) => x ::: xs - -------------------------- --- Inline Syntax Tests -- -------------------------- - --- @@test Plain Text Hello World -inlineTextSmoke : IO Bool -inlineTextSmoke = - let input = "Hello World!" in - golden input (map Text . unpack $ input) (map forget inline) - --- @@test Escaped Text -inlineEscapedSmoke : IO Bool -inlineEscapedSmoke = - let input = "Hello\\n\\*World" - ref = inlineFromString "Hello" ++ [Text '\n', Text '*'] ++ inlineFromString "World" - in golden input ref (map forget inline) - --- @@test Hard Line Break -inlineHardBreakSmoke : IO Bool -inlineHardBreakSmoke = - let input = "Hello\\\nWorld" - ref = inlineFromString "Hello" ++ [HardLineBreak] ++ inlineFromString "World" - in golden input ref (map forget inline) - --- @@test Soft Line Break -inlineSoftBreakSmoke : IO Bool -inlineSoftBreakSmoke = - let input = "Hello\nWorld" - ref = inlineFromString "Hello" ++ [SoftLineBreak] ++ inlineFromString "World" - in golden input ref (map forget inline) - --- @@test Nonbreaking Space -inlineNbspSmoke : IO Bool -inlineNbspSmoke = - let input = "Hello\\ World" - ref = inlineFromString "Hello" ++ [NonBreakingSpace] ++ inlineFromString "World" - in golden input ref (map forget inline) - ------------------------- --- Block Syntax Tests -- ------------------------- - --- @@test Paragraph -blockParagraphSmoke : IO Bool -blockParagraphSmoke = - let input = "Hello World" - ref = [Paragraph (inlineFromString' "Hello World")] - in golden input ref blocks - --- @@test Two Paragraph -blockTwoParagraphSmoke : IO Bool -blockTwoParagraphSmoke = - let input = "Hello World\n\nHello Again" - ref = [ - Paragraph (inlineFromString' "Hello World") - , Paragraph (inlineFromString' "Hello Again") - ] - in golden input ref blocks - --- @@test Level 1 heading -blockLevelOneHeadingSmoke : IO Bool -blockLevelOneHeadingSmoke = - let input = "# Level 1" - ref = [ - Heading 1 (inlineFromString' "Level 1") - ] - in golden input ref blocks - --- @@test Level 2 heading -blockLevelTwoHeadingSmoke : IO Bool -blockLevelTwoHeadingSmoke = - let input = "## Level 2" - ref = [ - Heading 2 (inlineFromString' "Level 2") - ] - in golden input ref blocks +import public SSG.Djot.Inline as SSG.Djot +import public SSG.Djot.Block as SSG.Djot diff --git a/src/SSG/Djot/Block.idr b/src/SSG/Djot/Block.idr new file mode 100644 index 0000000..f3f8522 --- /dev/null +++ b/src/SSG/Djot/Block.idr @@ -0,0 +1,153 @@ +module SSG.Djot.Block + +import SSG.Parser.Core +import SSG.Parser.Util + +import SSG.Djot.Common +import SSG.Djot.Inline + +import Data.List1 +import Data.Vect +import Data.Nat +import Data.String + +import Control.Eff +import Derive.Prelude + +-- For iutils unit tests +import System + +%language ElabReflection + +--************** +--* Data Types * +--************** + +public export +data HeaderLevel : Type where + H1 : HeaderLevel + H2 : HeaderLevel + H3 : HeaderLevel + H4 : HeaderLevel + H5 : HeaderLevel + H6 : HeaderLevel + +%runElab derive "HeaderLevel" [Eq] + +export +Show HeaderLevel where + show H1 = "1" + show H2 = "2" + show H3 = "3" + show H4 = "4" + show H5 = "5" + show H6 = "6" + +namespace HeaderLevel + public export + integerToHeader : Integer -> Maybe HeaderLevel + integerToHeader 1 = Just H1 + integerToHeader 2 = Just H2 + integerToHeader 3 = Just H3 + integerToHeader 4 = Just H4 + integerToHeader 5 = Just H5 + integerToHeader 6 = Just H6 + integerToHeader i = Nothing + + export + fromInteger : (x : Integer) -> {auto prf : IsJust (integerToHeader x)} -> HeaderLevel + fromInteger x = fromJust $ integerToHeader x + +public export +data Block : Type where + Paragraph : (contents : List1 Inline) -> Block + Heading : (level : HeaderLevel) -> (contents : List1 Inline) -> Block + +%runElab derive "Block" [Show, Eq] + +--************** +--* Syntax * +--************** + +--------------- +-- Paragraph -- +--------------- + +paragraph : PS Block +paragraph = do + inlines <- inline + blankLineOrEnd + pure $ Paragraph inlines + +------------- +-- Heading -- +------------- + +heading : PS Block +heading = do + levelMarker <- atLeastOne "Expected Header Marker" $ thisString "#" + ws <- atLeastOne "Expected Whitespace after header marker" $ space + case integerToHeader (natToInteger $ length levelMarker) of + Nothing => throw "Invalid header length: \{show $ length levelMarker}" + Just level => do + inlines <- inline + pure $ Heading level inlines + +-------------------------- +-- Overall Block Parser -- +-------------------------- + +block : PS Block +block = do + -- eat up any blank lines + _ <- many blankLine + oneOfE "" [ + heading + , paragraph + ] + +blocks : PS (List Block) +blocks = many block + +--************** +--* Unit Tests * +--************** + +------------------------ +-- Block Syntax Tests -- +------------------------ + +-- @@test Paragraph +blockParagraphSmoke : IO Bool +blockParagraphSmoke = + let input = "Hello World" + ref = [Paragraph (inlineFromString' "Hello World")] + in golden input ref blocks + +-- @@test Two Paragraph +blockTwoParagraphSmoke : IO Bool +blockTwoParagraphSmoke = + let input = "Hello World\n\nHello Again" + ref = [ + Paragraph (inlineFromString' "Hello World") + , Paragraph (inlineFromString' "Hello Again") + ] + in golden input ref blocks + +-- @@test Level 1 heading +blockLevelOneHeadingSmoke : IO Bool +blockLevelOneHeadingSmoke = + let input = "# Level 1" + ref = [ + Heading 1 (inlineFromString' "Level 1") + ] + in golden input ref blocks + +-- @@test Level 2 heading +blockLevelTwoHeadingSmoke : IO Bool +blockLevelTwoHeadingSmoke = + let input = "## Level 2" + ref = [ + Heading 2 (inlineFromString' "Level 2") + ] + in golden input ref blocks diff --git a/src/SSG/Djot/Common.idr b/src/SSG/Djot/Common.idr new file mode 100644 index 0000000..addd20e --- /dev/null +++ b/src/SSG/Djot/Common.idr @@ -0,0 +1,151 @@ +module SSG.Djot.Common + +import SSG.Parser.Core +import SSG.Parser.Util + +import Control.Eff + +--***************************************** +--* Character Classes and String Escaping * +--***************************************** + +----------------------- +-- Character classes -- +----------------------- + +-- Class contents + +export +punctuationChars : List Char +punctuationChars = + [ + '!', '"', '#', '$', '%', '&', '\'' + , '(' , ')' , '*' , '+' , ',' , '-' + , '.' , '/' , ':' , ';' , '<' , '=' + , '>' , '?' , '@' , '[' , ']' , '^' + , '_' , '`' , '{' , '|' , '}' , '~' + ] + +export +horizontalWhitespaceChars : List Char +horizontalWhitespaceChars = + [' ', '\t'] + +export +verticalWhitespaceChars : List Char +verticalWhitespaceChars = + ['\n', '\r'] + +-- Class parsers + +export +punctuation : PS Char +punctuation = theseChars punctuationChars + +-------------- +-- Escaping -- +-------------- + +export +escapedChar : PS Char +escapedChar = do + _ <- thisString "\\" + oneOfE "Expected an escapable code" + [ punctuation + , exactReplace "n" '\n' + , exactReplace "t" '\t' + , exactReplace "r" '\r' + ] + +------------------------------------ +-- Line Qualifying And Whitespace -- +------------------------------------ + +export +space : PS Char +space = theseChars horizontalWhitespaceChars + +export +spaces : PS Nat +spaces = do + xs <- many space + pure $ length xs + +export +nonTerminal : PS Char +nonTerminal = notTheseChars verticalWhitespaceChars + +export +lineEnding : PS String +lineEnding = theseStrings ["\r\n", "\n", "\r"] + +export +terminal : PS () +terminal = do + Nothing <- tryMaybe lineEnding + | _ => pure () + test <- parseEoF + case test of + False => throw "Expected line terminal" + True => pure () + +export +line : PS (List Char) +line = do + cs <- many nonTerminal + _ <- lineEnding + pure cs + +export +isHorizontalWhitespace : Char -> Bool +isHorizontalWhitespace c = any (== c) horizontalWhitespaceChars + +export +blankLine : PS (List Char) +blankLine = do + cs <- line + case all isHorizontalWhitespace cs of + False => throw "nonblank line" + True => pure cs + +export +blankLineOrEnd : PS () +blankLineOrEnd = do + Nothing <- tryMaybe blankLine + | Just _ => pure () + eof <- parseEoF + case eof of + False => throw "Expected newline or end of document" + True => pure () + +export +blankLines : PS () +blankLines = do + xs <- many blankLine + if length xs > 0 + then pure () + else blankLineOrEnd + +--***************************************** +--* Unit Tests * +--***************************************** + +------------------------------- +-- Testing Utility Functions -- +------------------------------- + +-- Test a parser against a golden value with the supplied input +export +golden : Show a => Eq a => + (input : String) -> (reference : a) -> (parser : PS a) -> IO Bool +golden input ref parser = do + putStrLn "Input: \{input}" + putStrLn "Expected: \{show ref}" + let got = runPS parser input + case got of + Left err => do + putStrLn "Failed with error: \{err}" + pure False + Right x => do + putStrLn "Output: \{show x}" + pure $ x == ref diff --git a/src/SSG/Djot/Inline.idr b/src/SSG/Djot/Inline.idr new file mode 100644 index 0000000..7573062 --- /dev/null +++ b/src/SSG/Djot/Inline.idr @@ -0,0 +1,161 @@ +module SSG.Djot.Inline + +import SSG.Parser.Core +import SSG.Parser.Util + +import SSG.Djot.Common + +import Control.Eff +import Derive.Prelude + +-- For iutils unit tests +import System + +%language ElabReflection + +--************** +--* Data Types * +--************** + +public export +data Inline : Type where + HardLineBreak : Inline + SoftLineBreak : Inline + NonBreakingSpace : Inline + Text : (c : Char) -> Inline + +%runElab derive "Inline" [Show, Eq] + +--************** +--* Syntax * +--************** + +------------------------ +-- Escaped Whitespace -- +------------------------ + +hardLineBreak : PS Inline +hardLineBreak = do + _ <- thisString "\\" + _ <- spaces + _ <- lineEnding + pure $ HardLineBreak + +nbsp : PS Inline +nbsp = do + _ <- thisString "\\ " + pure $ NonBreakingSpace + +softLineBreak : PS Inline +softLineBreak = do + _ <- lineEnding + -- Check to see if the next line is empty, if it is, we are at the end of the inline + -- content, go ahead and bail + state <- save + Nothing <- tryMaybe blankLine + | Just _ => throw "End of inline" + load state + pure $ SoftLineBreak + +--------------------- +-- Emphasis/Strong -- +--------------------- + +---------- +-- Text -- +---------- + +-- Process escape codes before processing as text +escapedText : PS Inline +escapedText = do + c <- escapedChar + pure $ Text c + +-- Any non-line-ending character can be part of regular text +text : PS Inline +text = do + c <- nonTerminal + pure $ Text c + +--------------------------- +-- Overall Inline Parser -- +--------------------------- + +inlineElementsNoNewlines : PS Inline +inlineElementsNoNewlines = oneOfE "" [ + nbsp + , escapedText + -- Text is last so that anything can superseed it + , text + ] + +inlineElement : PS Inline +inlineElement = oneOfE "" [ + hardLineBreak + , softLineBreak + , inlineElementsNoNewlines + ] + +export +inline : PS (List1 Inline) +inline = atLeastOne "Expected Inline Content" inlineElement + +--************** +--* Unit Tests * +--************** + +------------------------------- +-- Testing Utility Functions -- +------------------------------- + +export +inlineFromString : String -> List (Inline) +inlineFromString str with (asList str) + inlineFromString "" | [] = [] + inlineFromString (strCons c str) | (c :: x) = + Text c :: inlineFromString str | x + +export +inlineFromString' : String -> List1 (Inline) +inlineFromString' str = + case inlineFromString str of + [] => assert_total $ idris_crash "Bad unit test fromString" + (x :: xs) => x ::: xs + +----------- +-- Tests -- +----------- + +-- @@test Plain Text Hello World +inlineTextSmoke : IO Bool +inlineTextSmoke = + let input = "Hello World!" in + golden input (map Text . unpack $ input) (map forget inline) + +-- @@test Escaped Text +inlineEscapedSmoke : IO Bool +inlineEscapedSmoke = + let input = "Hello\\n\\*World" + ref = inlineFromString "Hello" ++ [Text '\n', Text '*'] ++ inlineFromString "World" + in golden input ref (map forget inline) + +-- @@test Hard Line Break +inlineHardBreakSmoke : IO Bool +inlineHardBreakSmoke = + let input = "Hello\\\nWorld" + ref = inlineFromString "Hello" ++ [HardLineBreak] ++ inlineFromString "World" + in golden input ref (map forget inline) + +-- @@test Soft Line Break +inlineSoftBreakSmoke : IO Bool +inlineSoftBreakSmoke = + let input = "Hello\nWorld" + ref = inlineFromString "Hello" ++ [SoftLineBreak] ++ inlineFromString "World" + in golden input ref (map forget inline) + +-- @@test Nonbreaking Space +inlineNbspSmoke : IO Bool +inlineNbspSmoke = + let input = "Hello\\ World" + ref = inlineFromString "Hello" ++ [NonBreakingSpace] ++ inlineFromString "World" + in golden input ref (map forget inline)