src/node.gleam
@@ -1,4 +1,9 @@
pub type Node {
Let
EndOfLine
+ /// A single tab; `tab()` returns this after matching a "\t" token.
+ Tab
+ /// The "--" line-comment opener; returned by `line_comment_prefix()`.
+ LineCommentPrefix
+ /// One non-ASCII character accepted by `valid_non_ascii()`; holds the matched text.
+ ValidNonAscii(String)
+ /// One character in the 0x20-0x7F range accepted by `printable()`; holds the matched text.
+ Printable(String)
+ /// Wrapper built by `not_end_of_line()` around a `Printable`, `ValidNonAscii` or `Tab` node.
+ NotEndOfLine(Node)
}
Phillip Davis created
src/node.gleam | 5 +
src/parser.gleam | 91 ++++++++++++++++++++++++++++++
test/ghall_test.gleam | 131 +++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 226 insertions(+), 1 deletion(-)
@@ -1,4 +1,9 @@
pub type Node {
Let
EndOfLine
+ /// A single tab; `tab()` returns this after matching a "\t" token.
+ Tab
+ /// The "--" line-comment opener; returned by `line_comment_prefix()`.
+ LineCommentPrefix
+ /// One non-ASCII character accepted by `valid_non_ascii()`; holds the matched text.
+ ValidNonAscii(String)
+ /// One character in the 0x20-0x7F range accepted by `printable()`; holds the matched text.
+ Printable(String)
+ /// Wrapper built by `not_end_of_line()` around a `Printable`, `ValidNonAscii` or `Tab` node.
+ NotEndOfLine(Node)
}
@@ -1,7 +1,10 @@
import gleam/option.{None, Some}
import gleam/string
import nibble.{type Parser, do, return}
-import node.{type Node, EndOfLine}
+import node.{
+ type Node, EndOfLine, LineCommentPrefix, NotEndOfLine, Printable, Tab,
+ ValidNonAscii,
+}
pub fn exact_string(expected: String, node: Node) -> Parser(Node, String, ctx) {
use _ <- do(string.to_graphemes(expected) |> match_chars(expected))
@@ -47,3 +50,89 @@ pub fn end_of_line() -> Parser(Node, String, ctx) {
return(EndOfLine)
}
+
+/// Parser for a single horizontal tab: consumes one "\t" token and yields `Tab`.
+pub fn tab() -> Parser(Node, String, ctx) {
+  do(nibble.token("\t"), fn(_) { return(Tab) })
+}
+
+/// Parser for the line-comment opener: matches the literal "--" through
+/// `consume_exact_string`, discards what it produced, and yields
+/// `LineCommentPrefix`.
+pub fn line_comment_prefix() -> Parser(Node, String, ctx) {
+  consume_exact_string("--")
+  |> do(fn(_) { return(LineCommentPrefix) })
+}
+
+/// Builds a parser that consumes one token and succeeds only when that token
+/// is exactly one Unicode codepoint whose integer value passes `predicate`.
+/// On success the token itself is the result; `error_msg` is handed to
+/// `nibble.take_map` as the description of what was expected.
+fn codepoint_satisfies(
+  predicate: fn(Int) -> Bool,
+  error_msg: String,
+) -> Parser(String, String, ctx) {
+  let keep_if_accepted = fn(token) {
+    // A token made of zero or of several codepoints is rejected without
+    // consulting the predicate.
+    let accepted = case string.to_utf_codepoints(token) {
+      [only] -> predicate(string.utf_codepoint_to_int(only))
+      _ -> False
+    }
+
+    case accepted {
+      True -> Some(token)
+      False -> None
+    }
+  }
+
+  nibble.take_map(error_msg, keep_if_accepted)
+}
+
+/// Reports whether `codepoint` belongs to the Dhall ABNF `valid-non-ascii`
+/// ranges:
+///
+/// - 0x80-0xD7FF and 0xE000-0xFFFD in the basic plane, and
+/// - 0xN0000-0xNFFFD for every supplementary plane N from 0x1 to 0x10.
+fn is_valid_non_ascii_codepoint(codepoint: Int) -> Bool {
+  let in_basic_plane =
+    { codepoint >= 0x80 && codepoint <= 0xD7FF }
+    || { codepoint >= 0xE000 && codepoint <= 0xFFFD }
+
+  // Each supplementary plane accepts everything except its last two values
+  // (0x..FFFE and 0x..FFFF), so one offset-within-plane check covers all
+  // sixteen of them. The range test runs first, so the remainder is only
+  // taken for non-negative inputs.
+  let in_supplementary_plane =
+    codepoint >= 0x10000
+    && codepoint <= 0x10FFFD
+    && codepoint % 0x10000 <= 0xFFFD
+
+  in_basic_plane || in_supplementary_plane
+}
+
+/// Parser for the Dhall ABNF `valid-non-ascii` rule: accepts one
+/// single-codepoint token that passes `is_valid_non_ascii_codepoint` and wraps
+/// it in `ValidNonAscii`.
+pub fn valid_non_ascii() -> Parser(Node, String, ctx) {
+  codepoint_satisfies(is_valid_non_ascii_codepoint, "valid-non-ascii character")
+  |> nibble.map(ValidNonAscii)
+}
+
+/// Parser for the `%x20-7F` alternative of `not-end-of-line`: accepts one
+/// single-codepoint token whose value lies in 0x20-0x7F and wraps it in
+/// `Printable`. Note that the upper bound is inclusive, so 0x7F is accepted.
+pub fn printable() -> Parser(Node, String, ctx) {
+  let in_range = fn(cp) { 0x20 <= cp && cp <= 0x7F }
+
+  codepoint_satisfies(in_range, "ASCII printable character (0x20-0x7F)")
+  |> nibble.map(Printable)
+}
+
+/// Parser for the Dhall ABNF rule
+/// `not-end-of-line = %x20-7F / valid-non-ascii / tab`.
+/// The alternatives are tried in that order and the node produced by the one
+/// that succeeds is wrapped in `NotEndOfLine`.
+pub fn not_end_of_line() -> Parser(Node, String, ctx) {
+  [printable(), valid_non_ascii(), tab()]
+  |> nibble.one_of
+  |> nibble.map(NotEndOfLine)
+}
@@ -183,3 +183,134 @@ pub fn parse_multiple_line_endings_test() {
title: "Multiple line endings should all parse as EndOfLine",
)
}
+
+// Tests for printable parser (%x20-7F)
+
+// A space (0x20) is the lowest value `printable` accepts.
+pub fn parse_printable_space_test() {
+  let assert Ok(node.Printable(" ")) =
+    quasi_lexer.chars()
+    |> quasi_lexer.run(on: " ")
+    |> nibble.run(parser.printable())
+}
+
+// An ordinary ASCII letter is wrapped in `Printable`.
+pub fn parse_printable_ascii_test() {
+  let assert Ok(node.Printable("a")) =
+    quasi_lexer.chars()
+    |> quasi_lexer.run(on: "a")
+    |> nibble.run(parser.printable())
+}
+
+// "~" (0x7E) sits near the top of the accepted range.
+pub fn parse_printable_tilde_test() {
+  let assert Ok(node.Printable("~")) =
+    quasi_lexer.chars()
+    |> quasi_lexer.run(on: "~")
+    |> nibble.run(parser.printable())
+}
+
+// A tab (0x09) is below 0x20, so `printable` must fail on it.
+pub fn parse_printable_rejects_tab_test() {
+  let assert Error(_) =
+    quasi_lexer.chars()
+    |> quasi_lexer.run(on: "\t")
+    |> nibble.run(parser.printable())
+}
+
+// A newline (0x0A) is below 0x20, so `printable` must fail on it.
+pub fn parse_printable_rejects_newline_test() {
+  let assert Error(_) =
+    quasi_lexer.chars()
+    |> quasi_lexer.run(on: "\n")
+    |> nibble.run(parser.printable())
+}
+
+// `printable` only accepts 0x20-0x7F, so a non-ASCII character must be
+// rejected. The input is é (U+00E9); the literal had been corrupted into a
+// two-character mis-decoding of its UTF-8 bytes.
+pub fn parse_printable_rejects_non_ascii_test() {
+  let input = "é"
+  let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input)
+  let parser = parser.printable()
+  let assert Error(_) = nibble.run(tokens, parser)
+}
+
+// Tests for valid-non-ascii parser
+
+// A Latin-1 letter exercises the first `valid-non-ascii` range.
+// The literal must be the single precomposed codepoint U+00E9: the parser
+// consumes one token, so a multi-character (mis-decoded) literal could never
+// match the expected node.
+pub fn parse_valid_non_ascii_latin_test() {
+  let input = "é"
+  // é is U+00E9, in range 0x80-0xD7FF
+  let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input)
+  let parser = parser.valid_non_ascii()
+  let assert Ok(node.ValidNonAscii("é")) = nibble.run(tokens, parser)
+}
+
+// An emoji exercises the first supplementary-plane range.
+// The literal is restored to U+1F389; it had been corrupted into an unrelated
+// character by a mis-decoding of its UTF-8 bytes.
+pub fn parse_valid_non_ascii_emoji_test() {
+  let input = "🎉"
+  // 🎉 is U+1F389, in range 0x10000-0x1FFFD
+  let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input)
+  let parser = parser.valid_non_ascii()
+  let assert Ok(node.ValidNonAscii("🎉")) = nibble.run(tokens, parser)
+}
+
+// A CJK ideograph is accepted as `valid-non-ascii`.
+// The literal is restored to U+4E2D (it had been mis-decoded), and the range
+// note is corrected: 0x4E2D lies in 0x80-0xD7FF, not in 0xE000-0xFFFD.
+pub fn parse_valid_non_ascii_chinese_test() {
+  let input = "中"
+  // 中 is U+4E2D, in range 0x80-0xD7FF
+  let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input)
+  let parser = parser.valid_non_ascii()
+  let assert Ok(node.ValidNonAscii("中")) = nibble.run(tokens, parser)
+}
+
+// "a" (0x61) is below 0x80, so `valid_non_ascii` must fail on it.
+pub fn parse_valid_non_ascii_rejects_ascii_test() {
+  let assert Error(_) =
+    quasi_lexer.chars()
+    |> quasi_lexer.run(on: "a")
+    |> nibble.run(parser.valid_non_ascii())
+}
+
+// A tab (0x09) is below 0x80, so `valid_non_ascii` must fail on it.
+pub fn parse_valid_non_ascii_rejects_tab_test() {
+  let assert Error(_) =
+    quasi_lexer.chars()
+    |> quasi_lexer.run(on: "\t")
+    |> nibble.run(parser.valid_non_ascii())
+}
+
+// Tests for not-end-of-line parser
+
+// An ASCII letter is matched by the `printable` alternative and wrapped.
+pub fn parse_not_end_of_line_printable_test() {
+  let assert Ok(node.NotEndOfLine(node.Printable("a"))) =
+    quasi_lexer.chars()
+    |> quasi_lexer.run(on: "a")
+    |> nibble.run(parser.not_end_of_line())
+}
+
+// A space is matched by the `printable` alternative and wrapped.
+pub fn parse_not_end_of_line_space_test() {
+  let assert Ok(node.NotEndOfLine(node.Printable(" "))) =
+    quasi_lexer.chars()
+    |> quasi_lexer.run(on: " ")
+    |> nibble.run(parser.not_end_of_line())
+}
+
+// A tab falls through to the `tab` alternative and is wrapped as `Tab`.
+pub fn parse_not_end_of_line_tab_test() {
+  let assert Ok(node.NotEndOfLine(node.Tab)) =
+    quasi_lexer.chars()
+    |> quasi_lexer.run(on: "\t")
+    |> nibble.run(parser.not_end_of_line())
+}
+
+// A non-ASCII letter is matched by the `valid_non_ascii` alternative.
+// The literal is restored to λ (U+03BB): the parser consumes one token, so
+// the previous two-character mis-decoded literal could never equal the
+// expected node.
+pub fn parse_not_end_of_line_valid_non_ascii_test() {
+  let input = "λ"
+  let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input)
+  let parser = parser.not_end_of_line()
+  let assert Ok(node.NotEndOfLine(node.ValidNonAscii("λ"))) =
+    nibble.run(tokens, parser)
+}
+
+// "\n" matches none of the three alternatives, so parsing must fail.
+pub fn parse_not_end_of_line_rejects_newline_test() {
+  let assert Error(_) =
+    quasi_lexer.chars()
+    |> quasi_lexer.run(on: "\n")
+    |> nibble.run(parser.not_end_of_line())
+}
+
+// "\r" matches none of the three alternatives, so parsing must fail.
+pub fn parse_not_end_of_line_rejects_carriage_return_test() {
+  let assert Error(_) =
+    quasi_lexer.chars()
+    |> quasi_lexer.run(on: "\r")
+    |> nibble.run(parser.not_end_of_line())
+}