whsp1 test

Phillip Davis created

Change summary

src/node.gleam        |   5 +
src/parser.gleam      |  91 ++++++++++++++++++++++++++++++
test/ghall_test.gleam | 131 +++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 226 insertions(+), 1 deletion(-)

Detailed changes

src/node.gleam 🔗

@@ -1,4 +1,9 @@
 pub type Node {
   Let
   EndOfLine
+  Tab
+  LineCommentPrefix
+  ValidNonAscii(String)
+  Printable(String)
+  NotEndOfLine(Node)
 }

src/parser.gleam 🔗

@@ -1,7 +1,10 @@
 import gleam/option.{None, Some}
 import gleam/string
 import nibble.{type Parser, do, return}
-import node.{type Node, EndOfLine}
+import node.{
+  type Node, EndOfLine, LineCommentPrefix, NotEndOfLine, Printable, Tab,
+  ValidNonAscii,
+}
 
 pub fn exact_string(expected: String, node: Node) -> Parser(Node, String, ctx) {
   use _ <- do(string.to_graphemes(expected) |> match_chars(expected))
@@ -47,3 +50,89 @@ pub fn end_of_line() -> Parser(Node, String, ctx) {
 
   return(EndOfLine)
 }
+
+pub fn tab() -> Parser(Node, String, ctx) {
+  use _ <- nibble.do(nibble.token("\t"))
+
+  return(Tab)
+}
+
+pub fn line_comment_prefix() -> Parser(Node, String, ctx) {
+  use _ <- nibble.do(consume_exact_string("--"))
+
+  return(LineCommentPrefix)
+}
+
+/// Helper function to parse a grapheme if its codepoint satisfies a predicate
+fn codepoint_satisfies(
+  predicate: fn(Int) -> Bool,
+  error_msg: String,
+) -> Parser(String, String, ctx) {
+  nibble.take_map(error_msg, fn(grapheme) {
+    case string.to_utf_codepoints(grapheme) {
+      [codepoint] -> {
+        let cp_value = string.utf_codepoint_to_int(codepoint)
+        case predicate(cp_value) {
+          True -> Some(grapheme)
+          False -> None
+        }
+      }
+      _ -> None
+    }
+  })
+}
+
+/// Helper function to check if a codepoint is in the valid-non-ascii ranges
+/// as defined by the Dhall ABNF specification
+fn is_valid_non_ascii_codepoint(codepoint: Int) -> Bool {
+  case codepoint {
+    _ if codepoint >= 0x80 && codepoint <= 0xD7FF -> True
+    _ if codepoint >= 0xE000 && codepoint <= 0xFFFD -> True
+    _ if codepoint >= 0x10000 && codepoint <= 0x1FFFD -> True
+    _ if codepoint >= 0x20000 && codepoint <= 0x2FFFD -> True
+    _ if codepoint >= 0x30000 && codepoint <= 0x3FFFD -> True
+    _ if codepoint >= 0x40000 && codepoint <= 0x4FFFD -> True
+    _ if codepoint >= 0x50000 && codepoint <= 0x5FFFD -> True
+    _ if codepoint >= 0x60000 && codepoint <= 0x6FFFD -> True
+    _ if codepoint >= 0x70000 && codepoint <= 0x7FFFD -> True
+    _ if codepoint >= 0x80000 && codepoint <= 0x8FFFD -> True
+    _ if codepoint >= 0x90000 && codepoint <= 0x9FFFD -> True
+    _ if codepoint >= 0xA0000 && codepoint <= 0xAFFFD -> True
+    _ if codepoint >= 0xB0000 && codepoint <= 0xBFFFD -> True
+    _ if codepoint >= 0xC0000 && codepoint <= 0xCFFFD -> True
+    _ if codepoint >= 0xD0000 && codepoint <= 0xDFFFD -> True
+    _ if codepoint >= 0xE0000 && codepoint <= 0xEFFFD -> True
+    _ if codepoint >= 0xF0000 && codepoint <= 0xFFFFD -> True
+    _ if codepoint >= 0x100000 && codepoint <= 0x10FFFD -> True
+    _ -> False
+  }
+}
+
+/// Parser for valid-non-ascii as defined by Dhall ABNF
+pub fn valid_non_ascii() -> Parser(Node, String, ctx) {
+  use char <- nibble.do(
+    codepoint_satisfies(is_valid_non_ascii_codepoint, "valid-non-ascii character"),
+  )
+
+  return(ValidNonAscii(char))
+}
+
+/// Parser for the %x20-7F range of not-end-of-line: printable ASCII plus DEL (0x7F)
+pub fn printable() -> Parser(Node, String, ctx) {
+  use char <- nibble.do(
+    codepoint_satisfies(
+      fn(cp) { cp >= 0x20 && cp <= 0x7F },
+      "ASCII printable character (0x20-0x7F)",
+    ),
+  )
+
+  return(Printable(char))
+}
+
+/// Parser for not-end-of-line as defined by Dhall ABNF:
+/// not-end-of-line = %x20-7F / valid-non-ascii / tab
+pub fn not_end_of_line() -> Parser(Node, String, ctx) {
+  use inner <- nibble.do(nibble.one_of([printable(), valid_non_ascii(), tab()]))
+
+  return(NotEndOfLine(inner))
+}

test/ghall_test.gleam 🔗

@@ -183,3 +183,134 @@ pub fn parse_multiple_line_endings_test() {
     title: "Multiple line endings should all parse as EndOfLine",
   )
 }
+
+// Tests for printable parser (%x20-7F)
+
+pub fn parse_printable_space_test() {
+  let input = " "
+  let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input)
+  let parser = parser.printable()
+  let assert Ok(node.Printable(" ")) = nibble.run(tokens, parser)
+}
+
+pub fn parse_printable_ascii_test() {
+  let input = "a"
+  let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input)
+  let parser = parser.printable()
+  let assert Ok(node.Printable("a")) = nibble.run(tokens, parser)
+}
+
+pub fn parse_printable_tilde_test() {
+  let input = "~"
+  let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input)
+  let parser = parser.printable()
+  let assert Ok(node.Printable("~")) = nibble.run(tokens, parser)
+}
+
+pub fn parse_printable_rejects_tab_test() {
+  let input = "\t"
+  let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input)
+  let parser = parser.printable()
+  let assert Error(_) = nibble.run(tokens, parser)
+}
+
+pub fn parse_printable_rejects_newline_test() {
+  let input = "\n"
+  let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input)
+  let parser = parser.printable()
+  let assert Error(_) = nibble.run(tokens, parser)
+}
+
+pub fn parse_printable_rejects_non_ascii_test() {
+  let input = "é"
+  let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input)
+  let parser = parser.printable()
+  let assert Error(_) = nibble.run(tokens, parser)
+}
+
+// Tests for valid-non-ascii parser
+
+pub fn parse_valid_non_ascii_latin_test() {
+  let input = "é"
+  // é is U+00E9, in range 0x80-0xD7FF
+  let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input)
+  let parser = parser.valid_non_ascii()
+  let assert Ok(node.ValidNonAscii("é")) = nibble.run(tokens, parser)
+}
+
+pub fn parse_valid_non_ascii_emoji_test() {
+  let input = "🎉"
+  // 🎉 is U+1F389, in range 0x10000-0x1FFFD
+  let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input)
+  let parser = parser.valid_non_ascii()
+  let assert Ok(node.ValidNonAscii("🎉")) = nibble.run(tokens, parser)
+}
+
+pub fn parse_valid_non_ascii_chinese_test() {
+  let input = "中"
+  // 中 is U+4E2D, in range 0x80-0xD7FF
+  let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input)
+  let parser = parser.valid_non_ascii()
+  let assert Ok(node.ValidNonAscii("中")) = nibble.run(tokens, parser)
+}
+
+pub fn parse_valid_non_ascii_rejects_ascii_test() {
+  let input = "a"
+  let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input)
+  let parser = parser.valid_non_ascii()
+  let assert Error(_) = nibble.run(tokens, parser)
+}
+
+pub fn parse_valid_non_ascii_rejects_tab_test() {
+  let input = "\t"
+  let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input)
+  let parser = parser.valid_non_ascii()
+  let assert Error(_) = nibble.run(tokens, parser)
+}
+
+// Tests for not-end-of-line parser
+
+pub fn parse_not_end_of_line_printable_test() {
+  let input = "a"
+  let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input)
+  let parser = parser.not_end_of_line()
+  let assert Ok(node.NotEndOfLine(node.Printable("a"))) =
+    nibble.run(tokens, parser)
+}
+
+pub fn parse_not_end_of_line_space_test() {
+  let input = " "
+  let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input)
+  let parser = parser.not_end_of_line()
+  let assert Ok(node.NotEndOfLine(node.Printable(" "))) =
+    nibble.run(tokens, parser)
+}
+
+pub fn parse_not_end_of_line_tab_test() {
+  let input = "\t"
+  let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input)
+  let parser = parser.not_end_of_line()
+  let assert Ok(node.NotEndOfLine(node.Tab)) = nibble.run(tokens, parser)
+}
+
+pub fn parse_not_end_of_line_valid_non_ascii_test() {
+  let input = "λ"
+  let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input)
+  let parser = parser.not_end_of_line()
+  let assert Ok(node.NotEndOfLine(node.ValidNonAscii("λ"))) =
+    nibble.run(tokens, parser)
+}
+
+pub fn parse_not_end_of_line_rejects_newline_test() {
+  let input = "\n"
+  let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input)
+  let parser = parser.not_end_of_line()
+  let assert Error(_) = nibble.run(tokens, parser)
+}
+
+pub fn parse_not_end_of_line_rejects_carriage_return_test() {
+  let input = "\r"
+  let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input)
+  let parser = parser.not_end_of_line()
+  let assert Error(_) = nibble.run(tokens, parser)
+}