end-of-line tests

Created by Phillip Davis

Change summary

birdie_snapshots/line_ending_should_reject_lone_carriage_return.accepted        |  17 
birdie_snapshots/multiple_line_endings_should_all_parse_as_end_of_line.accepted |  12 
birdie_snapshots/quasi_lexer_spans_with_multiline_input.accepted                |   5 
src/ghall.gleam                                                                 |   8 
src/node.gleam                                                                  |   1 
src/parser.gleam                                                                |  24 
src/quasi_lexer.gleam                                                           |   7 
test/ghall_test.gleam                                                           | 108 
8 files changed, 159 insertions(+), 23 deletions(-)

Detailed changes

birdie_snapshots/line_ending_should_reject_lone_carriage_return.accepted 🔗

@@ -0,0 +1,17 @@
+---
+version: 1.4.1
+title: Line ending should reject lone carriage return
+file: ./test/ghall_test.gleam
+test_name: parse_line_ending_fails_on_lone_carriage_return_test
+---
+Input: \r (lone carriage return)
+Number of errors: 2
+Error 1:
+  Reason: Expected("\"\\n\"", "\r")
+  Span: (row_start: 1, col_start: 1, row_end: 1, col_end: 2)
+  Context: []
+Error 2:
+  Reason: Expected("expected '\r\n' in keyword '\r\n'", "\r")
+  Span: (row_start: 1, col_start: 1, row_end: 1, col_end: 2)
+  Context: []
+

birdie_snapshots/quasi_lexer_spans_with_multiline_input.accepted 🔗

@@ -15,6 +15,7 @@ Token 7: '=' at Span(row_start: 1, col_start: 8, row_end: 1, col_end: 9)
 Token 8: '
 ' at Span(row_start: 1, col_start: 9, row_end: 2, col_end: 1)
 Token 9: ' ' at Span(row_start: 2, col_start: 1, row_end: 2, col_end: 2)
-Token 10: 'e' at Span(row_start: 2, col_start: 2, row_end: 2, col_end: 3)
-Token 11: '1' at Span(row_start: 2, col_start: 3, row_end: 2, col_end: 4)
+Token 10: ' ' at Span(row_start: 2, col_start: 2, row_end: 2, col_end: 3)
+Token 11: 'e' at Span(row_start: 2, col_start: 3, row_end: 2, col_end: 4)
+Token 12: '1' at Span(row_start: 2, col_start: 4, row_end: 2, col_end: 5)
 

src/ghall.gleam 🔗

@@ -1,11 +1,3 @@
-import gleam/io
-import gleam/list
-import gleam/string
-import nibble
-import nibble/lexer.{type Token, Token}
-import parser
-import quasi_lexer
-
 pub fn main() -> Nil {
   Nil
 }

src/parser.gleam 🔗

@@ -1,15 +1,20 @@
 import gleam/option.{None, Some}
 import gleam/string
 import nibble.{type Parser, do, return}
-import node.{type Node}
+import node.{type Node, EndOfLine}
 
-/// Parse a keyword string and return the specified Node on success
-pub fn keyword(expected: String, node: Node) -> Parser(Node, String, ctx) {
+pub fn exact_string(expected: String, node: Node) -> Parser(Node, String, ctx) {
   use _ <- do(string.to_graphemes(expected) |> match_chars(expected))
   return(node)
 }
 
-/// Recursively match each character in the list
+pub fn consume_exact_string(expected: String) -> Parser(Nil, String, ctx) {
+  use _ <- nibble.do(exact_string(expected, node.Let))
+  // NOTE: doesn't matter which constructor 
+
+  return(Nil)
+}
+
 fn match_chars(chars: List(String), context: String) -> Parser(Nil, String, ctx) {
   case chars {
     [] -> return(Nil)
@@ -31,7 +36,14 @@ fn match_chars(chars: List(String), context: String) -> Parser(Nil, String, ctx)
   }
 }
 
-/// Parse the "let" keyword
 pub fn let_keyword() -> Parser(Node, String, ctx) {
-  keyword("let", node.Let)
+  exact_string("let", node.Let)
+}
+
+pub fn end_of_line() -> Parser(Node, String, ctx) {
+  use _ <- nibble.do(
+    nibble.one_of([nibble.token("\n"), consume_exact_string("\r\n")]),
+  )
+
+  return(EndOfLine)
 }

src/quasi_lexer.gleam 🔗

@@ -11,9 +11,8 @@ pub fn run(
 ) -> List(Token(String)) {
   let assert Ok(tokens) = lexer.run(input, lexer)
 
-  // Nibble's lexer prepends an empty string to the quasi_lexer's
-  // otherwise acceptable output. After dropping it, we need to decrement
-  // column values on the first row since the empty token advanced the column counter
+  // Nibble's lexer prepends an empty string at the start of each line.
+  // Filter out all empty tokens.
   tokens
-  |> list.drop(1)
+  |> list.filter(fn(token) { token.lexeme != "" })
 }

test/ghall_test.gleam 🔗

@@ -1,6 +1,7 @@
 import birdie
 import gleam/int
 import gleam/list
+import gleam/string
 import gleeunit
 import nibble.{Expected}
 import nibble/lexer.{Span, Token}
@@ -27,7 +28,7 @@ pub fn simple_quasi_lexer_test() {
 }
 
 pub fn quasi_lexer_off_by_one_test() {
-  let input = "let x1 =\n e1"
+  let input = "let x1 =\n  e1"
   let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input)
 
   let snap =
@@ -56,14 +57,14 @@ pub fn quasi_lexer_off_by_one_test() {
 pub fn parse_let_successfully_test() {
   let input = "let"
   let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input)
-  let parser = parser.keyword("let", node.Let)
+  let parser = parser.exact_string("let", node.Let)
   let assert Ok(_) = nibble.run(tokens, parser)
 }
 
 pub fn parse_let_failing_test() {
   let input = "lt"
   let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input)
-  let parser = parser.keyword("let", node.Let)
+  let parser = parser.exact_string("let", node.Let)
   let assert Error(error) = nibble.run(tokens, parser)
   let assert [nibble.DeadEnd(Span(_, cs, _, _), Expected(msg, got: got), [])] =
     error
@@ -81,3 +82,104 @@ pub fn parse_let_failing_test() {
 
   birdie.snap(snap, title: "Should fail to parse 'lt' as node.Let")
 }
+
+pub fn parse_unix_line_ending_test() {
+  let input = "\n"
+  let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input)
+  let parser = parser.end_of_line()
+  let assert Ok(node.EndOfLine) = nibble.run(tokens, parser)
+}
+
+pub fn parse_windows_line_ending_test() {
+  let input = "\r\n"
+  let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input)
+  let parser = parser.end_of_line()
+  let assert Ok(node.EndOfLine) = nibble.run(tokens, parser)
+}
+
+pub fn parse_line_ending_fails_on_lone_carriage_return_test() {
+  let input = "\r"
+  let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input)
+  let parser = parser.end_of_line()
+  let assert Error(error) = nibble.run(tokens, parser)
+
+  let snap =
+    "Input: \\r (lone carriage return)\n"
+    <> "Number of errors: "
+    <> int.to_string(list.length(error))
+    <> "\n"
+    <> {
+      error
+      |> list.index_map(fn(dead_end, idx) {
+        let nibble.DeadEnd(Span(rs, cs, re, ce), reason, context) = dead_end
+        "Error "
+        <> int.to_string(idx + 1)
+        <> ":\n"
+        <> "  Reason: "
+        <> string.inspect(reason)
+        <> "\n"
+        <> "  Span: (row_start: "
+        <> int.to_string(rs)
+        <> ", col_start: "
+        <> int.to_string(cs)
+        <> ", row_end: "
+        <> int.to_string(re)
+        <> ", col_end: "
+        <> int.to_string(ce)
+        <> ")\n"
+        <> "  Context: "
+        <> string.inspect(context)
+        <> "\n"
+      })
+      |> string.join("")
+    }
+
+  birdie.snap(snap, title: "Line ending should reject lone carriage return")
+}
+
+pub fn parse_line_ending_fails_on_other_chars_test() {
+  let input = "x"
+  let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input)
+  let parser = parser.end_of_line()
+  let assert Error(_) = nibble.run(tokens, parser)
+}
+
+pub fn parse_line_ending_after_content_test() {
+  let input = "let\n"
+  let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input)
+  let parser = {
+    use _ <- nibble.do(parser.let_keyword())
+    parser.end_of_line()
+  }
+  let assert Ok(node.EndOfLine) = nibble.run(tokens, parser)
+}
+
+pub fn parse_multiple_line_endings_test() {
+  let input = "\n\r\n\n"
+  let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input)
+  let parser = {
+    use eol1 <- nibble.do(parser.end_of_line())
+    use eol2 <- nibble.do(parser.end_of_line())
+    use eol3 <- nibble.do(parser.end_of_line())
+    nibble.return([eol1, eol2, eol3])
+  }
+  let assert Ok(nodes) = nibble.run(tokens, parser)
+
+  let snap =
+    "Input: \\n\\r\\n\\n (Unix, Windows, Unix line endings)\n"
+    <> "Parsed "
+    <> int.to_string(list.length(nodes))
+    <> " line endings:\n"
+    <> {
+      nodes
+      |> list.index_map(fn(n, idx) {
+        "  " <> int.to_string(idx + 1) <> ". " <> string.inspect(n) <> "\n"
+      })
+      |> string.join("")
+    }
+
+  birdie.snap(
+    snap,
+    title: "Multiple line endings should all parse as EndOfLine",
+  )
+}