From a57091b8d079ab65fea44f95c658c532310b8916 Mon Sep 17 00:00:00 2001 From: Phillip Davis Date: Wed, 29 Oct 2025 20:04:40 -0400 Subject: [PATCH] end-of-line tests --- ...hould_reject_lone_carriage_return.accepted | 17 +++ ...s_should_all_parse_as_end_of_line.accepted | 12 ++ ..._lexer_spans_with_multiline_input.accepted | 5 +- src/ghall.gleam | 8 -- src/node.gleam | 1 + src/parser.gleam | 24 +++- src/quasi_lexer.gleam | 7 +- test/ghall_test.gleam | 108 +++++++++++++++++- 8 files changed, 159 insertions(+), 23 deletions(-) create mode 100644 birdie_snapshots/line_ending_should_reject_lone_carriage_return.accepted create mode 100644 birdie_snapshots/multiple_line_endings_should_all_parse_as_end_of_line.accepted diff --git a/birdie_snapshots/line_ending_should_reject_lone_carriage_return.accepted b/birdie_snapshots/line_ending_should_reject_lone_carriage_return.accepted new file mode 100644 index 0000000000000000000000000000000000000000..975a813fc3d4976723ee73345d5556b87e4be3fe --- /dev/null +++ b/birdie_snapshots/line_ending_should_reject_lone_carriage_return.accepted @@ -0,0 +1,17 @@ +--- +version: 1.4.1 +title: Line ending should reject lone carriage return +file: ./test/ghall_test.gleam +test_name: parse_line_ending_fails_on_lone_carriage_return_test +--- +Input: \r (lone carriage return) +Number of errors: 2 +Error 1: + Reason: Expected("\"\\n\"", "\r") + Span: (row_start: 1, col_start: 1, row_end: 1, col_end: 2) + Context: [] +Error 2: + Reason: Expected("expected '\r\n' in keyword '\r\n'", "\r") + Span: (row_start: 1, col_start: 1, row_end: 1, col_end: 2) + Context: [] + diff --git a/birdie_snapshots/multiple_line_endings_should_all_parse_as_end_of_line.accepted b/birdie_snapshots/multiple_line_endings_should_all_parse_as_end_of_line.accepted new file mode 100644 index 0000000000000000000000000000000000000000..d2b9d9069038e261b8f1a73ae51d91e17a03e21a --- /dev/null +++ b/birdie_snapshots/multiple_line_endings_should_all_parse_as_end_of_line.accepted @@ -0,0 +1,12 @@ +--- +version: 1.4.1 +title: Multiple line endings should all parse as EndOfLine +file: ./test/ghall_test.gleam +test_name: parse_multiple_line_endings_test +--- +Input: \n\r\n\n (Unix, Windows, Unix line endings) +Parsed 3 line endings: + 1. EndOfLine + 2. EndOfLine + 3. EndOfLine + diff --git a/birdie_snapshots/quasi_lexer_spans_with_multiline_input.accepted b/birdie_snapshots/quasi_lexer_spans_with_multiline_input.accepted index 43b8c99f75c2a162510a9cd8ccf95c6cf7a497d6..b4d66b10ebc70a5eafd771ad080e72c75855da34 100644 --- a/birdie_snapshots/quasi_lexer_spans_with_multiline_input.accepted +++ b/birdie_snapshots/quasi_lexer_spans_with_multiline_input.accepted @@ -15,6 +15,7 @@ Token 7: '=' at Span(row_start: 1, col_start: 8, row_end: 1, col_end: 9) Token 8: ' ' at Span(row_start: 1, col_start: 9, row_end: 2, col_end: 1) Token 9: ' ' at Span(row_start: 2, col_start: 1, row_end: 2, col_end: 2) -Token 10: 'e' at Span(row_start: 2, col_start: 2, row_end: 2, col_end: 3) -Token 11: '1' at Span(row_start: 2, col_start: 3, row_end: 2, col_end: 4) +Token 10: ' ' at Span(row_start: 2, col_start: 2, row_end: 2, col_end: 3) +Token 11: 'e' at Span(row_start: 2, col_start: 3, row_end: 2, col_end: 4) +Token 12: '1' at Span(row_start: 2, col_start: 4, row_end: 2, col_end: 5) diff --git a/src/ghall.gleam b/src/ghall.gleam index 8b31d14c928611f8c33a8fd8f488cd0ea17fd5bb..e95ddd67dc6ed3a5f9a67cc9910c74a5f276923e 100644 --- a/src/ghall.gleam +++ b/src/ghall.gleam @@ -1,11 +1,3 @@ -import gleam/io -import gleam/list -import gleam/string -import nibble -import nibble/lexer.{type Token, Token} -import parser -import quasi_lexer - pub fn main() -> Nil { Nil } diff --git a/src/node.gleam b/src/node.gleam index c7736359f7050e333530ebc08e5d036d30455c3e..948084d064c51c24b2a289b4a4d3132ad123cd45 100644 --- a/src/node.gleam +++ b/src/node.gleam @@ -1,3 +1,4 @@ pub type Node { Let + EndOfLine } diff --git a/src/parser.gleam b/src/parser.gleam index 224c443563e5db8cf27c396eb827da8d21259e63..aa3b599c1f4cd4df859213e59d14160e30100439 100644 --- a/src/parser.gleam +++ b/src/parser.gleam @@ -1,15 +1,20 @@ import gleam/option.{None, Some} import gleam/string import nibble.{type Parser, do, return} -import node.{type Node} +import node.{type Node, EndOfLine} -/// Parse a keyword string and return the specified Node on success -pub fn keyword(expected: String, node: Node) -> Parser(Node, String, ctx) { +pub fn exact_string(expected: String, node: Node) -> Parser(Node, String, ctx) { use _ <- do(string.to_graphemes(expected) |> match_chars(expected)) return(node) } -/// Recursively match each character in the list +pub fn consume_exact_string(expected: String) -> Parser(Nil, String, ctx) { + use _ <- nibble.do(exact_string(expected, node.Let)) + // NOTE: doesn't matter which constructor + + return(Nil) +} + fn match_chars(chars: List(String), context: String) -> Parser(Nil, String, ctx) { case chars { [] -> return(Nil) @@ -31,7 +36,14 @@ fn match_chars(chars: List(String), context: String) -> Parser(Nil, String, ctx) } } -/// Parse the "let" keyword pub fn let_keyword() -> Parser(Node, String, ctx) { - keyword("let", node.Let) + exact_string("let", node.Let) +} + +pub fn end_of_line() -> Parser(Node, String, ctx) { + use _ <- nibble.do( + nibble.one_of([nibble.token("\n"), consume_exact_string("\r\n")]), + ) + + return(EndOfLine) } diff --git a/src/quasi_lexer.gleam b/src/quasi_lexer.gleam index 73529e6d36458b5200d7607b226582d84ebe5ede..7905486dca2f20ce0e96e28f01f90b2f85dcb6f5 100644 --- a/src/quasi_lexer.gleam +++ b/src/quasi_lexer.gleam @@ -11,9 +11,8 @@ pub fn run( ) -> List(Token(String)) { let assert Ok(tokens) = lexer.run(input, lexer) - // Nibble's lexer prepends an empty string to the quasi_lexer's - // otherwise acceptable output. After dropping it, we need to decrement - // column values on the first row since the empty token advanced the column counter + // Nibble's lexer prepends an empty string at the start of each line. + // Filter out all empty tokens. tokens - |> list.drop(1) + |> list.filter(fn(token) { token.lexeme != "" }) } diff --git a/test/ghall_test.gleam b/test/ghall_test.gleam index 45bdc079eda37b0d287cdbb78295eade4c56e7a0..bf3be6a7fbe6126be80aebc041585ac1421c4415 100644 --- a/test/ghall_test.gleam +++ b/test/ghall_test.gleam @@ -1,6 +1,7 @@ import birdie import gleam/int import gleam/list +import gleam/string import gleeunit import nibble.{Expected} import nibble/lexer.{Span, Token} @@ -27,7 +28,7 @@ pub fn simple_quasi_lexer_test() { } pub fn quasi_lexer_off_by_one_test() { - let input = "let x1 =\n e1" + let input = "let x1 =\n e1" let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input) let snap = @@ -56,14 +57,14 @@ pub fn quasi_lexer_off_by_one_test() { pub fn parse_let_successfully_test() { let input = "let" let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input) - let parser = parser.keyword("let", node.Let) + let parser = parser.exact_string("let", node.Let) let assert Ok(_) = nibble.run(tokens, parser) } pub fn parse_let_failing_test() { let input = "lt" let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input) - let parser = parser.keyword("let", node.Let) + let parser = parser.exact_string("let", node.Let) let assert Error(error) = nibble.run(tokens, parser) let assert [nibble.DeadEnd(Span(_, cs, _, _), Expected(msg, got: got), [])] = error @@ -81,3 +82,104 @@ pub fn parse_let_failing_test() { birdie.snap(snap, title: "Should fail to parse 'lt' as node.Let") } + +pub fn parse_unix_line_ending_test() { + let input = "\n" + let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input) + let parser = parser.end_of_line() + let assert Ok(node.EndOfLine) = nibble.run(tokens, parser) +} + +pub fn parse_windows_line_ending_test() { + let input = "\r\n" + let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input) + let parser = parser.end_of_line() + let assert Ok(node.EndOfLine) = nibble.run(tokens, parser) +} + +pub fn parse_line_ending_fails_on_lone_carriage_return_test() { + let input = "\r" + let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input) + let parser = parser.end_of_line() + let assert Error(error) = nibble.run(tokens, parser) + + let snap = + "Input: \\r (lone carriage return)\n" + <> "Number of errors: " + <> int.to_string(list.length(error)) + <> "\n" + <> { + error + |> list.index_map(fn(dead_end, idx) { + let nibble.DeadEnd(Span(rs, cs, re, ce), reason, context) = dead_end + "Error " + <> int.to_string(idx + 1) + <> ":\n" + <> " Reason: " + <> string.inspect(reason) + <> "\n" + <> " Span: (row_start: " + <> int.to_string(rs) + <> ", col_start: " + <> int.to_string(cs) + <> ", row_end: " + <> int.to_string(re) + <> ", col_end: " + <> int.to_string(ce) + <> ")\n" + <> " Context: " + <> string.inspect(context) + <> "\n" + }) + |> string.join("") + } + + birdie.snap(snap, title: "Line ending should reject lone carriage return") +} + +pub fn parse_line_ending_fails_on_other_chars_test() { + let input = "x" + let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input) + let parser = parser.end_of_line() + let assert Error(_) = nibble.run(tokens, parser) +} + +pub fn parse_line_ending_after_content_test() { + let input = "let\n" + let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input) + let parser = { + use _ <- nibble.do(parser.let_keyword()) + parser.end_of_line() + } + let assert Ok(node.EndOfLine) = nibble.run(tokens, parser) +} + +pub fn parse_multiple_line_endings_test() { + let input = "\n\r\n\n" + let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input) + let parser = { + use eol1 <- nibble.do(parser.end_of_line()) + use eol2 <- nibble.do(parser.end_of_line()) + use eol3 <- nibble.do(parser.end_of_line()) + nibble.return([eol1, eol2, eol3]) + } + let assert Ok(nodes) = nibble.run(tokens, parser) + + let snap = + "Input: \\n\\r\\n\\n (Unix, Windows, Unix line endings)\n" + <> "Parsed " + <> int.to_string(list.length(nodes)) + <> " line endings:\n" + <> { + nodes + |> list.index_map(fn(n, idx) { + " " <> int.to_string(idx + 1) <> ". " <> string.inspect(n) <> "\n" + }) + |> string.join("") + } + + birdie.snap( + snap, + title: "Multiple line endings should all parse as EndOfLine", + ) +}