Detailed changes
@@ -0,0 +1,17 @@
+---
+version: 1.4.1
+title: Line ending should reject lone carriage return
+file: ./test/ghall_test.gleam
+test_name: parse_line_ending_fails_on_lone_carriage_return_test
+---
+Input: \r (lone carriage return)
+Number of errors: 2
+Error 1:
+ Reason: Expected("\"\\n\"", "\r")
+ Span: (row_start: 1, col_start: 1, row_end: 1, col_end: 2)
+ Context: []
+Error 2:
+ Reason: Expected("expected '\r\n' in keyword '\r\n'", "\r")
+ Span: (row_start: 1, col_start: 1, row_end: 1, col_end: 2)
+ Context: []
+
@@ -0,0 +1,12 @@
+---
+version: 1.4.1
+title: Multiple line endings should all parse as EndOfLine
+file: ./test/ghall_test.gleam
+test_name: parse_multiple_line_endings_test
+---
+Input: \n\r\n\n (Unix, Windows, Unix line endings)
+Parsed 3 line endings:
+ 1. EndOfLine
+ 2. EndOfLine
+ 3. EndOfLine
+
@@ -15,6 +15,7 @@ Token 7: '=' at Span(row_start: 1, col_start: 8, row_end: 1, col_end: 9)
Token 8: '
' at Span(row_start: 1, col_start: 9, row_end: 2, col_end: 1)
Token 9: ' ' at Span(row_start: 2, col_start: 1, row_end: 2, col_end: 2)
-Token 10: 'e' at Span(row_start: 2, col_start: 2, row_end: 2, col_end: 3)
-Token 11: '1' at Span(row_start: 2, col_start: 3, row_end: 2, col_end: 4)
+Token 10: ' ' at Span(row_start: 2, col_start: 2, row_end: 2, col_end: 3)
+Token 11: 'e' at Span(row_start: 2, col_start: 3, row_end: 2, col_end: 4)
+Token 12: '1' at Span(row_start: 2, col_start: 4, row_end: 2, col_end: 5)
@@ -1,11 +1,3 @@
-import gleam/io
-import gleam/list
-import gleam/string
-import nibble
-import nibble/lexer.{type Token, Token}
-import parser
-import quasi_lexer
-
pub fn main() -> Nil {
Nil
}
@@ -1,3 +1,4 @@
pub type Node {
Let
+ EndOfLine
}
@@ -1,15 +1,20 @@
import gleam/option.{None, Some}
import gleam/string
import nibble.{type Parser, do, return}
-import node.{type Node}
+import node.{type Node, EndOfLine}
-/// Parse a keyword string and return the specified Node on success
-pub fn keyword(expected: String, node: Node) -> Parser(Node, String, ctx) {
+pub fn exact_string(expected: String, node: Node) -> Parser(Node, String, ctx) {
use _ <- do(string.to_graphemes(expected) |> match_chars(expected))
return(node)
}
-/// Recursively match each character in the list
+/// Consume the graphemes of `expected` in order, producing `Nil`.
+pub fn consume_exact_string(expected: String) -> Parser(Nil, String, ctx) {
+  // Call `match_chars` directly: going through `exact_string` would require
+  // passing an arbitrary `Node` constructor whose value is then thrown away.
+  string.to_graphemes(expected) |> match_chars(expected)
+}
+
fn match_chars(chars: List(String), context: String) -> Parser(Nil, String, ctx) {
case chars {
[] -> return(Nil)
@@ -31,7 +36,14 @@ fn match_chars(chars: List(String), context: String) -> Parser(Nil, String, ctx)
}
}
-/// Parse the "let" keyword
pub fn let_keyword() -> Parser(Node, String, ctx) {
- keyword("let", node.Let)
+ exact_string("let", node.Let)
+}
+
+/// Parse one line ending, either "\n" or "\r\n", and yield `EndOfLine`.
+pub fn end_of_line() -> Parser(Node, String, ctx) {
+  let unix = nibble.token("\n")
+  let windows = consume_exact_string("\r\n")
+  use _ <- do(nibble.one_of([unix, windows]))
+  return(EndOfLine)
+}
@@ -11,9 +11,8 @@ pub fn run(
) -> List(Token(String)) {
let assert Ok(tokens) = lexer.run(input, lexer)
- // Nibble's lexer prepends an empty string to the quasi_lexer's
- // otherwise acceptable output. After dropping it, we need to decrement
- // column values on the first row since the empty token advanced the column counter
+ // Nibble's lexer prepends an empty string at the start of each line.
+ // Filter out all empty tokens.
tokens
- |> list.drop(1)
+ |> list.filter(fn(token) { token.lexeme != "" })
}
@@ -1,6 +1,7 @@
import birdie
import gleam/int
import gleam/list
+import gleam/string
import gleeunit
import nibble.{Expected}
import nibble/lexer.{Span, Token}
@@ -27,7 +28,7 @@ pub fn simple_quasi_lexer_test() {
}
pub fn quasi_lexer_off_by_one_test() {
- let input = "let x1 =\n e1"
+ let input = "let x1 =\n e1"
let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input)
let snap =
@@ -56,14 +57,14 @@ pub fn quasi_lexer_off_by_one_test() {
pub fn parse_let_successfully_test() {
let input = "let"
let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input)
- let parser = parser.keyword("let", node.Let)
+ let parser = parser.exact_string("let", node.Let)
let assert Ok(_) = nibble.run(tokens, parser)
}
pub fn parse_let_failing_test() {
let input = "lt"
let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input)
- let parser = parser.keyword("let", node.Let)
+ let parser = parser.exact_string("let", node.Let)
let assert Error(error) = nibble.run(tokens, parser)
let assert [nibble.DeadEnd(Span(_, cs, _, _), Expected(msg, got: got), [])] =
error
@@ -81,3 +82,104 @@ pub fn parse_let_failing_test() {
birdie.snap(snap, title: "Should fail to parse 'lt' as node.Let")
}
+
+/// A bare "\n" parses to `EndOfLine`.
+pub fn parse_unix_line_ending_test() {
+  let unix_tokens = quasi_lexer.chars() |> quasi_lexer.run(on: "\n")
+  let outcome = nibble.run(unix_tokens, parser.end_of_line())
+  let assert Ok(node.EndOfLine) = outcome
+}
+
+/// A "\r\n" pair parses to `EndOfLine`.
+pub fn parse_windows_line_ending_test() {
+  let windows_tokens = quasi_lexer.chars() |> quasi_lexer.run(on: "\r\n")
+  let outcome = nibble.run(windows_tokens, parser.end_of_line())
+  let assert Ok(node.EndOfLine) = outcome
+}
+
+pub fn parse_line_ending_fails_on_lone_carriage_return_test() {
+ let input = "\r"
+ let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input)
+ let parser = parser.end_of_line()
+ let assert Error(error) = nibble.run(tokens, parser)
+ // Render every dead end (reason, span, context) into the snapshot text.
+ let snap =
+ "Input: \\r (lone carriage return)\n"
+ <> "Number of errors: "
+ <> int.to_string(list.length(error))
+ <> "\n"
+ <> {
+ error
+ |> list.index_map(fn(dead_end, idx) {
+ let nibble.DeadEnd(Span(rs, cs, re, ce), reason, context) = dead_end
+ "Error "
+ <> int.to_string(idx + 1)
+ <> ":\n"
+ <> " Reason: "
+ <> string.inspect(reason)
+ <> "\n"
+ <> " Span: (row_start: "
+ <> int.to_string(rs)
+ <> ", col_start: "
+ <> int.to_string(cs)
+ <> ", row_end: "
+ <> int.to_string(re)
+ <> ", col_end: "
+ <> int.to_string(ce)
+ <> ")\n"
+ <> " Context: "
+ <> string.inspect(context)
+ <> "\n"
+ })
+ |> string.join("")
+ }
+ // Hand the rendered text to birdie as a snapshot with this title.
+ birdie.snap(snap, title: "Line ending should reject lone carriage return")
+}
+
+/// Input "x", which is not a line ending, makes `end_of_line` fail.
+pub fn parse_line_ending_fails_on_other_chars_test() {
+  let other_tokens = quasi_lexer.chars() |> quasi_lexer.run(on: "x")
+  let outcome = nibble.run(other_tokens, parser.end_of_line())
+  let assert Error(_) = outcome
+}
+
+/// `end_of_line` succeeds after `let_keyword` has run on the input "let\n".
+pub fn parse_line_ending_after_content_test() {
+  let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: "let\n")
+  // Run the keyword parser first, then yield the line ending's result.
+  let let_then_eol =
+    nibble.do(parser.let_keyword(), fn(_) { parser.end_of_line() })
+  let outcome = nibble.run(tokens, let_then_eol)
+  let assert Ok(node.EndOfLine) = outcome
+}
+
+pub fn parse_multiple_line_endings_test() {
+ let input = "\n\r\n\n"
+ let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input)
+ let parser = {
+ use eol1 <- nibble.do(parser.end_of_line())
+ use eol2 <- nibble.do(parser.end_of_line())
+ use eol3 <- nibble.do(parser.end_of_line())
+ nibble.return([eol1, eol2, eol3])
+ }
+ let assert Ok(nodes) = nibble.run(tokens, parser)
+ // List each parsed node on its own numbered line.
+ let snap =
+ "Input: \\n\\r\\n\\n (Unix, Windows, Unix line endings)\n"
+ <> "Parsed "
+ <> int.to_string(list.length(nodes))
+ <> " line endings:\n"
+ <> {
+ nodes
+ |> list.index_map(fn(n, idx) {
+ " " <> int.to_string(idx + 1) <> ". " <> string.inspect(n) <> "\n"
+ })
+ |> string.join("")
+ }
+ // Hand the listing to birdie as a snapshot with this title.
+ birdie.snap(
+ snap,
+ title: "Multiple line endings should all parse as EndOfLine",
+ )
+}