diff --git a/.gitignore b/.gitignore index 599be4eb9294fa93ae3e22fc8d4e3828b74d8dd2..64ecb4680ed3e123ff40dd338177afed11a8dea4 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ *.ez /build erl_crash.dump +.mcp.json diff --git a/birdie_snapshots/quasi_lexer_spans_with_multiline_input.accepted b/birdie_snapshots/quasi_lexer_spans_with_multiline_input.accepted new file mode 100644 index 0000000000000000000000000000000000000000..43b8c99f75c2a162510a9cd8ccf95c6cf7a497d6 --- /dev/null +++ b/birdie_snapshots/quasi_lexer_spans_with_multiline_input.accepted @@ -0,0 +1,20 @@ +--- +version: 1.4.1 +title: Quasi lexer spans with multiline input +file: ./test/ghall_test.gleam +test_name: quasi_lexer_off_by_one_test +--- +Token 0: 'l' at Span(row_start: 1, col_start: 1, row_end: 1, col_end: 2) +Token 1: 'e' at Span(row_start: 1, col_start: 2, row_end: 1, col_end: 3) +Token 2: 't' at Span(row_start: 1, col_start: 3, row_end: 1, col_end: 4) +Token 3: ' ' at Span(row_start: 1, col_start: 4, row_end: 1, col_end: 5) +Token 4: 'x' at Span(row_start: 1, col_start: 5, row_end: 1, col_end: 6) +Token 5: '1' at Span(row_start: 1, col_start: 6, row_end: 1, col_end: 7) +Token 6: ' ' at Span(row_start: 1, col_start: 7, row_end: 1, col_end: 8) +Token 7: '=' at Span(row_start: 1, col_start: 8, row_end: 1, col_end: 9) +Token 8: ' +' at Span(row_start: 1, col_start: 9, row_end: 2, col_end: 1) +Token 9: ' ' at Span(row_start: 2, col_start: 1, row_end: 2, col_end: 2) +Token 10: 'e' at Span(row_start: 2, col_start: 2, row_end: 2, col_end: 3) +Token 11: '1' at Span(row_start: 2, col_start: 3, row_end: 2, col_end: 4) + diff --git a/birdie_snapshots/should_fail_to_parse_'lt'_as_node_let.accepted b/birdie_snapshots/should_fail_to_parse_'lt'_as_node_let.accepted new file mode 100644 index 0000000000000000000000000000000000000000..4c22279da9b622ca08eade7ab16f232f24d3e087 --- /dev/null +++ b/birdie_snapshots/should_fail_to_parse_'lt'_as_node_let.accepted @@ -0,0 +1,10 @@ +--- +version: 1.4.1 +title: Should fail to parse 'lt' as node.Let +file: ./test/ghall_test.gleam +test_name: parse_let_failing_test +--- +Msg: expected 'e' in keyword 'let' +Got: t +At column: 2 + diff --git a/gleam.toml b/gleam.toml index acf68a3c1bc736f4a4912a610acbf54694c00285..166db64af90c63af33eace1f63e52b41ed53c8d7 100644 --- a/gleam.toml +++ b/gleam.toml @@ -18,3 +18,4 @@ nibble = ">= 1.1.4 and < 2.0.0" [dev-dependencies] gleeunit = ">= 1.0.0 and < 2.0.0" +birdie = ">= 1.4.1 and < 2.0.0" diff --git a/manifest.toml b/manifest.toml index c74671d87fac97f237056f26869e1e5c169a44e3..607634063c78e74750592996bcc083f7b4acb56b 100644 --- a/manifest.toml +++ b/manifest.toml @@ -2,15 +2,31 @@ # You typically do not need to edit this file packages = [ + { name = "argv", version = "1.0.2", build_tools = ["gleam"], requirements = [], otp_app = "argv", source = "hex", outer_checksum = "BA1FF0929525DEBA1CE67256E5ADF77A7CDDFE729E3E3F57A5BDCAA031DED09D" }, + { name = "birdie", version = "1.4.1", build_tools = ["gleam"], requirements = ["argv", "edit_distance", "filepath", "glance", "gleam_community_ansi", "gleam_stdlib", "justin", "rank", "simplifile", "term_size", "trie_again"], otp_app = "birdie", source = "hex", outer_checksum = "18599E478C14BD9EBD2465F0561F96EB9B58A24DB44AF86F103EF81D4B9834BF" }, + { name = "edit_distance", version = "3.0.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "edit_distance", source = "hex", outer_checksum = "7DC465C34695F9E57D79FC65670C53C992CE342BF29E0AA41FF44F61AF62FC56" }, + { name = "filepath", version = "1.1.2", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "filepath", source = "hex", outer_checksum = "B06A9AF0BF10E51401D64B98E4B627F1D2E48C154967DA7AF4D0914780A6D40A" }, + { name = "glance", version = "5.0.1", build_tools = ["gleam"], requirements = ["gleam_stdlib", "glexer"], otp_app = "glance", source = "hex", outer_checksum = "7F216D97935465FF4AC46699CD1C3E0FB19CB678B002E4ACAFCE256E96312F14" }, + { name = "gleam_community_ansi", version = "1.4.3", build_tools = ["gleam"], requirements = ["gleam_community_colour", "gleam_regexp", "gleam_stdlib"], otp_app = "gleam_community_ansi", source = "hex", outer_checksum = "8A62AE9CC6EA65BEA630D95016D6C07E4F9973565FA3D0DE68DC4200D8E0DD27" }, + { name = "gleam_community_colour", version = "2.0.2", build_tools = ["gleam"], requirements = ["gleam_json", "gleam_stdlib"], otp_app = "gleam_community_colour", source = "hex", outer_checksum = "E34DD2C896AC3792151EDA939DA435FF3B69922F33415ED3C4406C932FBE9634" }, + { name = "gleam_json", version = "3.0.2", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_json", source = "hex", outer_checksum = "874FA3C3BB6E22DD2BB111966BD40B3759E9094E05257899A7C08F5DE77EC049" }, { name = "gleam_regexp", version = "1.1.1", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_regexp", source = "hex", outer_checksum = "9C215C6CA84A5B35BB934A9B61A9A306EC743153BE2B0425A0D032E477B062A9" }, { name = "gleam_stdlib", version = "0.65.0", build_tools = ["gleam"], requirements = [], otp_app = "gleam_stdlib", source = "hex", outer_checksum = "7C69C71D8C493AE11A5184828A77110EB05A7786EBF8B25B36A72F879C3EE107" }, { name = "gleam_yielder", version = "1.1.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleam_yielder", source = "hex", outer_checksum = "8E4E4ECFA7982859F430C57F549200C7749823C106759F4A19A78AEA6687717A" }, { name = "gleeunit", version = "1.6.1", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleeunit", source = "hex", outer_checksum = "FDC68A8C492B1E9B429249062CD9BAC9B5538C6FBF584817205D0998C42E1DAC" }, + { name = "glexer", version = "2.3.0", build_tools = ["gleam"], requirements = ["gleam_stdlib", "splitter"], otp_app = "glexer", source = "hex", outer_checksum = "40A1FB0919FA080AD6C5809B4C7DBA545841CAAC8168FACDFA0B0667C22475CC" }, { name = "iv", version = "1.3.2", build_tools = ["gleam"], requirements = ["gleam_stdlib", "gleam_yielder"], otp_app = "iv", source = "hex", outer_checksum = "1FE22E047705BE69EA366E3A2E73C2E1310CBCB27DDE767DE17AE3FA86499947" }, + { name = "justin", version = "1.0.1", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "justin", source = "hex", outer_checksum = "7FA0C6DB78640C6DC5FBFD59BF3456009F3F8B485BF6825E97E1EB44E9A1E2CD" }, { name = "nibble", version = "1.1.4", build_tools = ["gleam"], requirements = ["gleam_regexp", "gleam_stdlib", "iv"], otp_app = "nibble", source = "hex", outer_checksum = "06397501730FF486AE6F99299982A33F5EA9F8945B5A25920C82C8F924CEA481" }, + { name = "rank", version = "1.0.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "rank", source = "hex", outer_checksum = "5660E361F0E49CBB714CC57CC4C89C63415D8986F05B2DA0C719D5642FAD91C9" }, + { name = "simplifile", version = "2.3.0", build_tools = ["gleam"], requirements = ["filepath", "gleam_stdlib"], otp_app = "simplifile", source = "hex", outer_checksum = "0A868DAC6063D9E983477981839810DC2E553285AB4588B87E3E9C96A7FB4CB4" }, + { name = "splitter", version = "1.1.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "splitter", source = "hex", outer_checksum = "05564A381580395DCDEFF4F88A64B021E8DAFA6540AE99B4623962F52976AA9D" }, + { name = "term_size", version = "1.0.1", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "term_size", source = "hex", outer_checksum = "D00BD2BC8FB3EBB7E6AE076F3F1FF2AC9D5ED1805F004D0896C784D06C6645F1" }, + { name = "trie_again", version = "1.1.4", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "trie_again", source = "hex", outer_checksum = "E3BD66B4E126EF567EA8C4944EAB216413392ADF6C16C36047AF79EE5EF13466" }, ] [requirements] +birdie = { version = ">= 1.4.1 and < 2.0.0" } gleam_stdlib = { version = ">= 0.44.0 and < 2.0.0" } gleeunit = { version = ">= 1.0.0 and < 2.0.0" } nibble = { version = ">= 1.1.4 and < 2.0.0" } diff --git a/src/ghall.gleam b/src/ghall.gleam index 5ef6b892ff67850639687ec70a527f46fee9fcac..8b31d14c928611f8c33a8fd8f488cd0ea17fd5bb 100644 --- a/src/ghall.gleam +++ b/src/ghall.gleam @@ -1,10 +1,11 @@ +import gleam/io import gleam/list +import gleam/string +import nibble import nibble/lexer.{type Token, Token} +import parser import quasi_lexer pub fn main() -> Nil { - let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: "let x1 = e1") - echo tokens - Nil } diff --git a/src/node.gleam b/src/node.gleam index a06179dd5d18109e9fca5b6d4759f0d55464d425..c7736359f7050e333530ebc08e5d036d30455c3e 100644 --- a/src/node.gleam +++ b/src/node.gleam @@ -1 +1,3 @@ -pub type Node +pub type Node { + Let +} diff --git a/src/parser.gleam b/src/parser.gleam new file mode 100644 index 0000000000000000000000000000000000000000..224c443563e5db8cf27c396eb827da8d21259e63 --- /dev/null +++ b/src/parser.gleam @@ -0,0 +1,37 @@ +import gleam/option.{None, Some} +import gleam/string +import nibble.{type Parser, do, return} +import node.{type Node} + +/// Parse a keyword string and return the specified Node on success +pub fn keyword(expected: String, node: Node) -> Parser(Node, String, ctx) { + use _ <- do(string.to_graphemes(expected) |> match_chars(expected)) + return(node) +} + +/// Recursively match each character in the list +fn match_chars(chars: List(String), context: String) -> Parser(Nil, String, ctx) { + case chars { + [] -> return(Nil) + + [first, ..rest] -> { + use _ <- do( + nibble.take_map( + "expected '" <> first <> "' in keyword '" <> context <> "'", + fn(tok) { + case tok == first { + True -> Some(Nil) + False -> None + } + }, + ), + ) + match_chars(rest, context) + } + } +} + +/// Parse the "let" keyword +pub fn let_keyword() -> Parser(Node, String, ctx) { + keyword("let", node.Let) +} diff --git a/src/quasi_lexer.gleam b/src/quasi_lexer.gleam index bd9b354e0ec62f231ce3ade0a1e973a0550487a0..73529e6d36458b5200d7607b226582d84ebe5ede 100644 --- a/src/quasi_lexer.gleam +++ b/src/quasi_lexer.gleam @@ -12,6 +12,8 @@ pub fn run( let assert Ok(tokens) = lexer.run(input, lexer) // Nibble's lexer prepends an empty string to the quasi_lexer's - // otherwise acceptable output - tokens |> list.drop(1) + // otherwise acceptable output. After dropping it, we need to decrement + // column values on the first row since the empty token advanced the column counter + tokens + |> list.drop(1) } diff --git a/test/ghall_test.gleam b/test/ghall_test.gleam index a6ac2a693dc9d87e63f387438ffd313b9d060552..45bdc079eda37b0d287cdbb78295eade4c56e7a0 100644 --- a/test/ghall_test.gleam +++ b/test/ghall_test.gleam @@ -1,6 +1,11 @@ +import birdie +import gleam/int import gleam/list import gleeunit +import nibble.{Expected} import nibble/lexer.{Span, Token} +import node +import parser import quasi_lexer pub fn main() -> Nil { @@ -20,3 +25,59 @@ pub fn simple_quasi_lexer_test() { assert col_end == col_start + 1 }) } + +pub fn quasi_lexer_off_by_one_test() { + let input = "let x1 =\n e1" + let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input) + + let snap = + tokens + |> list.index_map(fn(token, index) { + let Token(Span(rs, cs, re, ce), lexeme, _) = token + "Token " + <> int.to_string(index) + <> ": '" + <> lexeme + <> "' at Span(row_start: " + <> int.to_string(rs) + <> ", col_start: " + <> int.to_string(cs) + <> ", row_end: " + <> int.to_string(re) + <> ", col_end: " + <> int.to_string(ce) + <> ")\n" + }) + |> list.fold("", fn(acc, line) { acc <> line }) + + birdie.snap(snap, title: "Quasi lexer spans with multiline input") +} + +pub fn parse_let_successfully_test() { + let input = "let" + let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input) + let parser = parser.keyword("let", node.Let) + let assert Ok(_) = nibble.run(tokens, parser) +} + +pub fn parse_let_failing_test() { + let input = "lt" + let tokens = quasi_lexer.chars() |> quasi_lexer.run(on: input) + let parser = parser.keyword("let", node.Let) + let assert Error(error) = nibble.run(tokens, parser) + let assert [nibble.DeadEnd(Span(_, cs, _, _), Expected(msg, got: got), [])] = + error + + let snap = + "Msg: " + <> msg + <> "\n" + <> "Got: " + <> got + <> "\n" + <> "At column: " + <> int.to_string(cs) + <> "\n" + + birdie.snap(snap, title: "Should fail to parse 'lt' as node.Let") +}