1import gleam/option.{None, Some}
2import gleam/string
3import nibble.{type Parser, do, return}
4import node.{
5 type Node, EndOfLine, LineCommentPrefix, NotEndOfLine, Printable, Tab,
6 ValidNonAscii,
7}
8
/// Build a parser that matches `expected` grapheme by grapheme (via
/// `match_chars`) and, when the whole string has matched, yields `node`.
///
/// The parser runs under the context label "exact_string('<expected>')".
pub fn exact_string(
  expected: String,
  node: Node,
) -> Parser(Node, String, String) {
  let label = "exact_string('" <> expected <> "')"
  let matched = match_chars(string.to_graphemes(expected), expected)

  // The `Nil` produced by `match_chars` is dropped in favour of `node`.
  do(matched, fn(_) { return(node) })
  |> nibble.in(label)
}
21
/// Match `expected` through `exact_string`, discard the resulting node and
/// yield `Nil` instead.
///
/// The parser runs under the context label
/// "consume_exact_string('<expected>')".
pub fn consume_exact_string(expected: String) -> Parser(Nil, String, String) {
  // The node handed to `exact_string` is thrown away below, so the choice of
  // constructor is irrelevant.
  let matcher = exact_string(expected, node.Let)
  let label = "consume_exact_string('" <> expected <> "')"

  nibble.in(do(matcher, fn(_) { return(Nil) }), label)
}
33
/// Consume one token per entry of `chars`, in order, requiring every token to
/// be equal to the corresponding entry. Yields `Nil` once `chars` is empty.
///
/// `context` is the complete string being matched; it appears in the context
/// label and in the per-token "expected ..." message.
///
/// The "match_chars(<context>)" label is applied exactly once, around the
/// whole sequence, rather than once per remaining character. This keeps the
/// context reported on failure at a single frame regardless of how many
/// characters had already matched.
fn match_chars(
  chars: List(String),
  context: String,
) -> Parser(Nil, String, String) {
  nibble.in(match_chars_loop(chars, context), "match_chars(" <> context <> ")")
}

/// Unlabelled recursion behind `match_chars`: match the head token, then
/// continue with the tail.
fn match_chars_loop(
  chars: List(String),
  context: String,
) -> Parser(Nil, String, String) {
  case chars {
    // Nothing left to match: succeed without consuming anything.
    [] -> return(Nil)

    [first, ..rest] -> {
      use _ <- do(
        nibble.take_map(
          "expected '" <> first <> "' in keyword '" <> context <> "'",
          fn(tok) {
            case tok == first {
              True -> Some(Nil)
              False -> None
            }
          },
        ),
      )
      match_chars_loop(rest, context)
    }
  }
}
62
/// Parser for the literal string "let"; yields `node.Let` and runs under the
/// "let_keyword" context label.
pub fn let_keyword() -> Parser(Node, String, String) {
  "let"
  |> exact_string(node.Let)
  |> nibble.in("let_keyword")
}
66
/// Parser for a line terminator: a "\n" token, or the string "\r\n" matched
/// through `consume_exact_string`. Yields `EndOfLine` and runs under the
/// "end_of_line" context label.
pub fn end_of_line() -> Parser(Node, String, String) {
  let unix = nibble.in(nibble.token("\n"), "unix_line_ending")
  let windows = consume_exact_string("\r\n")

  // Alternatives are tried in list order: "\n" first, then "\r\n".
  nibble.one_of([unix, windows])
  |> do(fn(_) { return(EndOfLine) })
  |> nibble.in("end_of_line")
}
82
/// Parser for a single "\t" token; yields `Tab`.
pub fn tab() -> Parser(Node, String, ctx) {
  nibble.token("\t")
  |> do(fn(_) { return(Tab) })
}
88
/// Parser for the line-comment prefix "--"; yields `LineCommentPrefix`.
///
/// Runs under the "line_comment_prefix" context label, in the same way that
/// `let_keyword` and `end_of_line` label themselves, so a failure here names
/// the rule that was being parsed.
pub fn line_comment_prefix() -> Parser(Node, String, String) {
  nibble.in(
    {
      use _ <- do(consume_exact_string("--"))

      return(LineCommentPrefix)
    },
    "line_comment_prefix",
  )
}
94
/// Take one token and yield it unchanged when it consists of exactly one
/// codepoint whose integer value passes `predicate`. Any other token is
/// rejected; `error_msg` is handed to `nibble.take_map` as the description of
/// what was expected.
fn codepoint_satisfies(
  predicate: fn(Int) -> Bool,
  error_msg: String,
) -> Parser(String, String, ctx) {
  let accept = fn(grapheme: String) {
    case string.to_utf_codepoints(grapheme) {
      // Exactly one codepoint: keep the grapheme only if the predicate holds.
      [only] ->
        case predicate(string.utf_codepoint_to_int(only)) {
          True -> Some(grapheme)
          False -> None
        }
      // Zero or several codepoints never match.
      _ -> None
    }
  }

  nibble.take_map(error_msg, accept)
}
113
/// Report whether `codepoint` falls inside one of the `valid-non-ascii`
/// ranges of the Dhall ABNF specification:
/// 0x80-0xD7FF, 0xE000-0xFFFD, and 0xN0000-0xNFFFD for N = 1 through 0x10.
fn is_valid_non_ascii_codepoint(codepoint: Int) -> Bool {
  // Plane 0: 0x80 upward, skipping 0xD800-0xDFFF and the last two values
  // 0xFFFE/0xFFFF.
  let in_bmp =
    { codepoint >= 0x80 && codepoint <= 0xD7FF }
    || { codepoint >= 0xE000 && codepoint <= 0xFFFD }

  // Planes 1 through 16: every plane 0xN0000-0xNFFFF is accepted except for
  // its last two values (0xNFFFE and 0xNFFFF), i.e. the offset within the
  // plane must not exceed 0xFFFD.
  let in_supplementary =
    codepoint >= 0x10000
    && codepoint <= 0x10FFFF
    && codepoint % 0x10000 <= 0xFFFD

  in_bmp || in_supplementary
}
139
/// Parser for the Dhall ABNF `valid-non-ascii` rule; yields `ValidNonAscii`
/// wrapping the matched token.
pub fn valid_non_ascii() -> Parser(Node, String, ctx) {
  is_valid_non_ascii_codepoint
  |> codepoint_satisfies("valid-non-ascii character")
  |> do(fn(char) { return(ValidNonAscii(char)) })
}
148
/// Parser for one token whose single codepoint lies in the inclusive range
/// 0x20-0x7F; yields `Printable` wrapping the matched token.
pub fn printable() -> Parser(Node, String, ctx) {
  let in_range = fn(cp) { 0x20 <= cp && cp <= 0x7F }

  codepoint_satisfies(in_range, "ASCII printable character (0x20-0x7F)")
  |> do(fn(char) { return(Printable(char)) })
}
160
/// Parser for the Dhall ABNF `not-end-of-line` rule:
/// not-end-of-line = %x20-7F / valid-non-ascii / tab
///
/// Yields `NotEndOfLine` wrapping the node produced by whichever alternative
/// matched.
pub fn not_end_of_line() -> Parser(Node, String, ctx) {
  // Same order as the grammar rule: printable, valid-non-ascii, tab.
  let alternatives = [printable(), valid_non_ascii(), tab()]

  nibble.one_of(alternatives)
  |> do(fn(inner) { return(NotEndOfLine(inner)) })
}