parser.gleam

  1import gleam/option.{None, Some}
  2import gleam/string
  3import nibble.{type Parser, do, return}
  4import node.{
  5  type Node, EndOfLine, LineCommentPrefix, NotEndOfLine, Printable, Tab,
  6  ValidNonAscii,
  7}
  8
  9pub fn exact_string(
 10  expected: String,
 11  node: Node,
 12) -> Parser(Node, String, String) {
 13  nibble.in(
 14    {
 15      use _ <- do(string.to_graphemes(expected) |> match_chars(expected))
 16      return(node)
 17    },
 18    "exact_string('" <> expected <> "')",
 19  )
 20}
 21
 22pub fn consume_exact_string(expected: String) -> Parser(Nil, String, String) {
 23  nibble.in(
 24    {
 25      use _ <- nibble.do(exact_string(expected, node.Let))
 26      // NOTE: doesn't matter which constructor
 27
 28      return(Nil)
 29    },
 30    "consume_exact_string('" <> expected <> "')",
 31  )
 32}
 33
 34fn match_chars(
 35  chars: List(String),
 36  context: String,
 37) -> Parser(Nil, String, String) {
 38  nibble.in(
 39    {
 40      case chars {
 41        [] -> return(Nil)
 42
 43        [first, ..rest] -> {
 44          use _ <- do(
 45            nibble.take_map(
 46              "expected '" <> first <> "' in keyword '" <> context <> "'",
 47              fn(tok) {
 48                case tok == first {
 49                  True -> Some(Nil)
 50                  False -> None
 51                }
 52              },
 53            ),
 54          )
 55          match_chars(rest, context)
 56        }
 57      }
 58    },
 59    "match_chars(" <> context <> ")",
 60  )
 61}
 62
 63pub fn let_keyword() -> Parser(Node, String, String) {
 64  nibble.in(exact_string("let", node.Let), "let_keyword")
 65}
 66
 67pub fn end_of_line() -> Parser(Node, String, String) {
 68  nibble.in(
 69    {
 70      use _ <- nibble.do(
 71        nibble.one_of([
 72          nibble.in(nibble.token("\n"), "unix_line_ending"),
 73          consume_exact_string("\r\n"),
 74        ]),
 75      )
 76
 77      return(EndOfLine)
 78    },
 79    "end_of_line",
 80  )
 81}
 82
 83pub fn tab() -> Parser(Node, String, ctx) {
 84  use _ <- nibble.do(nibble.token("\t"))
 85
 86  return(Tab)
 87}
 88
 89pub fn line_comment_prefix() -> Parser(Node, String, String) {
 90  use _ <- nibble.do(consume_exact_string("--"))
 91
 92  return(LineCommentPrefix)
 93}
 94
 95/// Helper function to parse a grapheme if its codepoint satisfies a predicate
 96fn codepoint_satisfies(
 97  predicate: fn(Int) -> Bool,
 98  error_msg: String,
 99) -> Parser(String, String, ctx) {
100  nibble.take_map(error_msg, fn(grapheme) {
101    case string.to_utf_codepoints(grapheme) {
102      [codepoint] -> {
103        let cp_value = string.utf_codepoint_to_int(codepoint)
104        case predicate(cp_value) {
105          True -> Some(grapheme)
106          False -> None
107        }
108      }
109      _ -> None
110    }
111  })
112}
113
/// Reports whether `codepoint` falls in one of the accepted non-ASCII ranges:
/// 0x80-0xD7FF, 0xE000-0xFFFD, and x0000-xFFFD for each plane from 0x1 to
/// 0x10 (as defined by the Dhall ABNF `valid-non-ascii` rule).
///
/// Equivalently: the value lies in 0x80-0x10FFFD, is not in the surrogate
/// block 0xD800-0xDFFF, and is not one of the last two codepoints of a plane.
fn is_valid_non_ascii_codepoint(codepoint: Int) -> Bool {
  let within_bounds = codepoint >= 0x80 && codepoint <= 0x10FFFD
  let is_surrogate = codepoint >= 0xD800 && codepoint <= 0xDFFF
  // xFFFE and xFFFF are excluded in every plane, including plane 0.
  let is_plane_tail = codepoint % 0x10000 >= 0xFFFE

  within_bounds && !is_surrogate && !is_plane_tail
}
139
/// Parser for `valid-non-ascii` as defined by the Dhall ABNF: accepts one
/// token accepted by `is_valid_non_ascii_codepoint` and wraps it in
/// `ValidNonAscii`.
pub fn valid_non_ascii() -> Parser(Node, String, ctx) {
  codepoint_satisfies(is_valid_non_ascii_codepoint, "valid-non-ascii character")
  |> do(fn(grapheme) { return(ValidNonAscii(grapheme)) })
}
148
/// Parser for the ASCII range %x20-7F (both ends inclusive): accepts one
/// token whose single codepoint lies in that range and wraps it in
/// `Printable`.
pub fn printable() -> Parser(Node, String, ctx) {
  let in_range = fn(value) { 0x20 <= value && value <= 0x7F }

  codepoint_satisfies(in_range, "ASCII printable character (0x20-0x7F)")
  |> do(fn(grapheme) { return(Printable(grapheme)) })
}
160
/// Parser for `not-end-of-line` as defined by the Dhall ABNF:
/// not-end-of-line = %x20-7F / valid-non-ascii / tab
///
/// Tries `printable`, then `valid_non_ascii`, then `tab`, and wraps whichever
/// node was produced in `NotEndOfLine`.
pub fn not_end_of_line() -> Parser(Node, String, ctx) {
  let alternatives = [printable(), valid_non_ascii(), tab()]

  nibble.one_of(alternatives)
  |> do(fn(matched) { return(NotEndOfLine(matched)) })
}