parser.gleam

  1import gleam/option.{None, Some}
  2import gleam/string
  3import nibble.{type Parser, do, return}
  4import node.{
  5  type Node, EndOfLine, LineCommentPrefix, NotEndOfLine, Printable, Tab,
  6  ValidNonAscii,
  7}
  8
  9pub fn exact_string(expected: String, node: Node) -> Parser(Node, String, ctx) {
 10  use _ <- do(string.to_graphemes(expected) |> match_chars(expected))
 11  return(node)
 12}
 13
 14pub fn consume_exact_string(expected: String) -> Parser(Nil, String, ctx) {
 15  use _ <- nibble.do(exact_string(expected, node.Let))
 16  // NOTE: doesn't matter which constructor 
 17
 18  return(Nil)
 19}
 20
 21fn match_chars(chars: List(String), context: String) -> Parser(Nil, String, ctx) {
 22  case chars {
 23    [] -> return(Nil)
 24
 25    [first, ..rest] -> {
 26      use _ <- do(
 27        nibble.take_map(
 28          "expected '" <> first <> "' in keyword '" <> context <> "'",
 29          fn(tok) {
 30            case tok == first {
 31              True -> Some(Nil)
 32              False -> None
 33            }
 34          },
 35        ),
 36      )
 37      match_chars(rest, context)
 38    }
 39  }
 40}
 41
 42pub fn let_keyword() -> Parser(Node, String, ctx) {
 43  exact_string("let", node.Let)
 44}
 45
 46pub fn end_of_line() -> Parser(Node, String, ctx) {
 47  use _ <- nibble.do(
 48    nibble.one_of([nibble.token("\n"), consume_exact_string("\r\n")]),
 49  )
 50
 51  return(EndOfLine)
 52}
 53
 54pub fn tab() -> Parser(Node, String, ctx) {
 55  use _ <- nibble.do(nibble.token("\t"))
 56
 57  return(Tab)
 58}
 59
 60pub fn line_comment_prefix() -> Parser(Node, String, ctx) {
 61  use _ <- nibble.do(consume_exact_string("--"))
 62
 63  return(LineCommentPrefix)
 64}
 65
 66/// Helper function to parse a grapheme if its codepoint satisfies a predicate
 67fn codepoint_satisfies(
 68  predicate: fn(Int) -> Bool,
 69  error_msg: String,
 70) -> Parser(String, String, ctx) {
 71  nibble.take_map(error_msg, fn(grapheme) {
 72    case string.to_utf_codepoints(grapheme) {
 73      [codepoint] -> {
 74        let cp_value = string.utf_codepoint_to_int(codepoint)
 75        case predicate(cp_value) {
 76          True -> Some(grapheme)
 77          False -> None
 78        }
 79      }
 80      _ -> None
 81    }
 82  })
 83}
 84
 85/// Helper function to check if a codepoint is in the valid-non-ascii ranges
 86/// as defined by the Dhall ABNF specification
 87fn is_valid_non_ascii_codepoint(codepoint: Int) -> Bool {
 88  case codepoint {
 89    _ if codepoint >= 0x80 && codepoint <= 0xD7FF -> True
 90    _ if codepoint >= 0xE000 && codepoint <= 0xFFFD -> True
 91    _ if codepoint >= 0x10000 && codepoint <= 0x1FFFD -> True
 92    _ if codepoint >= 0x20000 && codepoint <= 0x2FFFD -> True
 93    _ if codepoint >= 0x30000 && codepoint <= 0x3FFFD -> True
 94    _ if codepoint >= 0x40000 && codepoint <= 0x4FFFD -> True
 95    _ if codepoint >= 0x50000 && codepoint <= 0x5FFFD -> True
 96    _ if codepoint >= 0x60000 && codepoint <= 0x6FFFD -> True
 97    _ if codepoint >= 0x70000 && codepoint <= 0x7FFFD -> True
 98    _ if codepoint >= 0x80000 && codepoint <= 0x8FFFD -> True
 99    _ if codepoint >= 0x90000 && codepoint <= 0x9FFFD -> True
100    _ if codepoint >= 0xA0000 && codepoint <= 0xAFFFD -> True
101    _ if codepoint >= 0xB0000 && codepoint <= 0xBFFFD -> True
102    _ if codepoint >= 0xC0000 && codepoint <= 0xCFFFD -> True
103    _ if codepoint >= 0xD0000 && codepoint <= 0xDFFFD -> True
104    _ if codepoint >= 0xE0000 && codepoint <= 0xEFFFD -> True
105    _ if codepoint >= 0xF0000 && codepoint <= 0xFFFFD -> True
106    _ if codepoint >= 0x100000 && codepoint <= 0x10FFFD -> True
107    _ -> False
108  }
109}
110
/// Parser for the Dhall ABNF rule `valid-non-ascii`.
///
/// Accepts a token that `codepoint_satisfies` approves using
/// `is_valid_non_ascii_codepoint`, and wraps it in `ValidNonAscii`.
pub fn valid_non_ascii() -> Parser(Node, String, ctx) {
  let description = "valid-non-ascii character"

  codepoint_satisfies(is_valid_non_ascii_codepoint, description)
  |> do(fn(char) { return(ValidNonAscii(char)) })
}
119
/// Parser for the ASCII range %x20-7F (both bounds inclusive).
///
/// Accepts a token whose single codepoint falls in that range and wraps it in
/// `Printable`.
pub fn printable() -> Parser(Node, String, ctx) {
  let in_range = fn(cp) { 0x20 <= cp && cp <= 0x7F }
  let description = "ASCII printable character (0x20-0x7F)"

  do(codepoint_satisfies(in_range, description), fn(char) {
    return(Printable(char))
  })
}
131
/// Parser for the Dhall ABNF rule
/// `not-end-of-line = %x20-7F / valid-non-ascii / tab`.
///
/// Tries `printable`, `valid_non_ascii` and `tab` in that order and wraps the
/// node produced by whichever succeeds in `NotEndOfLine`.
pub fn not_end_of_line() -> Parser(Node, String, ctx) {
  let alternatives = [printable(), valid_non_ascii(), tab()]

  do(nibble.one_of(alternatives), fn(inner) { return(NotEndOfLine(inner)) })
}