import gleam/option.{None, Some}
import gleam/string
import nibble.{type Parser, do, return}
import node.{
  type Node, EndOfLine, LineCommentPrefix, NotEndOfLine, Printable, Tab,
  ValidNonAscii,
}

/// Builds a parser that matches `expected` one grapheme at a time (each
/// grapheme is compared against one input token) and, on success, yields the
/// supplied `node`.
///
/// The parser runs inside the context label `exact_string('<expected>')`.
/// An empty `expected` has no graphemes to match, so the parser yields `node`
/// without requiring any token.
pub fn exact_string(
  expected: String,
  node: Node,
) -> Parser(Node, String, String) {
  expected
  |> string.to_graphemes
  |> match_chars(expected)
  |> nibble.replace(node)
  |> nibble.in("exact_string('" <> expected <> "')")
}

/// Same matching as `exact_string`, but the result is `Nil`.
///
/// The parser runs inside the context label
/// `consume_exact_string('<expected>')`, wrapped around the inner
/// `exact_string` context.
pub fn consume_exact_string(expected: String) -> Parser(Nil, String, String) {
  // The node produced by `exact_string` is thrown away immediately, so the
  // constructor passed here is arbitrary.
  exact_string(expected, node.Let)
  |> nibble.replace(Nil)
  |> nibble.in("consume_exact_string('" <> expected <> "')")
}

/// Matches the tokens listed in `chars`, in order, by string equality.
///
/// `context` is the full string being matched; it is used only for the
/// error message and the context label. Every recursion level wraps its
/// parser in its own `match_chars(<context>)` label.
fn match_chars(
  chars: List(String),
  context: String,
) -> Parser(Nil, String, String) {
  let step = case chars {
    // Nothing left to match.
    [] -> return(Nil)

    [first, ..rest] -> {
      let message =
        "expected '" <> first <> "' in keyword '" <> context <> "'"
      let accept = fn(tok) {
        case tok == first {
          True -> Some(Nil)
          False -> None
        }
      }
      // The parser for the remaining characters is only built once the
      // current one has matched.
      do(nibble.take_map(message, accept), fn(_) { match_chars(rest, context) })
    }
  }

  nibble.in(step, "match_chars(" <> context <> ")")
}

/// Parser for the `let` keyword; yields `node.Let`.
pub fn let_keyword() -> Parser(Node, String, String) {
  exact_string("let", node.Let)
  |> nibble.in("let_keyword")
}

/// Parser for a line ending: either a `"\n"` token or the string `"\r\n"`.
/// Yields `EndOfLine`.
pub fn end_of_line() -> Parser(Node, String, String) {
  let unix = nibble.token("\n") |> nibble.in("unix_line_ending")
  let windows = consume_exact_string("\r\n")

  // Alternatives are listed in the same order as before: "\n" first.
  nibble.one_of([unix, windows])
  |> nibble.replace(EndOfLine)
  |> nibble.in("end_of_line")
}

/// Parser for a single `"\t"` token; yields `Tab`.
pub fn tab() -> Parser(Node, String, ctx) {
  nibble.token("\t")
  |> nibble.replace(Tab)
}

/// Parser for the line comment marker `--`; yields `LineCommentPrefix`.
pub fn line_comment_prefix() -> Parser(Node, String, String) {
  consume_exact_string("--")
  |> nibble.replace(LineCommentPrefix)
}

/// Accepts one token when it consists of exactly one codepoint and that
/// codepoint's integer value satisfies `predicate`; the token itself is the
/// parser's result.
///
/// Tokens made of zero or of several codepoints are rejected. `error_msg` is
/// passed to `nibble.take_map` as the description of what was expected.
fn codepoint_satisfies(
  predicate: fn(Int) -> Bool,
  error_msg: String,
) -> Parser(String, String, ctx) {
  use grapheme <- nibble.take_map(error_msg)

  case string.to_utf_codepoints(grapheme) {
    [only] -> {
      let accepted = only |> string.utf_codepoint_to_int |> predicate
      case accepted {
        True -> Some(grapheme)
        False -> None
      }
    }
    _ -> None
  }
}

/// Reports whether `codepoint` lies in one of the valid-non-ascii ranges
/// of the Dhall ABNF specification:
///
/// - `0x80`–`0xD7FF`
/// - `0xE000`–`0xFFFD`
/// - for each of the sixteen planes starting at `0x10000`, `0x20000`, …,
///   `0x100000`: the plane's first codepoint through its offset `0xFFFD`
fn is_valid_non_ascii_codepoint(codepoint: Int) -> Bool {
  let within = fn(low: Int, high: Int) { low <= codepoint && codepoint <= high }
  // Position of the codepoint inside its 0x10000-wide plane.
  let plane_offset = codepoint % 0x10000

  within(0x80, 0xD7FF)
  || within(0xE000, 0xFFFD)
  || { within(0x10000, 0x10FFFD) && plane_offset <= 0xFFFD }
}

/// Parser for valid-non-ascii as defined by the Dhall ABNF; yields the
/// matched token wrapped in `ValidNonAscii`.
pub fn valid_non_ascii() -> Parser(Node, String, ctx) {
  is_valid_non_ascii_codepoint
  |> codepoint_satisfies("valid-non-ascii character")
  |> nibble.map(ValidNonAscii)
}

/// Parser for the `%x20-7F` range (both bounds inclusive, so `0x7F` is
/// accepted); yields the matched token wrapped in `Printable`.
pub fn printable() -> Parser(Node, String, ctx) {
  let in_ascii_range = fn(cp) { cp >= 0x20 && cp <= 0x7F }

  in_ascii_range
  |> codepoint_satisfies("ASCII printable character (0x20-0x7F)")
  |> nibble.map(Printable)
}

/// Parser for not-end-of-line as defined by the Dhall ABNF:
/// not-end-of-line = %x20-7F / valid-non-ascii / tab
///
/// Alternatives are tried in that order; the matched node is wrapped in
/// `NotEndOfLine`.
pub fn not_end_of_line() -> Parser(Node, String, ctx) {
  [printable(), valid_non_ascii(), tab()]
  |> nibble.one_of
  |> nibble.map(NotEndOfLine)
}