python: Refine highlighting (#21389)

Jaagup Averin and Piotr Osiewicz created

Fixes:
* Types in binary unions as per [PEP
604](https://peps.python.org/pep-0604/) not highlighted;
* `except*` keyword not highlighted;
* Classes beginning with `_` not recognized as such, although `_` is a
valid first character for private classes; additionally, the regex for
parsing constant/class names appeared inconsistent and incomplete, so it
was adjusted;
* Builtin types such as `float`, `dict`, etc. not recognized as types;
* **Update:** decorators with arguments not recognized as decorators;
* **Update:** docstrings after type alias assignments not recognized as
docstrings;
* **Update:** `and/in/is/not/or/is not/not in` not capturable as
keywords;
* **Update:** decorators with "nesting" (`@x.y.z`) not recognized as
decorators.

Before:

![new_before](https://github.com/user-attachments/assets/6f05262e-be3b-41bf-aee6-26438c2bf254)

After:

![new_after](https://github.com/user-attachments/assets/408c481c-5eb9-40c9-8e18-52ebf5a121d3)

Release Notes:

- N/A

---------

Co-authored-by: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com>

Change summary

crates/languages/src/python/highlights.scm | 56 +++++++++++++++--------
1 file changed, 36 insertions(+), 20 deletions(-)

Detailed changes

crates/languages/src/python/highlights.scm 🔗

@@ -1,18 +1,23 @@
+; Identifier naming conventions; these "soft conventions" should stay at the top of the file as they're often overridden
+
+; CamelCase for classes
+((identifier) @type.class
+  (#match? @type.class "^_*[A-Z][A-Za-z0-9_]*$"))
+
+; ALL_CAPS for constants:
+((identifier) @constant
+  (#match? @constant "^_*[A-Z][A-Z0-9_]*$"))
+
 (attribute attribute: (identifier) @property)
 (type (identifier) @type)
 (generic_type (identifier) @type)
+(comment) @comment
+(string) @string
+(escape_sequence) @string.escape
 
 ; Type alias
 (type_alias_statement "type" @keyword)
 
-; Identifier naming conventions
-
-((identifier) @type.class
- (#match? @type.class "^[A-Z]"))
-
-((identifier) @constant
- (#match? @constant "^_*[A-Z][A-Z\\d_]*$"))
-
 ; TypeVar with constraints in type parameters
 (type
   (tuple (identifier) @type)
@@ -26,15 +31,20 @@
 
 ; Function calls
 
-(decorator
-  "@" @punctuation.special
-  (identifier) @function.decorator)
-
 (call
   function: (attribute attribute: (identifier) @function.method.call))
 (call
   function: (identifier) @function.call)
 
+(decorator
+  "@" @punctuation.special
+  [
+    (identifier) @function.decorator
+    (attribute attribute: (identifier) @function.decorator)
+    (call function: (identifier) @function.decorator.call)
+    (call (attribute attribute: (identifier) @function.decorator.call))
+  ])
+
 ; Function and class definitions
 
 (function_definition
@@ -47,9 +57,9 @@
 
 (call
   function: (identifier) @type.class.call
-  (#match? @type.class.call "^[A-Z][A-Z0-9_]*[a-z]"))
+  (#match? @type.class.call "^_*[A-Z][A-Za-z0-9_]*$"))
 
-; Builtin functions
+; Builtins
 
 ((call
   function: (identifier) @function.builtin)
@@ -57,6 +67,9 @@
    @function.builtin
    "^(abs|all|any|ascii|bin|bool|breakpoint|bytearray|bytes|callable|chr|classmethod|compile|complex|delattr|dict|dir|divmod|enumerate|eval|exec|filter|float|format|frozenset|getattr|globals|hasattr|hash|help|hex|id|input|int|isinstance|issubclass|iter|len|list|locals|map|max|memoryview|min|next|object|oct|open|ord|pow|print|property|range|repr|reversed|round|set|setattr|slice|sorted|staticmethod|str|sum|super|tuple|type|vars|zip|__import__)$"))
 
+((identifier) @type.builtin
+    (#any-of? @type.builtin "int" "float" "complex" "bool" "list" "tuple" "range" "str" "bytes" "bytearray" "memoryview" "set" "frozenset" "dict"))
+    
 ; Literals
 
 [
@@ -79,10 +92,6 @@
   (#match? @variable.special "^self|cls$")
 ]
 
-(comment) @comment
-(string) @string
-(escape_sequence) @string.escape
-
 [
   "("
   ")"
@@ -114,7 +123,10 @@
   . (expression_statement (string) @string.doc))
 
 (module
-  (expression_statement (assignment))
+  [
+    (expression_statement (assignment))
+    (type_alias_statement)
+  ]
   . (expression_statement (string) @string.doc))
 
 (class_definition
@@ -163,6 +175,9 @@
   ">>"
   "|"
   "~"
+] @operator
+
+[
   "and"
   "in"
   "is"
@@ -170,7 +185,7 @@
   "or"
   "is not"
   "not in"
-] @operator
+] @keyword.operator
 
 [
   "as"
@@ -185,6 +200,7 @@
   "elif"
   "else"
   "except"
+  "except*"
   "exec"
   "finally"
   "for"