@@ -0,0 +1,68 @@
+"""
+This script analyzes all the highlight.scm files in our embedded languages and extensions.
+It counts the number of unique instances of @{name} and the languages in which they are used.
+
+This is useful to help avoid accidentally introducing new tags when appropriate ones already exist when adding new languages.
+
+Flags:
+-v, --verbose: Include a detailed list of languages for each tag found in the highlight.scm files.
+"""
+
+from collections import defaultdict
+from pathlib import Path
+from typing import Any
+import argparse
+import re
+
+pattern = re.compile(r'@(?!_)[a-zA-Z_.]+')
+
+def parse_arguments():
+ parser = argparse.ArgumentParser(description='Analyze highlight.scm files for unique instances and their languages.')
+ parser.add_argument('-v', '--verbose', action='store_true', help='Include a list of languages for each tag.')
+ return parser.parse_args()
+
+def find_highlight_files(root_dir):
+ for path in Path(root_dir).rglob('highlights.scm'):
+ yield path
+
+def count_instances(files):
+ instances: defaultdict[list[Any], dict[str, Any]] = defaultdict(lambda: {'count': 0, 'languages': set()})
+ for file_path in files:
+ language = file_path.parent.name
+ with open(file_path, "r") as file:
+ text = file.read()
+ matches = pattern.findall(text)
+ for match in matches:
+ instances[match]['count'] += 1
+ instances[match]['languages'].add(language)
+ return instances
+
+def print_instances(instances, verbose=False):
+ for item, details in sorted(instances.items(), key=lambda x: x[0]):
+ languages = ', '.join(sorted(details['languages']))
+ if verbose:
+ print(f"{item} ({details['count']}) - [{languages}]")
+ else:
+ print(f"{item} ({details['count']})")
+
+def main():
+ args = parse_arguments()
+
+ base_dir = Path(__file__).parent.parent
+ core_path = base_dir / 'crates/languages/src'
+ extension_path = base_dir / 'extensions/astro/languages'
+
+ core_instances = count_instances(find_highlight_files(core_path))
+ extension_instances = count_instances(find_highlight_files(extension_path))
+
+ unique_extension_instances = {k: v for k, v in extension_instances.items() if k not in core_instances}
+
+ print('Shared:\n')
+ print_instances(core_instances, args.verbose)
+
+ if unique_extension_instances:
+ print('\nExtension-only:\n')
+ print_instances(unique_extension_instances, args.verbose)
+
+if __name__ == '__main__':
+ main()