mirror of
https://github.com/zed-industries/zed.git
synced 2025-01-09 20:19:27 +00:00
d298df823f
Fixes a minor error in the analyze highlight script. Release Notes: - N/A
68 lines
2.4 KiB
Python
68 lines
2.4 KiB
Python
"""
|
|
This script analyzes all the highlights.scm files in our embedded languages and extensions.
|
|
It counts the number of unique instances of @{name} and the languages in which they are used.
|
|
|
|
This helps avoid accidentally introducing new tags when adding new languages if appropriate ones already exist.
|
|
|
|
Flags:
|
|
-v, --verbose: Include a detailed list of languages for each tag found in the highlights.scm files.
|
|
"""
|
|
|
|
from collections import defaultdict
|
|
from pathlib import Path
|
|
from typing import Any
|
|
import argparse
|
|
import re
|
|
|
|
# Matches an '@' followed by one or more ASCII letters, underscores, or dots
# (e.g. "@function.method"). The negative lookahead skips names whose first
# character is an underscore, so "@_private" is not reported.
pattern = re.compile(
    r'@(?!_)[a-zA-Z_.]+'
)
|
|
|
|
def parse_arguments():
    """Parse this script's command-line options.

    Returns:
        The ``argparse.Namespace`` built from the process arguments. Its
        ``verbose`` attribute is True when ``-v``/``--verbose`` was passed
        and False otherwise.
    """
    cli = argparse.ArgumentParser(
        description='Analyze highlight.scm files for unique instances and their languages.',
    )
    cli.add_argument(
        '-v',
        '--verbose',
        action='store_true',
        help='Include a list of languages for each tag.',
    )
    options = cli.parse_args()
    return options
|
|
|
|
def find_highlight_files(root_dir):
    """Yield every file named ``highlights.scm`` found beneath ``root_dir``.

    Args:
        root_dir: Directory to search recursively; anything accepted by
            ``pathlib.Path`` works.

    Yields:
        ``pathlib.Path`` objects, one per matching file, in the order
        produced by ``Path.rglob``.
    """
    search_root = Path(root_dir)
    yield from search_root.rglob('highlights.scm')
|
|
|
|
def count_instances(files):
    """Tally how often each tag appears across a set of highlight files.

    Args:
        files: Iterable of file paths. Each item must expose
            ``parent.name`` (as ``pathlib.Path`` does); that directory name
            is recorded as the language the file belongs to.

    Returns:
        A ``defaultdict`` keyed by the text matched by the module-level
        ``pattern`` (for example ``"@keyword"``). Each value is a dict with
        ``'count'`` (total matches across all files) and ``'languages'``
        (the set of parent-directory names of the files containing the tag).
    """
    # Keys are the matched tag strings, so the key type is ``str``.
    instances: defaultdict[str, dict[str, Any]] = defaultdict(
        lambda: {'count': 0, 'languages': set()}
    )
    for file_path in files:
        language = file_path.parent.name
        # Decode explicitly as UTF-8 so results do not depend on the
        # platform's locale-dependent default encoding.
        with open(file_path, encoding='utf-8') as file:
            text = file.read()
        for match in pattern.findall(text):
            entry = instances[match]
            entry['count'] += 1
            entry['languages'].add(language)
    return instances
|
|
|
|
def print_instances(instances, verbose=False):
    """Print one line per tag, ordered by tag name.

    Args:
        instances: Mapping of tag name to a dict holding ``'count'`` and
            ``'languages'`` (an iterable of language names).
        verbose: When true, append the sorted, comma-separated language
            list in square brackets after the count.
    """
    for tag in sorted(instances):
        info = instances[tag]
        language_list = ', '.join(sorted(info['languages']))
        summary = f"{tag} ({info['count']})"
        print(f"{summary} - [{language_list}]" if verbose else summary)
|
|
|
|
def main():
    """Report tag usage for the core languages and for extensions.

    Searches ``crates/languages/src`` and ``extensions/`` under the parent
    of the directory containing this script. Every tag found in the core
    search is printed under a "Shared:" heading. Tags found only in the
    extensions search follow under "Extension-only:", which is omitted
    when there are none.
    """
    options = parse_arguments()

    script_dir = Path(__file__).parent
    root_dir = script_dir.parent
    core_dir = root_dir / 'crates/languages/src'
    extensions_dir = root_dir / 'extensions/'

    core_tags = count_instances(find_highlight_files(core_dir))
    extension_tags = count_instances(find_highlight_files(extensions_dir))

    # Keep only the extension tags that never appear in a core language.
    extension_only = {}
    for tag, details in extension_tags.items():
        if tag in core_tags:
            continue
        extension_only[tag] = details

    print('Shared:\n')
    print_instances(core_tags, options.verbose)

    if extension_only:
        print('\nExtension-only:\n')
        print_instances(extension_only, options.verbose)
|
|
|
|
# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|