zed/script/analyze_highlights.py

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

70 lines
2.5 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
Add analyze highlights script (#10855) Adds a script to print all unique highlight keys for building syntax themes. Usage: - `python script/analyze_highlights.py` OR - `python script/analyze_highlights.py -v` - Using the `-v` or `--verbose` arg will print each language that uses each key. Example output: ``` @attribute (6) @boolean (5) @charset (1) @comment (19) @comment.doc (3) @comment.unused (2) @constant (27) @constant.builtin (15) @constant.character (1) @constructor (4) @embedded (10) @emphasis (1) @emphasis.strong (1) @escape (4) @function (44) @function.builtin (2) @function.definition (2) @function.method (22) @function.method.builtin (3) @function.special (4) @function.special.definition (1) @import (1) @keyframes (1) @keyword (32) @label (2) @link_text (1) @link_uri (1) @media (1) @module (1) @namespace (1) @number (16) @operator (24) @property (11) @property.json_key (1) @punctuation (1) @punctuation.bracket (28) @punctuation.delimiter (12) @punctuation.list_marker (1) @punctuation.special (17) @string (23) @string.doc (1) @string.escape (5) @string.regex (7) @string.special (4) @string.special.symbol (2) @supports (1) @tag (14) @text.literal (2) @title (1) @type (28) @type.builtin (4) @type.super (3) @variable (5) @variable.member (3) @variable.parameter (4) @variable.special (12) Extension-only: @tag.delimiter (1) ``` Verbose example output: ``` Shared: @attribute (6) - [css, heex, javascript, tsx] @boolean (5) - [javascript, proto, tsx, typescript, yaml] @charset (1) - [css] @comment (19) - [bash, c, cpp, css, elixir, erb, go, gomod, gowork, heex, javascript, json, proto, python, ruby, rust, tsx, typescript, yaml] @comment.doc (3) - [elixir] @comment.unused (2) - [elixir] @constant (27) - [bash, c, cpp, elixir, heex, javascript, json, proto, python, ruby, rust, tsx, typescript] @constant.builtin (15) - [elixir, go, javascript, python, ruby, tsx, typescript, yaml] @constant.character (1) - [regex] @constructor (4) - [tsx, typescript] @embedded (10) - 
[bash, elixir, javascript, python, ruby, tsx, typescript] @emphasis (1) - [markdown] @emphasis.strong (1) - [markdown] @escape (4) - [go, python, regex, ruby] @function (44) - [bash, c, cpp, css, elixir, go, heex, javascript, python, rust, tsx, typescript] @function.builtin (2) - [python] @function.definition (2) - [rust] @function.method (22) - [go, javascript, python, ruby, rust, tsx, typescript] @function.method.builtin (3) - [ruby] @function.special (4) - [c, cpp, rust] @function.special.definition (1) - [rust] @import (1) - [css] @keyframes (1) - [css] @keyword (32) - [bash, c, cpp, css, elixir, erb, go, gomod, gowork, heex, javascript, jsdoc, proto, python, ruby, rust, tsx, typescript] @label (2) - [c, cpp] @link_text (1) - [markdown] @link_uri (1) - [markdown] @media (1) - [css] @module (1) - [heex] @namespace (1) - [css] @number (16) - [bash, c, cpp, css, elixir, go, javascript, json, proto, python, regex, ruby, rust, tsx, typescript, yaml] @operator (24) - [bash, c, cpp, css, elixir, go, gomod, gowork, heex, javascript, proto, python, regex, ruby, tsx, typescript] @property (11) - [bash, c, cpp, css, javascript, python, regex, rust, tsx, typescript, yaml] @property.json_key (1) - [json] @punctuation (1) - [elixir] @punctuation.bracket (28) - [c, cpp, elixir, go, heex, javascript, json, proto, regex, ruby, rust, tsx, typescript, yaml] @punctuation.delimiter (12) - [c, cpp, css, elixir, heex, javascript, proto, regex, ruby, tsx, typescript, yaml] @punctuation.list_marker (1) - [markdown] @punctuation.special (17) - [elixir, javascript, python, ruby, tsx, typescript, yaml] @string (23) - [bash, c, cpp, css, elixir, go, gomod, gowork, heex, javascript, json, proto, python, regex, ruby, rust, tsx, typescript, yaml] @string.doc (1) - [python] @string.escape (5) - [elixir, javascript, tsx, typescript, yaml] @string.regex (7) - [elixir, javascript, ruby, tsx, typescript] @string.special (4) - [css, elixir] @string.special.symbol (2) - [elixir, ruby] @supports (1) 
- [css] @tag (14) - [css, heex, javascript, tsx] @text.literal (2) - [markdown] @title (1) - [markdown] @type (28) - [c, cpp, css, elixir, go, javascript, jsdoc, proto, python, ruby, rust, tsx, typescript, yaml] @type.builtin (4) - [javascript, rust, tsx, typescript] @type.super (3) - [ruby] @variable (5) - [c, cpp, javascript, tsx, typescript] @variable.member (3) - [go, ruby] @variable.parameter (4) - [ruby] @variable.special (12) - [cpp, css, javascript, ruby, rust, tsx, typescript] Extension-only: @tag.delimiter (1) - [astro] ``` Release Notes: - N/A --------- Co-authored-by: Joseph T. Lyons <JosephTLyons@gmail.com>
2024-04-22 15:51:06 +00:00
"""
This script analyzes all the highlights.scm files in our embedded languages and extensions.
It counts the occurrences of each unique @{name} capture and records the languages in which it is used.
This helps avoid accidentally introducing new tags when adding a language if appropriate ones already exist.

Flags:
-v, --verbose: Include a detailed list of languages for each tag found in the highlights.scm files.
"""
from collections import defaultdict
from pathlib import Path
from typing import Any
import argparse
import re
# Matches an ``@`` followed by one or more ASCII letters, underscores, or dots.
# The ``(?!_)`` lookahead rejects names whose first character is an underscore.
pattern = re.compile(r'@(?!_)[a-zA-Z_.]+')
def parse_arguments(argv=None):
    """Parse the command-line options for the highlight analysis script.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process arguments. When ``None`` (the default), argparse reads
            ``sys.argv[1:]``, which is what this function always did before
            the parameter existed.

    Returns:
        An ``argparse.Namespace`` with a boolean ``verbose`` attribute.
    """
    parser = argparse.ArgumentParser(description='Analyze highlight.scm files for unique instances and their languages.')
    parser.add_argument('-v', '--verbose', action='store_true', help='Include a list of languages for each tag.')
    return parser.parse_args(argv)
def find_highlight_files(root_dir):
    """Yield every ``highlights.scm`` file found beneath ``root_dir``.

    Args:
        root_dir: Directory (``str`` or ``Path``) to search recursively.

    Yields:
        A ``Path`` for each file named exactly ``highlights.scm``, in
        whatever order ``Path.rglob`` produces them.
    """
    # Delegate straight to the rglob iterator instead of re-yielding by hand.
    yield from Path(root_dir).rglob('highlights.scm')
def count_instances(files):
    """Tally the names matched by ``pattern`` across the given files.

    Args:
        files: Iterable of ``Path`` objects. The name of each file's parent
            directory is used as that file's language name.

    Returns:
        A ``defaultdict`` mapping each matched string (for example
        ``'@keyword'``) to a dict with two entries: ``'count'``, the total
        number of matches across all files, and ``'languages'``, the set of
        language names whose file contained at least one match.
    """
    # Keys are the matched strings themselves, hence ``str`` as the key type.
    instances: defaultdict[str, dict[str, Any]] = defaultdict(lambda: {'count': 0, 'languages': set()})
    for file_path in files:
        language = file_path.parent.name
        # Decode explicitly so results do not depend on the platform's default
        # locale encoding. NOTE(review): assumes the query files are UTF-8.
        with open(file_path, "r", encoding="utf-8") as file:
            text = file.read()
        for match in pattern.findall(text):
            entry = instances[match]
            entry['count'] += 1
            entry['languages'].add(language)
    return instances
def print_instances(instances, verbose=False):
    """Print one line per entry of ``instances``, ordered by key.

    Args:
        instances: Mapping of name to a dict holding ``'count'`` and
            ``'languages'`` (an iterable of language names).
        verbose: When true, each line also lists the entry's languages,
            sorted and comma-separated, inside square brackets.
    """
    # Keys are unique, so sorting the keys gives the same order as sorting
    # the items by their first element.
    for item in sorted(instances):
        details = instances[item]
        line = f"{item} ({details['count']})"
        if verbose:
            # Only build the language list when it is actually printed.
            languages = ', '.join(sorted(details['languages']))
            line += f" - [{languages}]"
        print(line)
def main():
    """Print the highlight names used by core languages and by extensions.

    Both search roots are resolved relative to the parent of the directory
    containing this script. Everything found under ``crates/languages/src``
    is printed under a "Shared:" heading; names found only under
    ``extensions/`` follow under "Extension-only:" when there are any.
    """
    options = parse_arguments()
    repo_root = Path(__file__).parent.parent

    core_instances = count_instances(find_highlight_files(repo_root / 'crates/languages/src'))
    extension_instances = count_instances(find_highlight_files(repo_root / 'extensions/'))

    # Keep only the names that never appear in a core language.
    extension_only = {
        name: details
        for name, details in extension_instances.items()
        if name not in core_instances
    }

    print('Shared:\n')
    print_instances(core_instances, options.verbose)

    if not extension_only:
        return
    print('\nExtension-only:\n')
    print_instances(extension_only, options.verbose)
# Run the analysis only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()