2024-10-10 02:09:28 +00:00
import json
2024-01-24 19:40:55 +00:00
import os
2024-10-10 02:09:28 +00:00
import pathlib
2024-01-24 19:40:55 +00:00
from collections import defaultdict
from datetime import datetime , timedelta
from typing import Optional
2024-10-10 02:09:28 +00:00
import typer
2024-01-24 19:40:55 +00:00
from github import Github
from github . Issue import Issue
from github . Repository import Repository
from pytz import timezone
from typer import Typer
app : Typer = typer . Typer ( )
DATETIME_FORMAT : str = " % m/ %d / % Y % I: % M % p "
2024-10-15 19:55:18 +00:00
LABEL_DATA_FILE_PATH = pathlib . Path ( __file__ ) . parent . parent / " label_data.json "
ISSUES_PER_LABEL : int = 20
MISSING_LABEL_ERROR_MESSAGE : str = " missing core label "
2024-10-10 02:09:28 +00:00
2024-10-15 19:55:18 +00:00
with open ( LABEL_DATA_FILE_PATH , " r " ) as label_data_file :
2024-10-10 02:09:28 +00:00
label_data = json . load ( label_data_file )
CORE_LABELS : set [ str ] = set ( label_data [ " core_labels " ] )
# A set of labels for adding in labels that we want present in the final
# report, but that we don't want being defined as a core label, since issues
# with without core labels are flagged as errors.
ADDITIONAL_LABELS : set [ str ] = set ( label_data [ " additional_labels " ] )
2024-10-15 20:01:50 +00:00
NEW_ISSUE_LABELS : set [ str ] = set ( label_data [ " new_issue_labels " ] )
2024-10-10 02:09:28 +00:00
IGNORED_LABEL : str = label_data [ " ignored_label " ]
2024-01-24 19:40:55 +00:00
class IssueData :
def __init__ ( self , issue : Issue ) - > None :
2024-10-15 19:55:18 +00:00
self . title = issue . title
2024-01-24 19:40:55 +00:00
self . url : str = issue . html_url
self . like_count : int = issue . _rawData [ " reactions " ] [ " +1 " ] # type: ignore [attr-defined]
self . creation_datetime : str = issue . created_at . strftime ( DATETIME_FORMAT )
# TODO: Change script to support storing labels here, rather than directly in the script
2024-04-11 04:43:06 +00:00
self . labels : set [ str ] = { label [ " name " ] for label in issue . _rawData [ " labels " ] } # type: ignore [attr-defined]
2024-10-15 19:55:18 +00:00
self . _issue = issue
2024-01-24 19:40:55 +00:00
@app.command ( )
2024-01-28 23:09:11 +00:00
def main (
github_token : Optional [ str ] = None ,
2024-04-05 12:13:33 +00:00
issue_reference_number : Optional [ int ] = None ,
2024-01-28 23:09:11 +00:00
query_day_interval : Optional [ int ] = None ,
) - > None :
2024-01-24 19:40:55 +00:00
start_time : datetime = datetime . now ( )
2024-01-28 23:09:11 +00:00
start_date : datetime | None = None
if query_day_interval :
tz = timezone ( " america/new_york " )
2024-03-29 18:01:27 +00:00
current_time = datetime . now ( tz ) . replace (
hour = 0 , minute = 0 , second = 0 , microsecond = 0
)
2024-01-28 23:22:39 +00:00
start_date = current_time - timedelta ( days = query_day_interval )
2024-01-28 23:09:11 +00:00
2024-01-24 19:40:55 +00:00
# GitHub Workflow will pass in the token as an environment variable,
# but we can place it in our env when running the script locally, for convenience
github_token = github_token or os . getenv ( " GITHUB_ACCESS_TOKEN " )
github = Github ( github_token )
remaining_requests_before : int = github . rate_limiting [ 0 ]
print ( f " Remaining requests before: { remaining_requests_before } " )
repo_name : str = " zed-industries/zed "
repository : Repository = github . get_repo ( repo_name )
# There has to be a nice way of adding types to tuple unpacking
label_to_issue_data : dict [ str , list [ IssueData ] ]
error_message_to_erroneous_issue_data : dict [ str , list [ IssueData ] ]
(
label_to_issue_data ,
error_message_to_erroneous_issue_data ,
2024-01-28 23:09:11 +00:00
) = get_issue_maps ( github , repository , start_date )
2024-01-24 19:40:55 +00:00
issue_text : str = get_issue_text (
label_to_issue_data ,
error_message_to_erroneous_issue_data ,
)
2024-04-05 12:13:33 +00:00
if issue_reference_number :
2024-01-28 23:22:39 +00:00
top_ranking_issues_issue : Issue = repository . get_issue ( issue_reference_number )
2024-01-24 19:40:55 +00:00
top_ranking_issues_issue . edit ( body = issue_text )
else :
print ( issue_text )
2024-10-15 19:55:18 +00:00
for error_message , issue_data in error_message_to_erroneous_issue_data . items ( ) :
if error_message == MISSING_LABEL_ERROR_MESSAGE :
for issue in issue_data :
# Used as a dry-run flag
if issue_reference_number :
2024-10-15 20:01:50 +00:00
issue . _issue . add_to_labels ( * NEW_ISSUE_LABELS )
2024-10-15 19:55:18 +00:00
2024-01-24 19:40:55 +00:00
remaining_requests_after : int = github . rate_limiting [ 0 ]
print ( f " Remaining requests after: { remaining_requests_after } " )
print ( f " Requests used: { remaining_requests_before - remaining_requests_after } " )
run_duration : timedelta = datetime . now ( ) - start_time
print ( run_duration )
def get_issue_maps (
2024-01-28 23:09:11 +00:00
github : Github ,
repository : Repository ,
start_date : datetime | None = None ,
2024-01-24 19:40:55 +00:00
) - > tuple [ dict [ str , list [ IssueData ] ] , dict [ str , list [ IssueData ] ] ] :
label_to_issues : defaultdict [ str , list [ Issue ] ] = get_label_to_issues (
2024-01-28 23:09:11 +00:00
github ,
repository ,
start_date ,
2024-01-24 19:40:55 +00:00
)
label_to_issue_data : dict [ str , list [ IssueData ] ] = get_label_to_issue_data (
label_to_issues
)
2024-03-29 18:01:27 +00:00
error_message_to_erroneous_issues : defaultdict [ str , list [ Issue ] ] = (
get_error_message_to_erroneous_issues ( github , repository )
)
error_message_to_erroneous_issue_data : dict [ str , list [ IssueData ] ] = (
get_error_message_to_erroneous_issue_data ( error_message_to_erroneous_issues )
)
2024-01-24 19:40:55 +00:00
# Create a new dictionary with labels ordered by the summation the of likes on the associated issues
labels = list ( label_to_issue_data . keys ( ) )
labels . sort (
key = lambda label : sum (
issue_data . like_count for issue_data in label_to_issue_data [ label ]
) ,
reverse = True ,
)
label_to_issue_data = { label : label_to_issue_data [ label ] for label in labels }
return (
label_to_issue_data ,
error_message_to_erroneous_issue_data ,
)
def get_label_to_issues (
2024-01-28 23:09:11 +00:00
github : Github ,
repository : Repository ,
start_date : datetime | None = None ,
2024-01-24 19:40:55 +00:00
) - > defaultdict [ str , list [ Issue ] ] :
label_to_issues : defaultdict [ str , list [ Issue ] ] = defaultdict ( list )
labels : set [ str ] = CORE_LABELS | ADDITIONAL_LABELS
2024-01-28 23:09:11 +00:00
date_query : str = (
f " created:>= { start_date . strftime ( ' % Y- % m- %d ' ) } " if start_date else " "
)
2024-01-24 19:40:55 +00:00
for label in labels :
2024-10-10 02:09:28 +00:00
query : str = f ' repo: { repository . full_name } is:open is:issue { date_query } label: " { label } " -label: " { IGNORED_LABEL } " sort:reactions-+1-desc '
2024-01-28 23:09:11 +00:00
issues = github . search_issues ( query )
2024-01-24 19:40:55 +00:00
2024-01-28 23:09:11 +00:00
if issues . totalCount > 0 :
for issue in issues [ 0 : ISSUES_PER_LABEL ] :
label_to_issues [ label ] . append ( issue )
2024-01-24 19:40:55 +00:00
return label_to_issues
def get_label_to_issue_data (
2024-03-29 18:01:27 +00:00
label_to_issues : defaultdict [ str , list [ Issue ] ] ,
2024-01-24 19:40:55 +00:00
) - > dict [ str , list [ IssueData ] ] :
label_to_issue_data : dict [ str , list [ IssueData ] ] = { }
for label in label_to_issues :
issues : list [ Issue ] = label_to_issues [ label ]
issue_data : list [ IssueData ] = [ IssueData ( issue ) for issue in issues ]
issue_data . sort (
key = lambda issue_data : (
- issue_data . like_count ,
issue_data . creation_datetime ,
)
)
if issue_data :
label_to_issue_data [ label ] = issue_data
return label_to_issue_data
def get_error_message_to_erroneous_issues (
github : Github , repository : Repository
) - > defaultdict [ str , list [ Issue ] ] :
error_message_to_erroneous_issues : defaultdict [ str , list [ Issue ] ] = defaultdict ( list )
2024-06-12 14:57:33 +00:00
# Query for all open issues that don't have either a core or the ignored label and mark those as erroneous
2024-10-10 02:09:28 +00:00
filter_labels : set [ str ] = CORE_LABELS | { IGNORED_LABEL }
2024-01-24 19:40:55 +00:00
filter_labels_text : str = " " . join ( [ f ' -label: " { label } " ' for label in filter_labels ] )
query : str = f " repo: { repository . full_name } is:open is:issue { filter_labels_text } "
for issue in github . search_issues ( query ) :
2024-10-15 19:55:18 +00:00
error_message_to_erroneous_issues [ MISSING_LABEL_ERROR_MESSAGE ] . append ( issue )
2024-01-24 19:40:55 +00:00
return error_message_to_erroneous_issues
def get_error_message_to_erroneous_issue_data (
error_message_to_erroneous_issues : defaultdict [ str , list [ Issue ] ] ,
) - > dict [ str , list [ IssueData ] ] :
error_message_to_erroneous_issue_data : dict [ str , list [ IssueData ] ] = { }
for label in error_message_to_erroneous_issues :
issues : list [ Issue ] = error_message_to_erroneous_issues [ label ]
issue_data : list [ IssueData ] = [ IssueData ( issue ) for issue in issues ]
error_message_to_erroneous_issue_data [ label ] = issue_data
return error_message_to_erroneous_issue_data
def get_issue_text (
label_to_issue_data : dict [ str , list [ IssueData ] ] ,
error_message_to_erroneous_issue_data : dict [ str , list [ IssueData ] ] ,
) - > str :
tz = timezone ( " america/new_york " )
current_datetime : str = datetime . now ( tz ) . strftime ( f " { DATETIME_FORMAT } (%Z) " )
highest_ranking_issues_lines : list [ str ] = get_highest_ranking_issues_lines (
label_to_issue_data
)
issue_text_lines : list [ str ] = [
f " *Updated on { current_datetime } * " ,
* highest_ranking_issues_lines ,
" " ,
" --- \n " ,
]
erroneous_issues_lines : list [ str ] = get_erroneous_issues_lines (
error_message_to_erroneous_issue_data
)
if erroneous_issues_lines :
core_labels_text : str = " , " . join (
f ' " { core_label } " ' for core_label in CORE_LABELS
)
issue_text_lines . extend (
[
" ## errors with issues (this section only shows when there are errors with issues) \n " ,
f " This script expects every issue to have at least one of the following core labels: { core_labels_text } " ,
2024-10-10 02:09:28 +00:00
f " This script currently ignores issues that have the following label: { IGNORED_LABEL } \n " ,
2024-01-24 19:40:55 +00:00
" ### what to do? \n " ,
" - Adjust the core labels on an issue to put it into a correct state or add a currently-ignored label to the issue " ,
" - Adjust the core and ignored labels registered in this script " ,
* erroneous_issues_lines ,
" " ,
" --- \n " ,
]
)
issue_text_lines . extend (
[
2024-01-24 19:48:44 +00:00
" *For details on how this issue is generated, [see the script](https://github.com/zed-industries/zed/blob/main/script/update_top_ranking_issues/main.py)* " ,
2024-01-24 19:40:55 +00:00
]
)
return " \n " . join ( issue_text_lines )
def get_highest_ranking_issues_lines (
label_to_issue_data : dict [ str , list [ IssueData ] ] ,
) - > list [ str ] :
highest_ranking_issues_lines : list [ str ] = [ ]
if label_to_issue_data :
for label , issue_data in label_to_issue_data . items ( ) :
highest_ranking_issues_lines . append ( f " \n ## { label } \n " )
for i , issue_data in enumerate ( issue_data ) :
markdown_bullet_point : str = (
f " { issue_data . url } ( { issue_data . like_count } :thumbsup:) "
)
markdown_bullet_point = f " { i + 1 } . { markdown_bullet_point } "
highest_ranking_issues_lines . append ( markdown_bullet_point )
return highest_ranking_issues_lines
def get_erroneous_issues_lines (
error_message_to_erroneous_issue_data ,
) - > list [ str ] :
erroneous_issues_lines : list [ str ] = [ ]
if error_message_to_erroneous_issue_data :
for (
error_message ,
erroneous_issue_data ,
) in error_message_to_erroneous_issue_data . items ( ) :
erroneous_issues_lines . append ( f " \n #### { error_message } \n " )
2024-01-24 19:58:47 +00:00
for erroneous_issue_data in erroneous_issue_data :
erroneous_issues_lines . append ( f " - { erroneous_issue_data . url } " )
2024-01-24 19:40:55 +00:00
return erroneous_issues_lines
if __name__ == " __main__ " :
app ( )
# TODO: Sort label output into core and non core sections