Commit 7fedc81

ci: merged_pr stats: fix a bug and minor tweaks
We were not setting the review_rule value correctly and defaulted to 'no' for all PRs, even if they were correctly reviewed by assignees. Other minor cleanups. Signed-off-by: Anas Nashif <[email protected]>
1 parent 0a50cb9 commit 7fedc81
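
The gist of the fix: an approving review from any assignee, or a merge performed by an assignee, now counts toward the review rule instead of falling through to "no". Below is a minimal standalone sketch of that corrected decision, assuming the approver logins, assignee logins, merger login and labels have already been pulled from the PR; the logins and label used in the example are purely illustrative, not part of the script.

def review_rule(approvers, assignees, merged_by, submitter, labels):
    # Sketch of the corrected decision implemented in process_pr() in the diff below.
    assignee_reviews = sum(1 for a in assignees if a in approvers)
    if assignee_reviews > 0 or merged_by in assignees:
        # approved by an assignee, or merged by one
        return "yes"
    if (not assignees
            or (submitter in assignees and len(assignees) == 1)
            or 'Trivial' in labels or 'Hotfix' in labels):
        # no assignees, submitter is the only assignee, or trivial/hotfix
        return "na"
    return "no"

# Hypothetical PR: one of the two assignees approved, so the rule now passes.
print(review_rule({'reviewer1'}, ['reviewer1', 'maintainer2'],
                  'maintainer2', 'author3', ['area: Build System']))  # -> "yes"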

File tree

1 file changed: +118, -107 lines

scripts/ci/stats/merged_prs.py

Lines changed: 118 additions & 107 deletions
@@ -13,6 +13,7 @@
 from elasticsearch import Elasticsearch
 from elasticsearch.helpers import bulk
 from datetime import timedelta
+import pprint
 
 
 date_format = '%Y-%m-%d %H:%M:%S'
@@ -21,8 +22,11 @@ def parse_args() -> argparse.Namespace:
     parser = argparse.ArgumentParser(
         formatter_class=argparse.RawDescriptionHelpFormatter, allow_abbrev=False)
 
-    parser.add_argument('--pull-request', required=True, help='pull request number', type=int)
-    parser.add_argument('--repo', required=True, help='github repo')
+    parser.add_argument('--pull-request', help='pull request number', type=int)
+    parser.add_argument('--range', help='execute based on a date range, for example 2023-01-01..2023-01-05')
+    parser.add_argument('--repo', help='github repo', default='zephyrproject-rtos/zephyr')
+    parser.add_argument('--es-index', help='Elasticsearch index')
+    parser.add_argument('-y','--dry-run', action="store_true", help='dry run, do not upload data')
 
     return parser.parse_args()
 
@@ -33,6 +37,90 @@ def gendata(data, index):
             "_source": t
         }
 
+def process_pr(pr):
+    reviews = pr.get_reviews()
+    print(f'#{pr.number}: {pr.title} - {pr.comments} Comments, reviews: {reviews.totalCount}, {len(pr.assignees)} Assignees (Updated {pr.updated_at})')
+    assignee_reviews = 0
+    prj = {}
+
+    assignees = []
+    labels = []
+    for label in pr.labels:
+        labels.append(label.name)
+
+    reviewers = set()
+    for review in reviews:
+        # get list of all approved reviews
+        if review.user and review.state == 'APPROVED':
+            reviewers.add(review.user.login)
+
+    for assignee in pr.assignees:
+        # list assignees for later checks
+        assignees.append(assignee.login)
+        if assignee.login in reviewers:
+            assignee_reviews += 1
+
+    if assignee_reviews > 0 or pr.merged_by.login in assignees:
+        # in case of assignee reviews or if the PR was merged by an assignee
+        prj['review_rule'] = "yes"
+    elif not pr.assignees or \
+            (pr.user.login in assignees and len(assignees) == 1) or \
+            ('Trivial' in labels or 'Hotfix' in labels):
+        # in case where no assignees are set, the submitter is the only assignee,
+        # or the change is trivial or a hotfix
+        prj['review_rule'] = "na"
+    else:
+        # everything else
+        prj['review_rule'] = "no"
+
+
+    # calculate the time the PR was in review, in hours and business days.
+    delta = pr.closed_at - pr.created_at
+    deltah = delta.total_seconds() / 3600
+    prj['hours_open'] = deltah
+
+    dates = (pr.created_at + timedelta(idx + 1) for idx in range((pr.closed_at - pr.created_at).days))
+
+    # Get number of business days per the guidelines, we need at least 2.
+    business_days = sum(1 for day in dates if day.weekday() < 5)
+    prj['business_days_open'] = business_days
+
+    # less than 2 business days ...
+    if business_days < 2 and not ('Trivial' in labels or 'Hotfix' in labels) or \
+            deltah < 4 and 'Trivial' in labels:
+        prj['time_rule'] = "no"
+    else:
+        prj['time_rule'] = "yes"
+
+    # This is all data we get easily through the Github API and serves as the basis
+    # for displaying some trends and metrics.
+    # Data can be extended in the future if we find more information that
+    # is useful through the API.
+
+    prj['nr'] = pr.number
+    prj['url'] = pr.url
+    prj['title'] = pr.title
+    prj['comments'] = pr.comments
+    prj['reviews'] = reviews.totalCount
+    prj['assignees'] = assignees
+    prj['updated'] = pr.updated_at.strftime("%Y-%m-%d %H:%M:%S")
+    prj['created'] = pr.created_at.strftime("%Y-%m-%d %H:%M:%S")
+    prj['closed'] = pr.closed_at.strftime("%Y-%m-%d %H:%M:%S")
+    prj['merged_by'] = pr.merged_by.login
+    prj['submitted_by'] = pr.user.login
+    prj['changed_files'] = pr.changed_files
+    prj['additions'] = pr.additions
+    prj['deletions'] = pr.deletions
+    prj['commits'] = pr.commits
+    # The branch we are targeting: main vs. release branches.
+    prj['base'] = pr.base.ref
+
+    # list all reviewers
+    prj['reviewers'] = list(reviewers)
+    prj['labels'] = labels
+
+    return prj
+
 def main():
     args = parse_args()
     token = os.environ.get('GITHUB_TOKEN')
@@ -46,112 +134,35 @@ def main():
 
     if args.pull_request:
         pr = gh_repo.get_pull(args.pull_request)
-
-        reviews = pr.get_reviews()
-        print(f'#{pr.number}: {pr.title} - {pr.comments} Comments, reviews: {reviews.totalCount}, {len(pr.assignees)} Assignees (Updated {pr.updated_at})')
-        assignee_reviews = 0
-        reviewers = set()
-        prj = {}
-        for r in reviews:
-            if r.user and r.state == 'APPROVED':
-                reviewers.add(r.user.login)
-            if pr.assignees and r.user:
-                for assignee in pr.assignees:
-                    if r.user.login == assignee.login:
-                        assignee_reviews = assignee_reviews + 1
-                        # was reviewed at least by one assignee
-                        prj['reviewed_by_assignee'] = "yes"
-
-        # This is all data we get easily though the Github API and serves as the basis
-        # for displaying some trends and metrics.
-        # Data can be extended in the future if we find more information that
-        # is useful through the API
-
-        prj['nr'] = pr.number
-        prj['url'] = pr.url
-        prj['title'] = pr.title
-        prj['comments'] = pr.comments
-        prj['reviews'] = reviews.totalCount
-        prj['assignees'] = len(pr.assignees)
-        prj['updated'] = pr.updated_at.strftime("%Y-%m-%d %H:%M:%S")
-        prj['created'] = pr.created_at.strftime("%Y-%m-%d %H:%M:%S")
-        prj['closed'] = pr.closed_at.strftime("%Y-%m-%d %H:%M:%S")
-        prj['merged_by'] = pr.merged_by.login
-        prj['submitted_by'] = pr.user.login
-        prj['changed_files'] = pr.changed_files
-        prj['additions'] = pr.additions
-        prj['deletions'] = pr.deletions
-        prj['commits'] = pr.commits
-        # The branch we are targeting. main vs release branches.
-        prj['base'] = pr.base.ref
-
-        ll = []
-        for l in pr.labels:
-            ll.append(l.name)
-        prj['labels'] = ll
-
-        # take first assignee, otherwise we have no assignees and this rule is not applicable
-        if pr.assignee:
-            prj['assignee'] = pr.assignee.login
-        else:
-            prj['assignee'] = "none"
-            prj['reviewed_by_assignee'] = "na"
-            prj['review_rule'] = "na"
-
-        # go through all assignees and check if anyone has approved and reset assignee to the one who approved
-        for assignee in pr.assignees:
-            if assignee.login in reviewers:
-                prj['assignee'] = assignee.login
-            elif assignee.login == pr.user.login:
-                prj['reviewed_by_assignee'] = "yes"
-
-
-        # list assignees for later checks
-        assignees = [a.login for a in pr.assignees]
-
-        # Deal with exceptions when assignee approval is not needed.
-        if 'Trivial' in ll or 'Hotfix' in ll:
-            prj['review_rule'] = "yes"
-        elif pr.merged_by.login in assignees:
-            prj['review_rule'] = "yes"
-        else:
-            prj['review_rule'] = "no"
-
-        prj['assignee_reviews'] = assignee_reviews
-
-        delta = pr.closed_at - pr.created_at
-        deltah = delta.total_seconds() / 3600
-        prj['hours_open'] = deltah
-
-        dates = (pr.created_at + timedelta(idx + 1) for idx in range((pr.closed_at - pr.created_at).days))
-
-        # Get number of business days per the guidelines, we need at least 2.
-        res = sum(1 for day in dates if day.weekday() < 5)
-
-        if res < 2 and not ('Trivial' in ll or 'Hotfix' in ll):
-            prj['time_rule'] = False
-        elif deltah < 4 and 'Trivial' in ll:
-            prj['time_rule'] = False
-        else:
-            prj['time_rule'] = True
-        prj['reviewers'] = list(reviewers)
-
+        prj = process_pr(pr)
         json_list.append(prj)
-
-
-    # Send data over to elasticsearch.
-    es = Elasticsearch(
-        [os.environ['ELASTICSEARCH_SERVER']],
-        api_key=os.environ['ELASTICSEARCH_KEY'],
-        verify_certs=False
-    )
-
-    try:
-        index = os.environ['PR_STAT_ES_INDEX']
-        bulk(es, gendata(json_list, index))
-    except KeyError as e:
-        print(f"Error: {e} not set.")
-        print(json_list)
+    elif args.range:
+        query = f'repo:{args.repo} merged:{args.range} is:pr is:closed sort:updated-desc base:main'
+        prs = gh.search_issues(query=f'{query}')
+        for _pr in prs:
+            pr = gh_repo.get_pull(_pr.number)
+            prj = process_pr(pr)
+            json_list.append(prj)
+
+    if json_list and not args.dry_run:
+        # Send data over to elasticsearch.
+        es = Elasticsearch(
+            [os.environ['ELASTICSEARCH_SERVER']],
+            api_key=os.environ['ELASTICSEARCH_KEY'],
+            verify_certs=False
+        )
+
+        try:
+            if args.es_index:
+                index = args.es_index
+            else:
+                index = os.environ['PR_STAT_ES_INDEX']
+            bulk(es, gendata(json_list, index))
+        except KeyError as e:
+            print(f"Error: {e} not set.")
+            print(json_list)
+    if args.dry_run:
+        pprint.pprint(json_list)
 
 if __name__ == "__main__":
     main()
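
For reference, the time rule added in process_pr() counts business days by walking every full day between the PR's creation and close and keeping those with weekday() < 5; at least 2 business days are needed unless the PR is labeled Trivial or Hotfix. A small standalone illustration of that arithmetic, using made-up timestamps rather than real PR data:

from datetime import datetime, timedelta

created_at = datetime(2023, 1, 5, 9, 0)    # hypothetical: opened on a Thursday morning
closed_at = datetime(2023, 1, 9, 17, 0)    # hypothetical: merged the following Monday

hours_open = (closed_at - created_at).total_seconds() / 3600

# Same generator shape as in process_pr(): one candidate per full day open.
dates = (created_at + timedelta(idx + 1)
         for idx in range((closed_at - created_at).days))
business_days = sum(1 for day in dates if day.weekday() < 5)

# 104.0 hours open, 2 business days (Fri + Mon) -> time_rule would be "yes"
print(hours_open, business_days)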
