from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
from datetime import timedelta
+import pprint


date_format = '%Y-%m-%d %H:%M:%S'
@@ -21,8 +22,11 @@ def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, allow_abbrev=False)

-    parser.add_argument('--pull-request', required=True, help='pull request number', type=int)
-    parser.add_argument('--repo', required=True, help='github repo')
+    parser.add_argument('--pull-request', help='pull request number', type=int)
+    parser.add_argument('--range', help='execute based on a date range, for example 2023-01-01..2023-01-05')
+    parser.add_argument('--repo', help='github repo', default='zephyrproject-rtos/zephyr')
+    parser.add_argument('--es-index', help='Elasticsearch index')
+    parser.add_argument('-y', '--dry-run', action="store_true", help='dry run, do not upload data')

    return parser.parse_args()

@@ -33,6 +37,90 @@ def gendata(data, index):
            "_source": t
        }

+def process_pr(pr):
+    reviews = pr.get_reviews()
+    print(f'#{pr.number}: {pr.title} - {pr.comments} Comments, reviews: {reviews.totalCount}, {len(pr.assignees)} Assignees (Updated {pr.updated_at})')
+    assignee_reviews = 0
+    prj = {}
+
+    assignees = []
+    labels = []
+    for label in pr.labels:
+        labels.append(label.name)
+
+    reviewers = set()
+    for review in reviews:
+        # get list of all approved reviews
+        if review.user and review.state == 'APPROVED':
+            reviewers.add(review.user.login)
+
+    for assignee in pr.assignees:
+        # list assignees for later checks
+        assignees.append(assignee.login)
+        if assignee.login in reviewers:
+            assignee_reviews += 1
+
+    if assignee_reviews > 0 or pr.merged_by.login in assignees:
+        # in case of assignee reviews or if PR was merged by an assignee
+        prj['review_rule'] = "yes"
+    elif not pr.assignees or \
+            (pr.user.login in assignees and len(assignees) == 1) or \
+            ('Trivial' in labels or 'Hotfix' in labels):
+        # in case where no assignees are set, if the submitter is the only assignee,
+        # or in case of trivial changes or hotfixes
+        prj['review_rule'] = "na"
+    else:
+        # everything else
+        prj['review_rule'] = "no"
+
+
+    # calculate the time the PR was in review, in hours and business days.
+    delta = pr.closed_at - pr.created_at
+    deltah = delta.total_seconds() / 3600
+    prj['hours_open'] = deltah
+
+    dates = (pr.created_at + timedelta(idx + 1) for idx in range((pr.closed_at - pr.created_at).days))
+
+    # Get number of business days per the guidelines, we need at least 2.
+    business_days = sum(1 for day in dates if day.weekday() < 5)
+    prj['business_days_open'] = business_days
+
+    # less than 2 business days ...
+    if business_days < 2 and not ('Trivial' in labels or 'Hotfix' in labels) or \
+            deltah < 4 and 'Trivial' in labels:
+        prj['time_rule'] = "no"
+    else:
+        prj['time_rule'] = "yes"
+
+    # This is all data we get easily through the Github API and serves as the basis
+    # for displaying some trends and metrics.
+    # Data can be extended in the future if we find more information that
+    # is useful through the API.
+
+    prj['nr'] = pr.number
+    prj['url'] = pr.url
+    prj['title'] = pr.title
+    prj['comments'] = pr.comments
+    prj['reviews'] = reviews.totalCount
+    prj['assignees'] = assignees
+    prj['updated'] = pr.updated_at.strftime("%Y-%m-%d %H:%M:%S")
+    prj['created'] = pr.created_at.strftime("%Y-%m-%d %H:%M:%S")
+    prj['closed'] = pr.closed_at.strftime("%Y-%m-%d %H:%M:%S")
+    prj['merged_by'] = pr.merged_by.login
+    prj['submitted_by'] = pr.user.login
+    prj['changed_files'] = pr.changed_files
+    prj['additions'] = pr.additions
+    prj['deletions'] = pr.deletions
+    prj['commits'] = pr.commits
+    # The branch we are targeting: main vs. release branches.
+    prj['base'] = pr.base.ref
+
+    # list all reviewers
+    prj['reviewers'] = list(reviewers)
+    prj['labels'] = labels
+
+    return prj
+
def main():
    args = parse_args()
    token = os.environ.get('GITHUB_TOKEN')
@@ -46,112 +134,35 @@ def main():

    if args.pull_request:
        pr = gh_repo.get_pull(args.pull_request)
-
-        reviews = pr.get_reviews()
-        print(f'#{pr.number}: {pr.title} - {pr.comments} Comments, reviews: {reviews.totalCount}, {len(pr.assignees)} Assignees (Updated {pr.updated_at})')
-        assignee_reviews = 0
-        reviewers = set()
-        prj = {}
-        for r in reviews:
-            if r.user and r.state == 'APPROVED':
-                reviewers.add(r.user.login)
-            if pr.assignees and r.user:
-                for assignee in pr.assignees:
-                    if r.user.login == assignee.login:
-                        assignee_reviews = assignee_reviews + 1
-                        # was reviewed at least by one assignee
-                        prj['reviewed_by_assignee'] = "yes"
-
-        # This is all data we get easily though the Github API and serves as the basis
-        # for displaying some trends and metrics.
-        # Data can be extended in the future if we find more information that
-        # is useful through the API
-
-        prj['nr'] = pr.number
-        prj['url'] = pr.url
-        prj['title'] = pr.title
-        prj['comments'] = pr.comments
-        prj['reviews'] = reviews.totalCount
-        prj['assignees'] = len(pr.assignees)
-        prj['updated'] = pr.updated_at.strftime("%Y-%m-%d %H:%M:%S")
-        prj['created'] = pr.created_at.strftime("%Y-%m-%d %H:%M:%S")
-        prj['closed'] = pr.closed_at.strftime("%Y-%m-%d %H:%M:%S")
-        prj['merged_by'] = pr.merged_by.login
-        prj['submitted_by'] = pr.user.login
-        prj['changed_files'] = pr.changed_files
-        prj['additions'] = pr.additions
-        prj['deletions'] = pr.deletions
-        prj['commits'] = pr.commits
-        # The branch we are targeting. main vs release branches.
-        prj['base'] = pr.base.ref
-
-        ll = []
-        for l in pr.labels:
-            ll.append(l.name)
-        prj['labels'] = ll
-
-        # take first assignee, otherwise we have no assignees and this rule is not applicable
-        if pr.assignee:
-            prj['assignee'] = pr.assignee.login
-        else:
-            prj['assignee'] = "none"
-            prj['reviewed_by_assignee'] = "na"
-            prj['review_rule'] = "na"
-
-        # go through all assignees and check if anyone has approved and reset assignee to the one who approved
-        for assignee in pr.assignees:
-            if assignee.login in reviewers:
-                prj['assignee'] = assignee.login
-            elif assignee.login == pr.user.login:
-                prj['reviewed_by_assignee'] = "yes"
-
-
-        # list assignees for later checks
-        assignees = [a.login for a in pr.assignees]
-
-        # Deal with exceptions when assignee approval is not needed.
-        if 'Trivial' in ll or 'Hotfix' in ll:
-            prj['review_rule'] = "yes"
-        elif pr.merged_by.login in assignees:
-            prj['review_rule'] = "yes"
-        else:
-            prj['review_rule'] = "no"
-
-        prj['assignee_reviews'] = assignee_reviews
-
-        delta = pr.closed_at - pr.created_at
-        deltah = delta.total_seconds() / 3600
-        prj['hours_open'] = deltah
-
-        dates = (pr.created_at + timedelta(idx + 1) for idx in range((pr.closed_at - pr.created_at).days))
-
-        # Get number of business days per the guidelines, we need at least 2.
-        res = sum(1 for day in dates if day.weekday() < 5)
-
-        if res < 2 and not ('Trivial' in ll or 'Hotfix' in ll):
-            prj['time_rule'] = False
-        elif deltah < 4 and 'Trivial' in ll:
-            prj['time_rule'] = False
-        else:
-            prj['time_rule'] = True
-        prj['reviewers'] = list(reviewers)
-
+        prj = process_pr(pr)
        json_list.append(prj)
-
-
-    # Send data over to elasticsearch.
-    es = Elasticsearch(
-        [os.environ['ELASTICSEARCH_SERVER']],
-        api_key=os.environ['ELASTICSEARCH_KEY'],
-        verify_certs=False
-    )
-
-    try:
-        index = os.environ['PR_STAT_ES_INDEX']
-        bulk(es, gendata(json_list, index))
-    except KeyError as e:
-        print(f"Error: {e} not set.")
-        print(json_list)
+    elif args.range:
+        query = f'repo:{args.repo} merged:{args.range} is:pr is:closed sort:updated-desc base:main'
+        prs = gh.search_issues(query=f'{query}')
+        for _pr in prs:
+            pr = gh_repo.get_pull(_pr.number)
+            prj = process_pr(pr)
+            json_list.append(prj)
+
+    if json_list and not args.dry_run:
+        # Send data over to elasticsearch.
+        es = Elasticsearch(
+            [os.environ['ELASTICSEARCH_SERVER']],
+            api_key=os.environ['ELASTICSEARCH_KEY'],
+            verify_certs=False
+        )
+
+        try:
+            if args.es_index:
+                index = args.es_index
+            else:
+                index = os.environ['PR_STAT_ES_INDEX']
+            bulk(es, gendata(json_list, index))
+        except KeyError as e:
+            print(f"Error: {e} not set.")
+            print(json_list)
+    if args.dry_run:
+        pprint.pprint(json_list)

if __name__ == "__main__":
    main()
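
For reference, a possible way to exercise the new options locally; the script filename is assumed here, and the PR number and index name are only examples. With --dry-run only GITHUB_TOKEN matters, since the upload block is skipped and the collected records are pretty-printed instead:

    # dry run over a merge window, nothing is uploaded
    GITHUB_TOKEN=<token> python3 merged_prs.py --range 2023-01-01..2023-01-05 --dry-run

    # single PR, uploaded to an explicit index (ELASTICSEARCH_SERVER and ELASTICSEARCH_KEY must also be set)
    GITHUB_TOKEN=<token> python3 merged_prs.py --pull-request 12345 --es-index pr-stats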