Skip to content

Commit 713206f

Browse files
committed
improve readability, logging, fix spellcheck test
1 parent 8ddcfd8 commit 713206f

File tree

5 files changed

+668
-575
lines changed

5 files changed

+668
-575
lines changed

.github/scripts/spellcheck_conf/wordlist.txt

+4
Original file line numberDiff line numberDiff line change
@@ -1451,3 +1451,7 @@ openhathi
14511451
sarvam
14521452
subtask
14531453
acc
1454+
Triaging
1455+
matplotlib
1456+
remediations
1457+
walkthrough

recipes/use_cases/github_triage/llm.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import time
55
import json
66

7+
from tqdm import tqdm
78
from openai import OpenAI
89
import groq
910

@@ -37,7 +38,7 @@ def chat(
3738
)
3839
output = response.choices[0].message
3940
except Exception as e:
40-
log.error(
41+
logger.error(
4142
f"FAILED to generate inference for input {inputs}\nError: {str(e)}"
4243
)
4344
output = None
@@ -85,7 +86,8 @@ def chat(
8586
print(f"[groq] waiting for {wait} to prevent ratelimiting")
8687
time.sleep(wait)
8788
except Exception as e:
88-
logger.error(f"INFERENCE FAILED with Error: {e.response.status_code}! for input:\n{inputs[-1]['content'][:300]}")
89+
logger.error(f"INFERENCE FAILED with Error: {e.response.status_code} for input:\n{inputs[-1]['content'][:300]}")
90+
break
8991

9092
return output
9193

@@ -141,7 +143,8 @@ def run_llm_inference(
141143
)
142144

143145
responses = [
144-
LLM.chat(i, generation_kwargs, guided_decode_json_schema) for i in inputs
146+
LLM.chat(i, generation_kwargs, guided_decode_json_schema)
147+
for i in tqdm(inputs, desc=f"Inference[{prompt_name}]")
145148
]
146149

147150
if guided_decode_json_schema is not None:
@@ -159,4 +162,4 @@ def run_llm_inference(
159162
if not _batch:
160163
responses = responses[0]
161164

162-
return responses
165+
return responses

recipes/use_cases/github_triage/triage.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ def _categorize_issues(
6868
}
6969
return themes, theme_count
7070

71-
logger.info(f"Generating annotations for {len(issues_df)}")
71+
logger.info(f"Generating annotations for {len(issues_df)} issues")
7272

7373
discussions = issues_df["discussion"].tolist()
7474
metadata = run_llm_inference(

recipes/use_cases/github_triage/utils.py

+2-8
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ def fetch_repo_issues(repo, start_date=None, end_date=None):
3131
url = f"https://api.github.com/search/issues?per_page=100&sort=created&order=asc&q=repo:{repo}+is:issue{time_filter}"
3232

3333
samples = []
34-
logger.info(f"Fetching issues on {repo} from {start_date} to {end_date}")
3534

3635
while True:
3736
response = fetch_github_endpoint(url)
@@ -61,8 +60,7 @@ def fetch_repo_issues(repo, start_date=None, end_date=None):
6160
else:
6261
break
6362
else:
64-
raise Exception(f"Fetching issues failed with Error: {response.status_code}")
65-
print()
63+
raise Exception(f"Fetching issues failed with Error: {response.status_code} on url {url}")
6664

6765
rows = [{
6866
"repo_name": repo,
@@ -93,12 +91,8 @@ def fetch_repo_stats(repo):
9391

9492
def validate_df_values(df, out_folder=None, name=None):
9593
df.columns = df.columns.str.lower().str.replace(" ", "_").str.replace("-", "_")
96-
# for c in df.columns:
97-
# x = df[c].iloc[0]
98-
# if isinstance(x, str) and '[' in x:
99-
# df[c] = df[c].apply(lambda x: eval(x))
10094
if out_folder is not None:
10195
path = f"{out_folder}/{name}.csv"
10296
df.to_csv(path, index=False)
10397
logger.info(f"Data saved to {path}")
104-
return df
98+
return df

0 commit comments

Comments (0)