Skip to content

Commit d2519f6

Browse files
author
温进
committed
update code
1 parent 3f76c43 commit d2519f6

File tree

7 files changed

+25
-28
lines changed

7 files changed

+25
-28
lines changed

.DS_Store

6 KB
Binary file not shown.

scripts/run_eval.sh

-22
This file was deleted.

scripts/run_eval_example.sh

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# model_path: 要测试的模型路径
2+
# model_name: 模型配置文件对应的模型命名
3+
# model_conf_path: 模型配置文件的地址,一般就为 conf 路径下的 devopseval_dataset_fp.json
4+
# eval_dataset_list: 要测试的数据集名称,默认 all(测试全部数据集);如果需要测试单个或者多个数据集,用 # 符号连接,示例:dataset1#dataset2
5+
# eval_dataset_fp_conf_path: 数据集配置地址
6+
# eval_dataset_type: 测试哪种类型,只支持默认 test 类型的测试集
7+
# data_path: 评测数据集地址,填写下载数据集后的地址就可以
8+
# k_shot: 支持 0-5,代表 few-shot 会给模型前缀加的示例数量
9+
10+
python src/run_eval.py \
11+
--model_path path_to_model \
12+
--model_name model_name_in_conf \
13+
--model_conf_path path_to_model_conf \
14+
--eval_dataset_list all \
15+
--eval_dataset_fp_conf_path path_to_dataset_conf \
16+
--eval_dataset_type test \
17+
--data_path path_to_downloaded_devops_eval_data \
18+
--k_shot 0

src/evaluate/evaluate.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def evaluate(model, tokenizer, context_builder, all_dataset):
1515
for question in dataset:
1616
if do_verbose:
1717
question['pred'] = get_pred(model, tokenizer, context_builder, question, do_verbose)
18-
do_verbose = True
18+
do_verbose = False
1919
else:
2020
question['pred'] = get_pred(model, tokenizer, context_builder, question, do_verbose)
2121
return all_dataset_pred
@@ -35,8 +35,6 @@ def get_pred(model, tokenizer, context_builder, question: dict, verbose: bool =
3535
option_dict[option] = encoded
3636
else:
3737
option_dict[option] = tokenizer._convert_token_to_id(option)
38-
39-
logger.debug('option_dict={}'.format(option_dict))
4038

4139
# build context
4240
raw_text, context_tokens = context_builder.make_context(model, tokenizer, query)

src/hparams/evaluate_args.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ class EvaluateArguments:
3838
)
3939
k_shot: int = field(
4040
default = 0,
41-
metadata={"help": "k-shot test, k should be in (0, 1,2,3,4,5)]"}
41+
metadata={"help": "k-shot test, k should be in (0, 1,2,3,4,5)"}
4242
)
4343
seed: int = field(
4444
default = 100,

src/model_and_tokenizer_loader/model_and_tokenizer_loader.py

+3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import torch
22
import transformers
33
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
4+
from loguru import logger
45

56

67
class ModelAndTokenizerLoader:
@@ -14,6 +15,8 @@ def load_model_and_tokenizer(self, model_path: str):
1415

1516
def load_model(self, model_path: str):
1617
model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto", trust_remote_code=True).eval()
18+
# for name, param in model.named_parameters():
19+
# logger.debug('param_name={}, param.device={}'.format(name, param.device))
1720
return model
1821

1922
def load_tokenizer(self, model_path: str):

src/run_eval.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ def run_eval(args=None):
1919

2020
# Get all dataset
2121
eval_datasets = load_all_dataset(eval_args)
22-
logger.info('Load all dataset success')
22+
logger.info('Load all dataset success, total question number={}'.format(sum(len(v) for v in eval_datasets.values())))
2323

2424
# Load model and tokenizer
2525
model, tokenizer = load_model_and_tokenizer(eval_args)
@@ -35,7 +35,7 @@ def run_eval(args=None):
3535

3636
# get metric
3737
score_dict = get_acc_score(all_pred)
38-
logger.info('model_path={}\nk_shot={}\nEvaluation result={}'.format(eval_args.model_path, eval_args.k_shot, score_dict))
38+
logger.info('model_path={} k_shot={} Evaluation result={}'.format(eval_args.model_path, eval_args.k_shot, score_dict))
3939

4040
# save metric
4141

0 commit comments

Comments
 (0)