@@ -69,23 +69,23 @@ def tuner_hyper_params(self):
             return ''
         if args['sft_type'] in ('lora', 'adalora', 'longlora'):
             if 'lora_rank' in args:
-                hyper_params += f'rank={args["lora_rank"]}/' \
-                                f'target={args["lora_target_modules"]}/' \
-                                f'alpha={args["lora_alpha"]}/' \
-                                f'lr_ratio={args.get("lora_lr_ratio", None)}/' \
-                                f'use_rslora={args.get("use_rslora", False)}/' \
-                                f'use_dora={args.get("use_dora", False)}'
+                hyper_params += f'rank={args['lora_rank']}/' \
+                                f'target={args['lora_target_modules']}/' \
+                                f'alpha={args['lora_alpha']}/' \
+                                f'lr_ratio={args.get('lora_lr_ratio', None)}/' \
+                                f'use_rslora={args.get('use_rslora', False)}/' \
+                                f'use_dora={args.get('use_dora', False)}'
         else:
             hyper_params = ''
         if args['sft_type'] == 'full':
             if 'use_galore' in args and args['use_galore'] == 'true':
-                hyper_params += f'galore_rank={args["galore_rank"]}/' \
-                                f'galore_per_parameter={args["galore_optim_per_parameter"]}/' \
-                                f'galore_with_embedding={args["galore_with_embedding"]}/'
+                hyper_params += f'galore_rank={args['galore_rank']}/' \
+                                f'galore_per_parameter={args['galore_optim_per_parameter']}/' \
+                                f'galore_with_embedding={args['galore_with_embedding']}/'
         if args['sft_type'] == 'llamapro':
-            hyper_params += f'num_blocks={args["llamapro_num_new_blocks"]}/'
+            hyper_params += f'num_blocks={args['llamapro_num_new_blocks']}/'
         if 'neftune_noise_alpha' in args and args['neftune_noise_alpha']:
-            hyper_params += f'neftune_noise_alpha={args["neftune_noise_alpha"]}/'
+            hyper_params += f'neftune_noise_alpha={args['neftune_noise_alpha']}/'

         if hyper_params.endswith('/'):
             hyper_params = hyper_params[:-1]
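Note that the `+` lines above nest the f-string's own quote character inside the replacement fields; that is only valid syntax on Python 3.12+ (PEP 701) and raises a SyntaxError on older interpreters. To illustrate the string this property builds, here is a self-contained sketch with invented argument values (mixed quotes are used so it runs on any Python with f-strings):

# Standalone sketch of tuner_hyper_params for a LoRA run; all values are invented.
args = {
    'sft_type': 'lora',
    'lora_rank': 8,
    'lora_target_modules': 'ALL',
    'lora_alpha': 32,
}

hyper_params = ''
if args['sft_type'] in ('lora', 'adalora', 'longlora') and 'lora_rank' in args:
    hyper_params += f'rank={args["lora_rank"]}/' \
                    f'target={args["lora_target_modules"]}/' \
                    f'alpha={args["lora_alpha"]}/' \
                    f'lr_ratio={args.get("lora_lr_ratio", None)}/' \
                    f'use_rslora={args.get("use_rslora", False)}/' \
                    f'use_dora={args.get("use_dora", False)}'
if hyper_params.endswith('/'):
    hyper_params = hyper_params[:-1]  # drop the trailing separator
print(hyper_params)
# rank=8/target=ALL/alpha=32/lr_ratio=None/use_rslora=False/use_dora=False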
@@ -95,8 +95,8 @@ def tuner_hyper_params(self):
     def hyper_parameters(self):
         if 'learning_rate' not in self.args:
             return ''
-        return f'lr={self.args["learning_rate"]}/' \
-               f'epoch={self.args["num_train_epochs"]}'
+        return f'lr={self.args['learning_rate']}/' \
+               f'epoch={self.args['num_train_epochs']}'

     @property
     def train_speed(self):
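The same pattern in miniature, as a hedged sketch with made-up values:

# Minimal sketch of hyper_parameters; the args values are invented.
args = {'learning_rate': 1e-4, 'num_train_epochs': 2}
if 'learning_rate' not in args:
    result = ''
else:
    result = f'lr={args["learning_rate"]}/' \
             f'epoch={args["num_train_epochs"]}'
print(result)  # lr=0.0001/epoch=2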
@@ -190,10 +190,10 @@ def generate_sft_report(outputs: List[ModelOutput]):
         ceval_acc = '' if not ceval_acc else f'**{ceval_acc:.3f}**'

         line = f'|{output.name}|' \
-               f'{output.args["model_type"]}|' \
-               f'{output.args.get("dataset")}|' \
-               f'{output.args.get("train_dataset_mix_ratio", 0.)}|' \
-               f'{output.args.get("sft_type")}|' \
+               f'{output.args['model_type']}|' \
+               f'{output.args.get('dataset')}|' \
+               f'{output.args.get('train_dataset_mix_ratio', 0.)}|' \
+               f'{output.args.get('sft_type')}|' \
                f'{output.tuner_hyper_params}|' \
                f'{output.num_trainable_parameters}({output.trainable_parameters_percentage})|' \
                f'{use_flash_attn}|' \
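The chained f-strings above emit one markdown table row per training run. A toy reconstruction, with every field invented, shows the shape of the output:

# Toy version of the row assembly in generate_sft_report; all values are invented.
name = 'qwen-7b-lora-exp1'
args = {'model_type': 'qwen-7b-chat', 'dataset': 'ms-bench', 'sft_type': 'lora'}
line = f'|{name}|' \
       f'{args["model_type"]}|' \
       f'{args.get("dataset")}|' \
       f'{args.get("train_dataset_mix_ratio", 0.)}|' \
       f'{args.get("sft_type")}|'
print(line)  # |qwen-7b-lora-exp1|qwen-7b-chat|ms-bench|0.0|lora|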
@@ -267,14 +267,14 @@ def generate_export_report(outputs: List[ModelOutput]):
         ceval_acc = '' if not ceval_acc else f'**{ceval_acc:.3f}**'

         if output.train_dataset_info:
-            dataset_info = f'{output.args["dataset"]}/{output.train_dataset_info}'
+            dataset_info = f'{output.args['dataset']}/{output.train_dataset_info}'
         else:
-            dataset_info = f'{output.args["dataset"]}'
+            dataset_info = f'{output.args['dataset']}'
         line = f'|{output.name}|' \
-               f'{output.args["model_type"]}|' \
+               f'{output.args['model_type']}|' \
                f'{dataset_info}|' \
-               f'{output.args["quant_method"]}|' \
-               f'{output.args["quant_bits"]}|' \
+               f'{output.args['quant_method']}|' \
+               f'{output.args['quant_bits']}|' \
                f'{infer_speed}|' \
                f'{gsm8k_acc}|' \
                f'{arc_acc}|' \
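For the export report, `dataset_info` folds the optional `train_dataset_info` suffix into the dataset column. A sketch with invented values:

# Sketch of the dataset_info branch; the values are invented.
dataset = 'sharegpt-gpt4:default'
train_dataset_info = '2000 rows'  # may be empty or None
if train_dataset_info:
    dataset_info = f'{dataset}/{train_dataset_info}'
else:
    dataset_info = f'{dataset}'
print(dataset_info)  # sharegpt-gpt4:default/2000 rows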