@@ -2468,85 +2468,22 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_rope_dimension_count(64)
         self.gguf_writer.add_add_bos_token(False)
 
-    def write_tensors(self):
-        block_count = self.hparams["num_layers"]
-        tensors = dict(self.get_tensors())
-        tensor_map = gguf.get_tensor_name_map(self.model_arch, block_count)
-        has_lm_head = True
-        n_head = self.hparams.get("n_head", self.hparams.get("num_attention_heads"))
-        n_embed = self.hparams.get("hidden_size", self.hparams.get("n_embed"))
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        if name.endswith(".rotary_pos_emb.inv_freq"):
+            return []
 
-        for name, data_torch in tensors.items():
-            if name.endswith(".rotary_pos_emb.inv_freq"):
-                continue
+        del bid  # unused
 
-            if "lm_head.weight" not in tensors.keys() and "output.weight" not in tensors.keys():
-                has_lm_head = False
+        name = re.sub(r'transformer\.', '', name)
 
-            name = re.sub(r'transformer\.', '', name)
+        tensors: list[tuple[str, Tensor]] = []
 
-            old_dtype = data_torch.dtype
+        tensors.append((self.map_tensor_name(name), data_torch))
 
-            # convert any unsupported data types to float32
-            if data_torch.dtype not in (torch.float16, torch.float32):
-                data_torch = data_torch.to(torch.float32)
+        if name == "word_embeddings.weight":
+            assert self.tensor_names is not None
 
-            data = data_torch.squeeze().numpy()
-
-            if re.match(r"h\.\d+\.self_attention\.query_key_value\.weight", name):
-                # Map bloom-style qkv_linear to gpt-style qkv_linear
-                # bloom: https://github.com/huggingface/transformers/blob/main/src/transformers/models/bloom/modeling_bloom.py#L238-L252  # noqa
-                # gpt-2: https://github.com/huggingface/transformers/blob/main/src/transformers/models/gpt2/modeling_gpt2.py#L312  # noqa
-                qkv_weights = data.reshape((n_head, 3, n_embed // n_head, n_embed))
-                data = np.concatenate(
-                    (
-                        qkv_weights[:, 0, :, :].reshape((-1, n_embed)),
-                        qkv_weights[:, 1, :, :].reshape((-1, n_embed)),
-                        qkv_weights[:, 2, :, :].reshape((-1, n_embed)),
-                    ),
-                    axis=0,
-                )
-                print("re-format attention.linear_qkv.weight")
-            elif re.match(r"h\.\d+\.self_attention\.query_key_value\.bias", name):
-                qkv_bias = data.reshape((n_head, 3, n_embed // n_head))
-                data = np.concatenate(
-                    (
-                        qkv_bias[:, 0, :].reshape((n_embed,)),
-                        qkv_bias[:, 1, :].reshape((n_embed,)),
-                        qkv_bias[:, 2, :].reshape((n_embed,)),
-                    ),
-                    axis=0,
-                )
-                print("re-format attention.linear_qkv.bias")
-
-            # map tensor names
-            new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
-            if new_name is None:
-                print(f"Can not map tensor {name!r}")
-                sys.exit()
-
-            n_dims = len(data.shape)
-            data_dtype = data.dtype
-
-            # if f32 desired, convert any float16 to float32
-            if self.ftype == 0 and data_dtype == np.float16:
-                data = data.astype(np.float32)
-
-            # TODO: Why can't we use these float16 as-is? There should be no reason to store float16 as float32
-            if self.ftype == 1 and data_dtype == np.float16 and n_dims == 1:
-                data = data.astype(np.float32)
-
-            # if f16 desired, convert any float32 2-dim weight tensors to float16
-            if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
-                data = data.astype(np.float16)
-
-            print(f"=> {new_name}, shape = {data.shape}, {old_dtype} --> {data.dtype}")
-
-            self.gguf_writer.add_tensor(new_name, data)
-
-            if not has_lm_head and name == "word_embeddings.weight":
-                self.gguf_writer.add_tensor("output.weight", data)
-                print(name, f"=> output.weight, shape = {data.shape}, {old_dtype} --> {data.dtype}")
+        return tensors
 
 
 ###### CONVERSION LOGIC ######
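
For reference, the qkv re-interleaving that the removed `write_tensors()` performed can be reproduced in isolation. The sketch below is illustrative only: the sizes (`n_head = 2`, `n_embed = 6`) and the fabricated weight are assumptions, but the `reshape`/`concatenate` steps mirror the deleted hunk, which converts BLOOM's per-head q/k/v interleaving into GPT-2's contiguous all-q, all-k, all-v layout.

```python
import numpy as np

# Illustrative sizes, not taken from any real model.
n_head, n_embed = 2, 6
head_dim = n_embed // n_head

# Fabricated fused qkv weight in BLOOM's layout: per head, the q, k and v
# rows are stored together. Shape is (3 * n_embed, n_embed).
data = np.arange(3 * n_embed * n_embed, dtype=np.float32).reshape(3 * n_embed, n_embed)

# The deleted code's transformation: split per head, then regroup so that
# all query rows come first, then all key rows, then all value rows.
qkv_weights = data.reshape((n_head, 3, head_dim, n_embed))
data = np.concatenate(
    (
        qkv_weights[:, 0, :, :].reshape((-1, n_embed)),  # queries
        qkv_weights[:, 1, :, :].reshape((-1, n_embed)),  # keys
        qkv_weights[:, 2, :, :].reshape((-1, n_embed)),  # values
    ),
    axis=0,
)
assert data.shape == (3 * n_embed, n_embed)  # layout changes, shape does not
```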
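The refactor also moves the per-tensor loop out of the subclass: rather than each model class owning `write_tensors()`, the base class iterates the checkpoint and calls `modify_tensors()` once per tensor, collecting the `(name, tensor)` pairs it returns. A hypothetical driver loop, purely to show that calling convention (`convert_all` and its arguments are invented for illustration, not the actual base-class API):

```python
from typing import Iterable

from torch import Tensor


def convert_all(model, named_tensors: Iterable[tuple[str, Tensor]]) -> None:
    # Hypothetical driver: the real base class also handles dtype selection,
    # lazy loading, and parsing the block id from the name before the call.
    for name, data_torch in named_tensors:
        for new_name, tensor in model.modify_tensors(data_torch, name, bid=None):
            model.gguf_writer.add_tensor(new_name, tensor.squeeze().numpy())
```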