@@ -203,6 +203,8 @@ def from_model_architecture(model_architecture):
             return CodeShellModel
         if model_architecture == "OrionForCausalLM":
            return OrionModel
+        if model_architecture == "InternLM2ForCausalLM":
+            return InternLM2Model
         return Model
 
     def _is_model_safetensors(self) -> bool:
@@ -254,6 +256,8 @@ def _get_model_architecture(self) -> gguf.MODEL_ARCH:
             return gguf.MODEL_ARCH.CODESHELL
         if arch == "OrionForCausalLM":
             return gguf.MODEL_ARCH.ORION
+        if arch == "InternLM2ForCausalLM":
+            return gguf.MODEL_ARCH.INTERNLM2
 
         raise NotImplementedError(f'Architecture "{arch}" not supported!')
 
@@ -1344,6 +1348,154 @@ def write_tensors(self):
             self.gguf_writer.add_tensor("output.weight", data)
             print(name, f"=> output.weight, shape = {data.shape}, {old_dtype} --> {data.dtype}")
 
+
+class InternLM2Model(Model):
+    def set_vocab(self):
+        # TODO: is there a better way?
+        # Copied from _set_vocab_sentencepiece; the only difference is that the character
+        # \x00 is treated specially and replaced with an emoji character, to prevent it from
+        # being mistakenly recognized as an empty string in C++.
+        from sentencepiece import SentencePieceProcessor
+        from sentencepiece import sentencepiece_model_pb2 as model
+
+        tokenizer_path = self.dir_model / 'tokenizer.model'
+
+        tokens: list[bytes] = []
+        scores: list[float] = []
+        toktypes: list[int] = []
+
+        if not tokenizer_path.is_file():
+            print(f'Error: Missing {tokenizer_path}', file=sys.stderr)
+            sys.exit(1)
+
+        sentencepiece_model = model.ModelProto()
+        sentencepiece_model.ParseFromString(open(tokenizer_path, "rb").read())
+        add_prefix = sentencepiece_model.normalizer_spec.add_dummy_prefix
+
+        tokenizer = SentencePieceProcessor(str(tokenizer_path))
+        vocab_size = self.hparams.get('vocab_size', tokenizer.vocab_size())
+
+        for token_id in range(vocab_size):
+            piece = tokenizer.id_to_piece(token_id)
+            text = piece.encode("utf-8")
+            score = tokenizer.get_score(token_id)
+            if text == b"\x00":
+                # TODO: fixme
+                # Hack: replace the \x00 character so it is not mistaken for an empty string.
+                print(f"InternLM2 convert token '{text}' to '🐉'!")
+                text = "🐉".encode("utf-8")
+
+            toktype = SentencePieceTokenTypes.NORMAL
+            if tokenizer.is_unknown(token_id):
+                toktype = SentencePieceTokenTypes.UNKNOWN
+            elif tokenizer.is_control(token_id):
+                toktype = SentencePieceTokenTypes.CONTROL
+            elif tokenizer.is_unused(token_id):
+                toktype = SentencePieceTokenTypes.UNUSED
+            elif tokenizer.is_byte(token_id):
+                toktype = SentencePieceTokenTypes.BYTE
+
+            tokens.append(text)
+            scores.append(score)
+            toktypes.append(toktype)
+
+        added_tokens_file = self.dir_model / 'added_tokens.json'
+        if added_tokens_file.is_file():
+            with open(added_tokens_file, "r", encoding="utf-8") as f:
+                added_tokens_json = json.load(f)
+
+            for key in added_tokens_json:
+                tokens.append(key.encode("utf-8"))
+                scores.append(-1000.0)
+                toktypes.append(SentencePieceTokenTypes.USER_DEFINED)
+
+        self.gguf_writer.add_tokenizer_model("llama")
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_scores(scores)
+        self.gguf_writer.add_token_types(toktypes)
+        self.gguf_writer.add_add_space_prefix(add_prefix)
+
+        special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
+        special_vocab.add_to_gguf(self.gguf_writer)
+
+    def set_gguf_parameters(self):
+        self.gguf_writer.add_name("InternLM2")
+        self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"])
+        self.gguf_writer.add_block_count(self.hparams["num_hidden_layers"])
+        self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
+        self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"])
+        self.gguf_writer.add_rope_freq_base(self.hparams["rope_theta"])
+        self.gguf_writer.add_head_count(self.hparams["num_attention_heads"])
+        self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
+        self.gguf_writer.add_head_count_kv(self.hparams["num_key_value_heads"])
+
+    def post_write_tensors(self, tensor_map, name, data_torch):
+        old_dtype = data_torch.dtype
+
+        # convert any unsupported data types to float32
+        if data_torch.dtype not in (torch.float16, torch.float32):
+            data_torch = data_torch.to(torch.float32)
+
+        data = data_torch.squeeze().numpy()
+
+        # map tensor names
+        new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
+        if new_name is None:
+            print(f"Can not map tensor {name!r}")
+            sys.exit()
+
+        n_dims = len(data.shape)
+        data_dtype = data.dtype
+
+        # if f32 desired, convert any float16 to float32
+        if self.ftype == 0 and data_dtype == np.float16:
+            data = data.astype(np.float32)
+
+        # TODO: Why can't we use these float16 as-is? There should be no reason to store float16 as float32
+        if self.ftype == 1 and data_dtype == np.float16 and n_dims == 1:
+            data = data.astype(np.float32)
+
+        # if f16 desired, convert any float32 2-dim weight tensors to float16
+        if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
+            data = data.astype(np.float16)
+
+        print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
+        self.gguf_writer.add_tensor(new_name, data)
+
+    def write_tensors(self):
+        from einops import rearrange
+
+        num_heads = self.hparams.get("num_attention_heads")
+        num_kv_heads = self.hparams.get("num_key_value_heads")
+        hidden_size = self.hparams.get("hidden_size")
+        q_per_kv = num_heads // num_kv_heads
+        head_dim = hidden_size // num_heads
+        num_groups = num_heads // q_per_kv
+
+        block_count = self.hparams["num_hidden_layers"]
+        model_kv = dict(self.get_tensors())
+        tensor_map = gguf.get_tensor_name_map(self.model_arch, block_count)
+        qkv_pattern = r"model\.layers\.(\d+)\.attention\.wqkv"
+        for name, data_torch in model_kv.items():
+            # we don't need these
+            if name.endswith(".rotary_emb.inv_freq"):
+                continue
+
+            if re.match(qkv_pattern, name):
+                bid = re.findall(qkv_pattern, name)[0]
+                qkv = data_torch
+                qkv = rearrange(qkv.T, "o (g n i) -> o g n i", g=num_groups, n=q_per_kv + 2, i=head_dim)
+                q, k, v = qkv[..., :q_per_kv, :], qkv[..., q_per_kv:q_per_kv + 1, :], qkv[..., q_per_kv + 1:q_per_kv + 2, :]
+                q = rearrange(q, "o g n i -> o (g n i)").T
+                k = rearrange(k, "o g n i -> o (g n i)").T
+                v = rearrange(v, "o g n i -> o (g n i)").T
+                self.post_write_tensors(tensor_map, f"model.layers.{bid}.attention.wq.weight", q)
+                self.post_write_tensors(tensor_map, f"model.layers.{bid}.attention.wk.weight", k)
+                self.post_write_tensors(tensor_map, f"model.layers.{bid}.attention.wv.weight", v)
+            else:
+                self.post_write_tensors(tensor_map, name, data_torch)
+
+
 ###### CONVERSION LOGIC ######
 
 
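For reference, the wqkv handling in InternLM2Model.write_tensors can be exercised in isolation. The sketch below is not part of the commit and uses made-up sizes (8 query heads, 2 KV heads, head_dim 4); it illustrates the assumed layout of the packed weight, where each KV group carries q_per_kv query heads followed by one key head and one value head, and shows that the rearrange calls recover wq/wk/wv matrices with the expected row counts.

# Minimal sketch (not from the commit): splitting a packed wqkv weight
# with made-up example sizes.
import torch
from einops import rearrange

num_heads, num_kv_heads, head_dim = 8, 2, 4     # example values only
hidden_size = num_heads * head_dim              # 32
q_per_kv = num_heads // num_kv_heads            # 4 query heads per KV head
num_groups = num_heads // q_per_kv              # 2, equals num_kv_heads

# packed wqkv weight: (q_per_kv + 2) heads per group, stacked over all groups
wqkv = torch.randn((q_per_kv + 2) * num_groups * head_dim, hidden_size)

# expose the per-group structure: o = hidden_size, g = groups, n = heads per group, i = head_dim
qkv = rearrange(wqkv.T, "o (g n i) -> o g n i", g=num_groups, n=q_per_kv + 2, i=head_dim)
q = rearrange(qkv[..., :q_per_kv, :], "o g n i -> o (g n i)").T
k = rearrange(qkv[..., q_per_kv:q_per_kv + 1, :], "o g n i -> o (g n i)").T
v = rearrange(qkv[..., q_per_kv + 1:, :], "o g n i -> o (g n i)").T

print(tuple(q.shape))  # (32, 32): num_heads * head_dim rows, like wq
print(tuple(k.shape))  # (8, 32):  num_kv_heads * head_dim rows, like wk
print(tuple(v.shape))  # (8, 32):  num_kv_heads * head_dim rows, like wv

The split results are then fed back through post_write_tensors under the separate attention.wq/wk/wv names, which the generic tensor name map can resolve.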
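The dtype handling in post_write_tensors follows the same pattern as the other converters in this file. As a stand-alone restatement (not code from the commit, with purely illustrative tensor names), the rules reduce to:

# Restatement of the post_write_tensors dtype rules (not from the commit).
import numpy as np

def convert_dtype(ftype: int, name: str, data: np.ndarray) -> np.ndarray:
    n_dims = data.ndim
    # ftype 0 (f32 output): upcast any float16 tensor
    if ftype == 0 and data.dtype == np.float16:
        data = data.astype(np.float32)
    # ftype 1 (f16 output): 1-D float16 tensors are still stored as float32
    if ftype == 1 and data.dtype == np.float16 and n_dims == 1:
        data = data.astype(np.float32)
    # ftype 1 (f16 output): 2-D float32 ".weight" matrices are downcast to float16
    if ftype == 1 and data.dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
        data = data.astype(np.float16)
    return data

print(convert_dtype(1, "output.weight", np.zeros((4, 4), dtype=np.float32)).dtype)  # float16
print(convert_dtype(1, "model.norm.weight", np.zeros(4, dtype=np.float16)).dtype)   # float32

In other words, in the script's f16 output mode (ftype == 1) the large 2-D weight matrices end up as float16 while norms and other 1-D tensors remain float32.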