@@ -22,12 +22,8 @@ enum split_operation : uint8_t {
22
22
SPLIT_OP_MERGE,
23
23
};
24
24
25
- static const char * const LLM_KV_GENERAL_SPLIT_I_SPLIT = " general.split" ;
26
- static const char * const LLM_KV_GENERAL_SPLIT_N_SPLIT = " general.split_count" ;
27
-
28
- static const int SPLIT_FILENAME_MAX = 256 ;
29
-
30
- static const char * const SPLIT_FILENAME_FORMAT = " %s-%05d-of-%05d.gguf" ;
25
+ static const char * const LLM_KV_GENERAL_SPLIT_I_SPLIT = " split.no" ; // @ggerganov: should we make this accessible from outside ?
26
+ static const char * const LLM_KV_GENERAL_SPLIT_N_SPLIT = " split.count" ;
31
27
32
28
struct split_params {
33
29
split_operation operation = SPLIT_OP_SPLIT;
@@ -136,12 +132,6 @@ static void zeros(std::ofstream & file, size_t n) {
136
132
}
137
133
}
138
134
139
- static std::string split_file_name (const std::string & path, int i_split, int n_split) {
140
- char f_split[SPLIT_FILENAME_MAX] = {0 };
141
- snprintf (f_split, sizeof (f_split), SPLIT_FILENAME_FORMAT, path.c_str (), i_split + 1 , n_split);
142
- return std::string (f_split);
143
- }
144
-
145
135
struct split_strategy {
146
136
const split_params params;
147
137
std::ifstream & f_input;
@@ -182,19 +172,20 @@ struct split_strategy {
182
172
if (i_split == 0 ) {
183
173
gguf_set_kv (ctx_out, ctx_gguf);
184
174
}
185
- gguf_set_val_u8 (ctx_out, LLM_KV_GENERAL_SPLIT_I_SPLIT, i_split);
186
- gguf_set_val_u8 (ctx_out, LLM_KV_GENERAL_SPLIT_N_SPLIT, n_split);
175
+ gguf_set_val_u16 (ctx_out, LLM_KV_GENERAL_SPLIT_I_SPLIT, i_split);
176
+ gguf_set_val_u16 (ctx_out, LLM_KV_GENERAL_SPLIT_N_SPLIT, n_split);
187
177
188
178
// populate the original tensors, so we get an initial metadata
189
179
for (int i = i_split * params.n_split_tensors ; i < n_tensors && i < (i_split + 1 ) * params.n_split_tensors ; ++i) {
190
180
struct ggml_tensor * meta = ggml_get_tensor (ctx_meta, gguf_get_tensor_name (ctx_gguf, i));
191
181
gguf_add_tensor (ctx_out, meta);
192
182
}
193
183
194
- auto split_name = split_file_name (params.output , i_split, n_split);
184
+ char split_path[4096 ] = {0 };
185
+ llama_split_path (split_path, sizeof (split_path), params.output .c_str (), i_split, n_split);
195
186
196
- fprintf (stderr, " %s: %s ..." , __func__, split_name. c_str () );
197
- fout = std::ofstream (split_name , std::ios::binary);
187
+ fprintf (stderr, " %s: %s ..." , __func__, split_path );
188
+ fout = std::ofstream (split_path , std::ios::binary);
198
189
fout.exceptions (std::ofstream::failbit); // fail fast on write errors
199
190
200
191
auto meta_size = gguf_get_meta_size (ctx_out);
@@ -262,9 +253,13 @@ static void gguf_split(const split_params & split_params) {
262
253
}
263
254
264
255
split_strategy strategy (split_params, f_input, ctx_gguf, ctx_meta);
256
+
257
+ char first_split_path[4096 ] = {0 };
258
+ llama_split_path (first_split_path, sizeof (first_split_path),
259
+ split_params.output .c_str (), strategy.i_split , strategy.n_split );
265
260
fprintf (stderr, " %s: %s -> %s (%d tensors per file)\n " ,
266
261
__func__, split_params.input .c_str (),
267
- split_file_name (split_params. output , strategy. i_split , strategy. n_split ). c_str () ,
262
+ first_split_path ,
268
263
split_params.n_split_tensors );
269
264
270
265
strategy.split_start ();
@@ -300,7 +295,9 @@ static void gguf_merge(const split_params & split_params) {
300
295
std::vector<ggml_context *> ctx_metas;
301
296
std::vector<gguf_context *> ctx_ggufs;
302
297
303
- std::string split_prefix;
298
+ char split_path[4096 ] = {0 };
299
+ strncpy (split_path, split_params.input .c_str (), sizeof (split_path));
300
+ char split_prefix[4096 ] = {0 };
304
301
305
302
// First pass to find KV and tensors metadata
306
303
for (int i_split = 0 ; i_split < n_split; i_split++) {
@@ -311,13 +308,12 @@ static void gguf_merge(const split_params & split_params) {
311
308
/* .ctx = */ &ctx_meta,
312
309
};
313
310
314
- auto split_name = split_params.input ;
315
311
if (i_split > 0 ) {
316
- split_name = split_file_name ( split_prefix, i_split, n_split);
312
+ llama_split_path (split_path, sizeof (split_path), split_prefix, i_split, n_split);
317
313
}
318
- fprintf (stderr, " %s: reading metadata %s ..." , __func__, split_name. c_str () );
314
+ fprintf (stderr, " %s: reading metadata %s ..." , __func__, split_path );
319
315
320
- auto * ctx_gguf = gguf_init_from_file (split_name. c_str () , params);
316
+ auto * ctx_gguf = gguf_init_from_file (split_path , params);
321
317
if (!ctx_gguf) {
322
318
fprintf (stderr, " \n %s: failed to load input GGUF from %s\n " , __func__, split_params.input .c_str ());
323
319
exit (1 );
@@ -333,65 +329,43 @@ static void gguf_merge(const split_params & split_params) {
333
329
__func__,
334
330
LLM_KV_GENERAL_SPLIT_N_SPLIT);
335
331
gguf_free (ctx_gguf);
332
+ ggml_free (ctx_meta);
336
333
gguf_free (ctx_out);
337
334
fout.close ();
338
335
exit (1 );
339
336
}
340
337
341
- n_split = gguf_get_val_u8 (ctx_gguf, key_n_split);
338
+ n_split = gguf_get_val_u16 (ctx_gguf, key_n_split);
342
339
if (n_split < 1 ) {
343
340
fprintf (stderr,
344
341
" \n %s: input file does not contain a valid split count %d\n " ,
345
342
__func__,
346
343
n_split);
347
344
gguf_free (ctx_gguf);
345
+ ggml_free (ctx_meta);
348
346
gguf_free (ctx_out);
349
347
fout.close ();
350
348
exit (1 );
351
349
}
352
350
353
- // Do not trigger merge if we try to merge again the output
354
- gguf_set_val_u8 (ctx_out, LLM_KV_GENERAL_SPLIT_N_SPLIT, 0 );
355
-
356
- // Set metadata from the first split
357
- gguf_set_kv (ctx_out, ctx_gguf);
358
- }
359
-
360
- // Verify the file naming
361
- {
362
- int i_split_file = 0 ;
363
- int n_split_file = 0 ;
364
- const char * i_split_format = " -00000-of-00000.gguf" ;
365
-
366
- if (split_name.size () < strlen (i_split_format)) {
367
- fprintf (stderr, " \n %s: unexpected input file name: %s\n " , __func__, split_params.input .c_str ());
368
- for (auto * _ctx_gguf : ctx_ggufs) {
369
- gguf_free (_ctx_gguf);
370
- }
371
- gguf_free (ctx_out);
372
- fout.close ();
373
- exit (1 );
374
- }
375
-
376
- split_prefix = split_name.substr (0 , split_name.size () - strlen (i_split_format));
377
-
378
- const char * split_name_c_str = split_name.c_str ();
379
- int n_part = sscanf (&split_name_c_str[0 ] + split_prefix.size (), " -%d-of-%d" , &i_split_file, &n_split_file);
380
-
381
- if (n_part != 2 || i_split_file - 1 != i_split || n_split_file != n_split) {
351
+ // Verify the file naming and extract split_prefix
352
+ if (!llama_split_prefix (split_prefix, split_path, i_split, n_split)) {
382
353
fprintf (stderr, " \n %s: unexpected input file name: %s"
383
- " i_split=%d i_split_file=%d"
384
- " n_split=%d n_split_file=%d\n " , __func__,
385
- split_params.input .c_str (),
386
- i_split, i_split_file,
387
- n_split, n_split_file);
388
- for (auto * _ctx_gguf : ctx_ggufs) {
389
- gguf_free (_ctx_gguf);
390
- }
354
+ " i_split=%d"
355
+ " n_split=%d\n " , __func__,
356
+ split_path, i_split, n_split);
357
+ gguf_free (ctx_gguf);
358
+ ggml_free (ctx_meta);
391
359
gguf_free (ctx_out);
392
360
fout.close ();
393
361
exit (1 );
394
362
}
363
+
364
+ // Do not trigger merge if we try to merge again the output
365
+ gguf_set_val_u16 (ctx_gguf, LLM_KV_GENERAL_SPLIT_N_SPLIT, 0 );
366
+ +
367
+ // Set metadata from the first split
368
+ gguf_set_kv (ctx_out, ctx_gguf);
395
369
}
396
370
397
371
auto n_tensors = gguf_get_n_tensors (ctx_gguf);
@@ -413,18 +387,19 @@ static void gguf_merge(const split_params & split_params) {
413
387
414
388
// Write tensors data
415
389
for (int i_split = 0 ; i_split < n_split; i_split++) {
416
- auto split_name = split_file_name ( split_prefix, i_split, n_split);
417
- std::ifstream f_input (split_name. c_str () , std::ios::binary);
390
+ llama_split_path (split_path, sizeof (split_path), split_prefix, i_split, n_split);
391
+ std::ifstream f_input (split_path , std::ios::binary);
418
392
if (!f_input.is_open ()) {
419
- fprintf (stderr, " %s: failed to open input GGUF from %s\n " , __func__, split_name.c_str ());
420
- for (auto * _ctx_gguf : ctx_ggufs) {
421
- gguf_free (_ctx_gguf);
393
+ fprintf (stderr, " %s: failed to open input GGUF from %s\n " , __func__, split_path);
394
+ for (uint32_t i = 0 ; i < ctx_ggufs.size (); i++) {
395
+ gguf_free (ctx_ggufs[i]);
396
+ ggml_free (ctx_metas[i]);
422
397
}
423
398
gguf_free (ctx_out);
424
399
fout.close ();
425
400
exit (1 );
426
401
}
427
- fprintf (stderr, " %s: writing tensors %s ..." , __func__, split_name. c_str () );
402
+ fprintf (stderr, " %s: writing tensors %s ..." , __func__, split_path );
428
403
429
404
auto * ctx_gguf = ctx_ggufs[i_split];
430
405
auto * ctx_meta = ctx_metas[i_split];
0 commit comments