#include "ggml.h"

#include <cstdio>
+#include <cinttypes>
#include <string>
+#include <sstream>
+#include <fstream>
+#include <vector>

-bool gguf_write(const std::string & fname) {
+enum gguf_type {
+    GGUF_TYPE_UINT8   = 0,
+    GGUF_TYPE_INT8    = 1,
+    GGUF_TYPE_UINT16  = 2,
+    GGUF_TYPE_INT16   = 3,
+    GGUF_TYPE_UINT32  = 4,
+    GGUF_TYPE_INT32   = 5,
+    GGUF_TYPE_FLOAT32 = 6,
+    GGUF_TYPE_BOOL    = 7,
+    GGUF_TYPE_STRING  = 8,
+    GGUF_TYPE_ARRAY   = 9,
+};

+template<typename T>
+static std::string to_string(const T & val) {
+    std::stringstream ss;
+    ss << val;
+    return ss.str();
+}
+
+void gguf_ex_write_str(std::ofstream & fout, const std::string & val) {
+    const int32_t n = val.size();
+    fout.write((const char *) &n, sizeof(n));
+    fout.write(val.c_str(), n);
+}
+
+void gguf_ex_write_i32(std::ofstream & fout, int32_t val) {
+    fout.write((const char *) &val, sizeof(val));
+}
+
+void gguf_ex_write_u64(std::ofstream & fout, size_t val) {
+    fout.write((const char *) &val, sizeof(val));
+}
+
+template<typename T>
+void gguf_ex_write_param(std::ofstream & fout, const std::string & key, enum gguf_type type, const T & val) {
+    gguf_ex_write_str(fout, key);
+    fout.write((const char *) &type, sizeof(type));
+    fout.write((const char *) &val, sizeof(val));
+
+    fprintf(stdout, "%s: write param: %s = %s\n", __func__, key.c_str(), to_string(val).c_str());
+}
+
+template<>
+void gguf_ex_write_param<std::string>(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::string & val) {
+    gguf_ex_write_str(fout, key);
+    fout.write((const char *) &type, sizeof(type));
+
+    const int32_t n = val.size();
+    fout.write((const char *) &n, sizeof(n));
+    fout.write(val.c_str(), n);
+}
+
+bool gguf_ex_write(const std::string & fname) {
+    std::ofstream fout(fname.c_str(), std::ios::binary);
+
+    {
+        const int32_t magic = GGUF_MAGIC;
+        fout.write((const char *) &magic, sizeof(magic));
+    }
+
+    {
+        const int32_t version = GGUF_VERSION;
+        fout.write((const char *) &version, sizeof(version));
+    }
+
+    const int n_tensors = 10;
+    const int n_kv      = 9;
+
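+    // the tensor count and key-value count are written right after the magic and version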
+    fout.write((const char *) &n_tensors, sizeof(n_tensors));
+    fout.write((const char *) &n_kv,      sizeof(n_kv));
+
+    fprintf(stdout, "%s: write header\n", __func__);
+
+    // kv data
+    {
+        gguf_ex_write_param< uint8_t>(fout, "some.parameter.uint8",   GGUF_TYPE_UINT8,    0x12);
+        gguf_ex_write_param<  int8_t>(fout, "some.parameter.int8",    GGUF_TYPE_INT8,    -0x13);
+        gguf_ex_write_param<uint16_t>(fout, "some.parameter.uint16",  GGUF_TYPE_UINT16,   0x1234);
+        gguf_ex_write_param< int16_t>(fout, "some.parameter.int16",   GGUF_TYPE_INT16,   -0x1235);
+        gguf_ex_write_param<uint32_t>(fout, "some.parameter.uint32",  GGUF_TYPE_UINT32,   0x12345678);
+        gguf_ex_write_param< int32_t>(fout, "some.parameter.int32",   GGUF_TYPE_INT32,   -0x12345679);
+
+        gguf_ex_write_param<float>      (fout, "some.parameter.float32", GGUF_TYPE_FLOAT32, 0.123456789f);
+        gguf_ex_write_param<bool>       (fout, "some.parameter.bool",    GGUF_TYPE_BOOL,    true);
+
+        gguf_ex_write_param<std::string>(fout, "some.parameter.string",  GGUF_TYPE_STRING,  "hello world");
+    }
+
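+    // running byte offset of each tensor's data within the data section; recorded in the tensor infos below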
+    uint64_t offset_tensor = 0;
+
+    struct ggml_init_params params = {
+        /*.mem_size   =*/ 128ull*1024ull*1024ull,
+        /*.mem_buffer =*/ NULL,
+        /*.no_alloc   =*/ false,
+    };
+
+    struct ggml_context * ctx_data = ggml_init(params);
+
+    // tensor infos
+    for (int i = 0; i < n_tensors; ++i) {
+        const std::string name = "tensor_" + to_string(i);
+
+        int64_t ne[GGML_MAX_DIMS] = { 1 };
+        int32_t n_dims = rand() % GGML_MAX_DIMS + 1;
+
+        for (int j = 0; j < n_dims; ++j) {
+            ne[j] = rand() % 10 + 1;
+        }
+
+        struct ggml_tensor * cur = ggml_new_tensor(ctx_data, GGML_TYPE_F32, n_dims, ne);
+        ggml_set_name(cur, name.c_str());
+
+        {
+            float * data = (float *) cur->data;
+            for (int j = 0; j < ggml_nelements(cur); ++j) {
+                data[j] = 100 + i;
+            }
+        }
+
+        fprintf(stdout, "%s: tensor: %s, %d dims, ne = [", __func__, name.c_str(), n_dims);
+        for (int j = 0; j < 4; ++j) {
+            fprintf(stdout, "%s%3d", j == 0 ? "" : ", ", (int) cur->ne[j]);
+        }
+        fprintf(stdout, "], offset_tensor = %6" PRIu64 "\n", offset_tensor);
+
+        gguf_ex_write_str(fout, name);
+        gguf_ex_write_i32(fout, n_dims);
+        for (int j = 0; j < n_dims; ++j) {
+            gguf_ex_write_i32(fout, cur->ne[j]);
+        }
+        gguf_ex_write_i32(fout, cur->type);
+        gguf_ex_write_u64(fout, offset_tensor);
+
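+        // advance by the tensor size rounded up to GGUF_DEFAULT_ALIGNMENT so every recorded offset stays aligned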
+        offset_tensor += GGML_PAD(ggml_nbytes(cur), GGUF_DEFAULT_ALIGNMENT);
+    }
+
+    const uint64_t offset_data = GGML_PAD((uint64_t) fout.tellp(), GGUF_DEFAULT_ALIGNMENT);
+
+    fprintf(stdout, "%s: data offset = %" PRIu64 "\n", __func__, offset_data);
+
+    {
+        const size_t pad = offset_data - fout.tellp();
+
+        for (size_t j = 0; j < pad; ++j) {
+            fout.put(0);
+        }
+    }
+
+    for (int i = 0; i < n_tensors; ++i) {
+        fprintf(stdout, "%s: writing tensor %d data\n", __func__, i);
+
+        const std::string name = "tensor_" + to_string(i);
+
+        struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name.c_str());
+
+        fout.write((const char *) cur->data, ggml_nbytes(cur));
+
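+        // pad each tensor's data to the same alignment that was used when computing the offsets above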
+        {
+            const size_t pad = GGML_PAD(ggml_nbytes(cur), GGUF_DEFAULT_ALIGNMENT) - ggml_nbytes(cur);
+
+            for (size_t j = 0; j < pad; ++j) {
+                fout.put(0);
+            }
+        }
+    }
+
+    fout.close();
+
+    fprintf(stdout, "%s: wrote file '%s'\n", __func__, fname.c_str());
+
+    ggml_free(ctx_data);
+
+    return true;
+}
+
+// just read tensor info
+bool gguf_ex_read_0(const std::string & fname) {
+    struct gguf_init_params params = {
+        /*.no_alloc = */ false,
+        /*.ctx      = */ NULL,
+    };
+
+    struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
+
+    fprintf(stdout, "%s: version:      %d\n", __func__, gguf_get_version(ctx));
+    fprintf(stdout, "%s: alignment:   %zu\n", __func__, gguf_get_alignment(ctx));
+    fprintf(stdout, "%s: data offset: %zu\n", __func__, gguf_get_data_offset(ctx));
+
+    // kv
+    {
+        const int n_kv = gguf_get_n_kv(ctx);
+
+        fprintf(stdout, "%s: n_kv: %d\n", __func__, n_kv);
+
+        for (int i = 0; i < n_kv; ++i) {
+            const char * key = gguf_get_key(ctx, i);
+
+            fprintf(stdout, "%s: kv[%d]: key = %s\n", __func__, i, key);
+        }
+    }
+
+    // tensor info
+    {
+        const int n_tensors = gguf_get_n_tensors(ctx);
+
+        fprintf(stdout, "%s: n_tensors: %d\n", __func__, n_tensors);
+
+        for (int i = 0; i < n_tensors; ++i) {
+            const char * name   = gguf_get_tensor_name  (ctx, i);
+            const size_t offset = gguf_get_tensor_offset(ctx, i);
+
+            fprintf(stdout, "%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset);
+        }
+    }
+
+    return true;
+}
+
+// read and create ggml_context containing the tensors and their data
+bool gguf_ex_read_1(const std::string & fname) {
+    struct ggml_context * ctx_data = NULL;
+
+    struct gguf_init_params params = {
+        /*.no_alloc = */ false,
+        /*.ctx      = */ &ctx_data,
+    };
+
+    struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
+
+    fprintf(stdout, "%s: version:      %d\n", __func__, gguf_get_version(ctx));
+    fprintf(stdout, "%s: alignment:   %zu\n", __func__, gguf_get_alignment(ctx));
+    fprintf(stdout, "%s: data offset: %zu\n", __func__, gguf_get_data_offset(ctx));
+
+    // kv
+    {
+        const int n_kv = gguf_get_n_kv(ctx);
+
+        fprintf(stdout, "%s: n_kv: %d\n", __func__, n_kv);
+
+        for (int i = 0; i < n_kv; ++i) {
+            const char * key = gguf_get_key(ctx, i);
+
+            fprintf(stdout, "%s: kv[%d]: key = %s\n", __func__, i, key);
+        }
+    }
+
+    // tensor info
+    {
+        const int n_tensors = gguf_get_n_tensors(ctx);
+
+        fprintf(stdout, "%s: n_tensors: %d\n", __func__, n_tensors);
+
+        for (int i = 0; i < n_tensors; ++i) {
+            const char * name   = gguf_get_tensor_name  (ctx, i);
+            const size_t offset = gguf_get_tensor_offset(ctx, i);
+
+            fprintf(stdout, "%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset);
+        }
+    }
+
+    // data
+    {
+        const int n_tensors = gguf_get_n_tensors(ctx);
+
+        for (int i = 0; i < n_tensors; ++i) {
+            fprintf(stdout, "%s: reading tensor %d data\n", __func__, i);
+
+            const std::string name = "tensor_" + to_string(i);
+
+            struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name.c_str());
+
+            fprintf(stdout, "%s: tensor[%d]: n_dims = %d, name = %s, data = %p\n",
+                    __func__, i, cur->n_dims, cur->name, cur->data);
+
+            // check data
+            {
+                const float * data = (const float *) cur->data;
+                for (int j = 0; j < ggml_nelements(cur); ++j) {
+                    if (data[j] != 100 + i) {
+                        fprintf(stderr, "%s: tensor[%d]: data[%d] = %f\n", __func__, i, j, data[j]);
+                        return false;
+                    }
+                }
+            }
+        }
+    }
+
+    fprintf(stdout, "%s: ctx_data size: %zu\n", __func__, ggml_get_mem_size(ctx_data));
+
+    ggml_free(ctx_data);
+    gguf_free(ctx);

    return true;
}

-bool gguf_read(const std::string & fname) {
+// read just the tensor info and mmap the data in user code
+bool gguf_ex_read_2(const std::string & fname) {
+    struct ggml_context * ctx_data = NULL;
+
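+    // no_alloc == true: the tensors are created in ctx_data without data buffers,
+    // so the caller can mmap the file and point the tensor data into it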
+    struct gguf_init_params params = {
+        /*.no_alloc = */ true,
+        /*.ctx      = */ &ctx_data,
+    };
+
+    struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
+
+    // TODO: mmap based on tensor infos
+
+    fprintf(stdout, "%s: ctx_data size: %zu\n", __func__, ggml_get_mem_size(ctx_data));
+
+    ggml_free(ctx_data);
+    gguf_free(ctx);
+
    return true;
}

@@ -20,14 +331,16 @@ int main(int argc, char ** argv) {
    }

    const std::string fname(argv[1]);
-    const std::string mode(argv[2]);
+    const std::string mode (argv[2]);

    GGML_ASSERT((mode == "r" || mode == "w") && "mode must be r or w");

    if (mode == "w") {
-        GGML_ASSERT(gguf_write(fname) && "failed to write gguf file");
+        GGML_ASSERT(gguf_ex_write(fname) && "failed to write gguf file");
    } else if (mode == "r") {
-        GGML_ASSERT(gguf_read(fname) && "failed to read gguf file");
+        GGML_ASSERT(gguf_ex_read_0(fname) && "failed to read gguf file");
+        GGML_ASSERT(gguf_ex_read_1(fname) && "failed to read gguf file");
+        GGML_ASSERT(gguf_ex_read_2(fname) && "failed to read gguf file");
    }

    return 0;