@@ -1,11 +1,3 @@
-/*
-    License: MIT License
-
-    Changelog:
-    - 2023-03-31 Initial version by Sebastian Apel (https://github.com/SebastianApel)
-
-*/
-
 #include <locale.h>
 #include "ggml.h"
 #include <assert.h>
@@ -45,7 +37,7 @@ float tensor_sum_elements(struct ggml_tensor * tensor) {
 
 #define TENSOR_TYPE_AS_STR(TYPE) TYPE == GGML_TYPE_F32 ? "FP32" : TYPE == GGML_TYPE_F16 ? "FP16" : TYPE == GGML_TYPE_Q4_0 ? "Q4_0" : TYPE == GGML_TYPE_Q4_1 ? "Q4_1" : "UNKNOWN"
 
-#define TENSOR_DUMP(TENSOR) printf("%15s: type = %i (%5s) ne = %5d x %5d x %5d, nb = (%5li, %5li, %5li) - ", #TENSOR, \
+#define TENSOR_DUMP(TENSOR) printf("%15s: type = %i (%5s) ne = %5ld x %5ld x %5ld, nb = (%5li, %5li, %5li) - ", #TENSOR, \
     TENSOR->type,TENSOR_TYPE_AS_STR(TENSOR->type),\
     TENSOR->ne[0], TENSOR->ne[1], TENSOR->ne[2], TENSOR->nb[0], TENSOR->nb[1], TENSOR->nb[2]); \
     { float sum = tensor_sum_elements(TENSOR); printf("Sum of tensor %s is %6.2f\n",#TENSOR, sum); }
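
Note on the `%5d` to `%5ld` change above: it widens the format specifiers to match 64-bit `ne[]` dimension fields, where `%d` would read only 32 bits and could dump garbage. A minimal sketch of a portable alternative, assuming `int64_t` dimensions; the `<inttypes.h>` route and the sample values are illustrations, not what this patch does:

```c
// Minimal sketch, assuming ne[] holds int64_t values (hypothetical data).
// PRId64 expands to the right specifier on both LP64 and LLP64 targets,
// where a hard-coded %ld breaks on e.g. 64-bit Windows.
#include <stdio.h>
#include <inttypes.h>

int main(void) {
    int64_t ne[3] = {4096, 11008, 1};   // hypothetical tensor dimensions
    printf("ne = %5" PRId64 " x %5" PRId64 " x %5" PRId64 "\n",
           ne[0], ne[1], ne[2]);
    return 0;
}
```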
@@ -98,12 +90,9 @@ int main(int argc, char ** argv) {
         }
     }
 
-
     // create the ggml context
     printf("Starting Test\n");
 
-
-
     struct ggml_context * ctx;
     //const int sizex = 4096;
     //const int sizey = 11008;
@@ -125,16 +114,18 @@ int main(int argc, char ** argv) {
 #endif
 
     //printf("Memsize required = %i\n", sizex*sizex);
-    ggml_type wtype = GGML_TYPE_F32;
 
     size_t ctx_size = 0;
-    ctx_size += sizex*sizey*ggml_type_sizef(wtype);
-    ctx_size += sizex*sizey*ggml_type_sizef(wtype);
     ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_F32);
-    ctx_size += sizex*sizeof(float);
-    ctx_size += 1024*1024*100;
+    ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_F32);
+    ctx_size += sizex*sizez*ggml_type_sizef(GGML_TYPE_F32);
+    ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_Q4_0);
+    ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_Q4_0);
+    ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_F32); // BLAS
+    ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_F32); // BLAS
+    ctx_size += 1024*1024*16;
 
-    printf("Allocating Memory of size %li byes, %li MB\n",ctx_size, (ctx_size/1024/1024));
+    printf("Allocating Memory of size %li bytes, %li MB\n",ctx_size, (ctx_size/1024/1024));
 
     struct ggml_init_params params = {
         /*.mem_size   =*/ ctx_size,
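
Note: the new sizing replaces the flat `1024*1024*100` slack with one `ggml_type_sizef()` term per tensor the benchmark allocates, plus 16 MB of overhead. A rough back-of-the-envelope check of that accounting; the dimensions are hypothetical, the per-element byte sizes stand in for `ggml_type_sizef()`, and the per-line interpretations (inputs, result, scratch) are assumptions beyond the diff's own "// BLAS" comments:

```c
// Rough sketch of the ctx_size accounting; sizes are assumptions,
// not the benchmark's actual dimensions.
#include <stdio.h>

int main(void) {
    const double sizex = 4096, sizey = 4096, sizez = 128; // hypothetical dims
    const double f32  = 4.0;       // bytes/element, F32
    const double q4_0 = 20.0/32.0; // bytes/element, Q4_0: 32 quants + scale in 20 bytes

    double ctx_size = 0;
    ctx_size += 2 * sizex*sizey*f32;   // two sizex*sizey F32 matrices (inputs)
    ctx_size += sizex*sizez*f32;       // one sizex*sizez F32 matrix (result)
    ctx_size += 2 * sizex*sizey*q4_0;  // two Q4_0 quantized copies
    ctx_size += 2 * sizex*sizey*f32;   // two F32 tensors for the BLAS path
    ctx_size += 1024.0*1024*16;        // fixed overhead
    printf("ctx_size ~= %.1f MB\n", ctx_size/1024/1024);
    return 0;
}
```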
@@ -217,7 +208,7 @@ int main(int argc, char ** argv) {
     const int dimz = sizez;
     long long int flops_per_dot_product = dimy + dimy;
     long long int flops_per_matrix = flops_per_dot_product * dimx * dimz; ;
-    printf("Matrix Multiplication of (%i,%i,%i) x (%i,%i,%i) - aboout %6.2f gFLOPS\n\n", sizex, sizey, 1, sizex, sizez, 1, 1.0f*flops_per_matrix / 1000 / 1000 / 1000);
+    printf("Matrix Multiplication of (%i,%i,%i) x (%i,%i,%i) - about %6.2f gFLOPS\n\n", sizex, sizey, 1, sizex, sizez, 1, 1.0f*flops_per_matrix / 1000 / 1000 / 1000);
 
 
     // Let's use the F32 result from above as a reference for the q4_0 multiplication
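
Note on the estimate being printed: a dot product over `dimy` elements costs `dimy` multiplies plus `dimy` adds (hence `dimy + dimy`), and the matmul performs `dimx * dimz` such dot products. A worked example of the same arithmetic, with hypothetical dimensions:

```c
// Worked example of the FLOP estimate above; the dimensions are hypothetical.
#include <stdio.h>

int main(void) {
    const long long dimx = 4096, dimy = 11008, dimz = 128;
    long long flops_per_dot_product = dimy + dimy; // 1 mul + 1 add per element
    long long flops_per_matrix = flops_per_dot_product * dimx * dimz;
    printf("about %6.2f gFLOPS\n", 1.0f * flops_per_matrix / 1000 / 1000 / 1000);
    return 0;   // prints "about  11.54 gFLOPS" for these dimensions
}
```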
@@ -234,7 +225,6 @@ int main(int argc, char ** argv) {
         ggml_graph_compute(ctx, &gf31);
         long long int stop = ggml_time_us();
         long long int usec = stop-start;
-        float sec = usec/1000000;
         float flops_per_usec = (1.0f*flops_per_matrix)/usec;
         printf("%9i;%8i;%6i;%6i;%6i;%15lli;%18lli;%19.2f\n",
             i,