1- /*
2- License: MIT License
3-
4- Changelog:
5- - 2023-03-31 Initial version by Sebastian Apel (https://github.com/SebastianApel)
6-
7- */
8-
91#include < locale.h>
102#include " ggml.h"
113#include < assert.h>
@@ -45,7 +37,7 @@ float tensor_sum_elements(struct ggml_tensor * tensor) {
4537
/*
 * Map a ggml tensor type constant to a short printable name.
 *
 * Expands to a string literal: "FP32", "FP16", "Q4_0" or "Q4_1" for the
 * matching GGML_TYPE_* constant, and "UNKNOWN" for every other value.
 *
 * TYPE and the whole expansion are parenthesized so the macro can be used
 * inside larger expressions and with arguments that contain operators.
 * TYPE is evaluated up to four times, so it must be free of side effects.
 */
#define TENSOR_TYPE_AS_STR(TYPE) \
    ((TYPE) == GGML_TYPE_F32  ? "FP32" : \
     (TYPE) == GGML_TYPE_F16  ? "FP16" : \
     (TYPE) == GGML_TYPE_Q4_0 ? "Q4_0" : \
     (TYPE) == GGML_TYPE_Q4_1 ? "Q4_1" : "UNKNOWN")
4739
/*
 * Print a one-line summary of a tensor to stdout: the expression text passed
 * as TENSOR, its type (as a number and as a name), the first three entries of
 * ne[] and nb[], and the value returned by tensor_sum_elements().
 *
 * TENSOR must be a pointer expression whose target has type, ne[] and nb[]
 * members. It is evaluated several times, so it must be free of side effects.
 *
 * The whole expansion is a single brace-enclosed block so that both printf
 * calls stay together when the macro is used as the body of an unbraced
 * if/for/while. A plain block (rather than do/while(0)) is used so existing
 * call sites keep compiling whether or not they add a trailing semicolon.
 *
 * ne[] entries are cast to long long and nb[] entries to size_t so the format
 * specifiers match the arguments on every platform, independent of the width
 * of long.
 */
#define TENSOR_DUMP(TENSOR) \
    { \
        printf("%15s: type = %i (%5s) ne = %5lld x %5lld x %5lld, nb = (%5zu, %5zu, %5zu) - ", #TENSOR, \
               (int) (TENSOR)->type, TENSOR_TYPE_AS_STR((TENSOR)->type), \
               (long long) (TENSOR)->ne[0], (long long) (TENSOR)->ne[1], (long long) (TENSOR)->ne[2], \
               (size_t) (TENSOR)->nb[0], (size_t) (TENSOR)->nb[1], (size_t) (TENSOR)->nb[2]); \
        float tensor_dump_sum = tensor_sum_elements(TENSOR); \
        printf("Sum of tensor %s is %6.2f\n", #TENSOR, tensor_dump_sum); \
    }
@@ -98,12 +90,9 @@ int main(int argc, char ** argv) {
9890 }
9991 }
10092
101-
10293 // create the ggml context
10394 printf (" Starting Test\n " );
10495
105-
106-
10796 struct ggml_context * ctx;
10897 // const int sizex = 4096;
10998 // const int sizey = 11008;
@@ -125,16 +114,18 @@ int main(int argc, char ** argv) {
125114#endif
126115
127116 // printf("Memsize required = %i\n", sizex*sizex);
128- ggml_type wtype = GGML_TYPE_F32 ;
129117
130118 size_t ctx_size = 0 ;
131- ctx_size += sizex * sizey * ggml_type_sizef (wtype );
132- ctx_size += sizex * sizey * ggml_type_sizef (wtype );
133119 ctx_size += sizex*sizey*ggml_type_sizef (GGML_TYPE_F32);
134- ctx_size += sizex * sizeof (float );
135- ctx_size += 1024 * 1024 * 100 ;
120+ ctx_size += sizex*sizey*ggml_type_sizef (GGML_TYPE_F32);
121+ ctx_size += sizex*sizez*ggml_type_sizef (GGML_TYPE_F32);
122+ ctx_size += sizex*sizey*ggml_type_sizef (GGML_TYPE_Q4_0);
123+ ctx_size += sizex*sizey*ggml_type_sizef (GGML_TYPE_Q4_0);
124+ ctx_size += sizex*sizey*ggml_type_sizef (GGML_TYPE_F32); // BLAS
125+ ctx_size += sizex*sizey*ggml_type_sizef (GGML_TYPE_F32); // BLAS
126+ ctx_size += 1024 *1024 *16 ;
136127
137- printf ("Allocating Memory of size %li byes , %li MB\n" ,ctx_size , (ctx_size /1024 /1024 ));
128+ printf (" Allocating Memory of size %li bytes , %li MB\n " ,ctx_size, (ctx_size/1024 /1024 ));
138129
139130 struct ggml_init_params params = {
140131 /* .mem_size =*/ ctx_size,
@@ -217,7 +208,7 @@ int main(int argc, char ** argv) {
217208 const int dimz = sizez;
218209 long long int flops_per_dot_product = dimy + dimy;
219210 long long int flops_per_matrix = flops_per_dot_product * dimx * dimz; ;
220- printf ("Matrix Multiplication of (%i,%i,%i) x (%i,%i,%i) - aboout %6.2f gFLOPS\n\n" , sizex , sizey , 1 , sizex , sizez , 1 , 1.0f * flops_per_matrix / 1000 / 1000 / 1000 );
211+ printf (" Matrix Multiplication of (%i,%i,%i) x (%i,%i,%i) - about %6.2f gFLOPS\n\n " , sizex, sizey, 1 , sizex, sizez, 1 , 1 .0f *flops_per_matrix / 1000 / 1000 / 1000 );
221212
222213
223214 // Let's use the F32 result from above as a reference for the q4_0 multiplication
@@ -234,7 +225,6 @@ int main(int argc, char ** argv) {
234225 ggml_graph_compute (ctx, &gf31);
235226 long long int stop = ggml_time_us ();
236227 long long int usec = stop-start;
237- float sec = usec /1000000 ;
238228 float flops_per_usec = (1 .0f *flops_per_matrix)/usec;
239229 printf (" %9i;%8i;%6i;%6i;%6i;%15lli;%18lli;%19.2f\n " ,
240230 i,
0 commit comments