@@ -3494,7 +3494,7 @@ static bool GGML_IS_QUANTIZED[GGML_TYPE_COUNT] = {
 };
 static_assert(GGML_TYPE_COUNT == 13, "GGML_IS_QUANTIZED is outdated");
 
-static const char * GGML_OP_LABEL[GGML_OP_COUNT] = {
+static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "NONE",
 
     "DUP",
@@ -3749,6 +3749,9 @@ const char * ggml_type_name(enum ggml_type type) {
     return GGML_TYPE_NAME[type];
 }
 
+const char * ggml_op_name(enum ggml_op op) {
+    return GGML_OP_NAME[op];
+}
 
 size_t ggml_element_size(const struct ggml_tensor * tensor) {
     return GGML_TYPE_SIZE[tensor->type];
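
Note: with the rename, the op table gets a public accessor mirroring ggml_type_name(). A minimal sketch of how a caller might use the pair (hypothetical helper, not part of this commit):

    #include <stdio.h>
    #include "ggml.h"

    // print a node's op and element type via the public accessors,
    // instead of reaching into the internal name tables
    static void print_tensor_info(const struct ggml_tensor * t) {
        printf("%s: op=%s type=%s\n",
                t->name, ggml_op_name(t->op), ggml_type_name(t->type));
    }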
@@ -3805,6 +3808,10 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
     return wtype;
 }
 
+size_t ggml_tensor_overhead(void) {
+    return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE + 16;
+}
+
 static inline bool ggml_is_transposed(const struct ggml_tensor * tensor) {
     return tensor->nb[0] > tensor->nb[1];
 }
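
The returned value is the bookkeeping cost of one tensor in a context: the object header, the tensor struct itself, and 16 extra bytes that appear to be headroom for alignment. A hypothetical sizing helper built on the new accessor:

    // hypothetical helper: bytes a context needs to hold n tensor
    // headers (metadata only, no data buffers)
    static size_t headers_only_bytes(size_t n_tensors) {
        return n_tensors * ggml_tensor_overhead();
    }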
@@ -4017,6 +4024,10 @@ size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch)
     return result;
 }
 
+void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc) {
+    ctx->no_alloc = no_alloc;
+}
+
 // IMPORTANT:
 // when creating "opt" tensors, always save and load the scratch buffer
 // this is an error prone process, but it is necessary to support inplace
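
A minimal sketch of the intended no_alloc flow (not from this commit, and assuming ggml_init_params already carries the no_alloc flag as the context does): the context holds only tensor headers, while data lives in a caller-managed buffer.

    #include "ggml.h"

    void example_no_alloc(void * external_buf) {
        struct ggml_init_params params = {
            /*.mem_size   =*/ 16 * ggml_tensor_overhead(),
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ false,
        };
        struct ggml_context * ctx = ggml_init(params);

        ggml_set_no_alloc(ctx, true); // from here on, no data is reserved

        struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 64);
        a->data = external_buf;       // caller-provided storage (assumption)

        ggml_free(ctx);
    }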
@@ -4061,7 +4072,7 @@ struct ggml_tensor * ggml_new_tensor_impl(
     struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end);
 
     if (ctx->scratch.data == NULL || data != NULL) {
-        size_needed += sizeof(struct ggml_tensor);
+        size_needed += GGML_TENSOR_SIZE;
 
         if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
             GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
@@ -4077,14 +4088,15 @@ struct ggml_tensor * ggml_new_tensor_impl(
         };
     } else {
         if (ctx->scratch.offs + size_needed > ctx->scratch.size) {
-            GGML_PRINT("%s: not enough space in the scratch memory\n", __func__);
+            GGML_PRINT("%s: not enough space in the scratch memory pool (needed %zu, available %zu)\n",
+                    __func__, ctx->scratch.offs + size_needed, ctx->scratch.size);
             assert(false);
             return NULL;
         }
 
-        if (cur_end + sizeof(struct ggml_tensor) + GGML_OBJECT_SIZE > ctx->mem_size) {
+        if (cur_end + GGML_TENSOR_SIZE + GGML_OBJECT_SIZE > ctx->mem_size) {
             GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
-                    __func__, cur_end + sizeof(struct ggml_tensor) + GGML_OBJECT_SIZE, ctx->mem_size);
+                    __func__, cur_end + GGML_TENSOR_SIZE + GGML_OBJECT_SIZE, ctx->mem_size);
             assert(false);
             return NULL;
         }
@@ -4093,7 +4105,7 @@ struct ggml_tensor * ggml_new_tensor_impl(
 
         *obj_new = (struct ggml_object) {
             .offs = cur_end + GGML_OBJECT_SIZE,
-            .size = sizeof(struct ggml_tensor),
+            .size = GGML_TENSOR_SIZE,
             .next = NULL,
         };
 
@@ -13792,11 +13804,19 @@ static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor *
         // reached a leaf node, not part of the gradient graph (e.g. a constant)
         GGML_ASSERT(cgraph->n_leafs < GGML_MAX_NODES);
 
+        if (strlen(node->name) == 0) {
+            snprintf(node->name, sizeof(node->name), "leaf_%d", cgraph->n_leafs);
+        }
+
         cgraph->leafs[cgraph->n_leafs] = node;
         cgraph->n_leafs++;
     } else {
         GGML_ASSERT(cgraph->n_nodes < GGML_MAX_NODES);
 
+        if (strlen(node->name) == 0) {
+            snprintf(node->name, sizeof(node->name), "node_%d", cgraph->n_nodes);
+        }
+
         cgraph->nodes[cgraph->n_nodes] = node;
         cgraph->grads[cgraph->n_nodes] = node->grad;
         cgraph->n_nodes++;
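
The fallback only fires for tensors whose name is still empty, so anything named by the caller before the graph is built keeps its label. A hypothetical fragment (ctx and n are assumed to exist) writing the name field directly, the same way this patch does:

    // naming the tensor up front prevents the generic "leaf_%d" fallback
    struct ggml_tensor * inp = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n);
    snprintf(inp->name, sizeof(inp->name), "input_tokens");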
@@ -14510,6 +14530,26 @@ void ggml_graph_reset(struct ggml_cgraph * cgraph) {
     }
 }
 
+struct ggml_tensor * ggml_get_tensor_by_name(struct ggml_cgraph * cgraph, const char * name) {
+    for (int i = 0; i < cgraph->n_leafs; i++) {
+        struct ggml_tensor * leaf = cgraph->leafs[i];
+
+        if (strcmp(leaf->name, name) == 0) {
+            return leaf;
+        }
+    }
+
+    for (int i = 0; i < cgraph->n_nodes; i++) {
+        struct ggml_tensor * node = cgraph->nodes[i];
+
+        if (strcmp(node->name, name) == 0) {
+            return node;
+        }
+    }
+
+    return NULL;
+}
+
 void ggml_graph_print(const struct ggml_cgraph * cgraph) {
     int64_t perf_total_per_op_us[GGML_OP_COUNT] = {0};
 
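
A usage sketch for the new lookup (not from this commit; assumes ctx is a valid context with room for a few tensors). Unnamed tensors receive the leaf_%d / node_%d fallbacks above, while names set by the caller are preserved:

    #include <stdio.h>
    #include "ggml.h"

    void find_named_node(struct ggml_context * ctx) {
        struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
        struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
        struct ggml_tensor * c = ggml_add(ctx, a, b);
        snprintf(c->name, sizeof(c->name), "sum");

        struct ggml_cgraph gf = ggml_build_forward(c);

        struct ggml_tensor * t = ggml_get_tensor_by_name(&gf, "sum");
        printf("found: %s\n", t ? t->name : "(not found)");
    }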
@@ -14527,7 +14567,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
         GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s (%3d) cpu = %7.3f / %7.3f ms, wall = %7.3f / %7.3f ms\n",
                 i,
                 node->ne[0], node->ne[1], node->ne[2],
-                GGML_OP_LABEL[node->op], node->is_param ? "x" : node->grad ? "g" : " ", node->perf_runs,
+                GGML_OP_NAME[node->op], node->is_param ? "x" : node->grad ? "g" : " ", node->perf_runs,
                 (double) node->perf_cycles / (double) ggml_cycles_per_ms(),
                 (double) node->perf_cycles / (double) ggml_cycles_per_ms() / (double) node->perf_runs,
                 (double) node->perf_time_us / 1000.0,
@@ -14541,15 +14581,15 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
         GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s\n",
                 i,
                 node->ne[0], node->ne[1],
-                GGML_OP_LABEL[node->op]);
+                GGML_OP_NAME[node->op]);
     }
 
     for (int i = 0; i < GGML_OP_COUNT; i++) {
         if (perf_total_per_op_us[i] == 0) {
             continue;
         }
 
-        GGML_PRINT("perf_total_per_op_us[%16s] = %7.3f ms\n", GGML_OP_LABEL[i], (double) perf_total_per_op_us[i] / 1000.0);
+        GGML_PRINT("perf_total_per_op_us[%16s] = %7.3f ms\n", GGML_OP_NAME[i], (double) perf_total_per_op_us[i] / 1000.0);
     }
 
     GGML_PRINT("========================================\n");