@@ -12,76 +12,130 @@ extern "C" {
     // Backend buffer
     //
 
+    // buffer type
+    typedef void * ggml_backend_buffer_type_context_t;
+
+    struct ggml_backend_buffer_type_i {
+        const char *          (*GGML_CALL get_name)        (ggml_backend_buffer_type_t buft);
+        ggml_backend_buffer_t (*GGML_CALL alloc_buffer)    (ggml_backend_buffer_type_t buft, size_t size);
+        size_t                (*GGML_CALL get_alignment)   (ggml_backend_buffer_type_t buft); // tensor alignment
+        size_t                (*GGML_CALL get_max_size)    (ggml_backend_buffer_type_t buft); // allocation max size
+        size_t                (*GGML_CALL get_alloc_size)  (ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor); // data size needed to allocate the tensor, including padding
+        bool                  (*GGML_CALL supports_backend)(ggml_backend_buffer_type_t buft, ggml_backend_t backend); // check if the buffer type is usable by the backend
+        // check if tensor data is in host memory
+        // should be equivalent to supports_backend(buft, ggml_backend_cpu_init())
+        bool                  (*GGML_CALL is_host)         (ggml_backend_buffer_type_t buft);
+    };
+
+    struct ggml_backend_buffer_type {
+        struct ggml_backend_buffer_type_i  iface;
+        ggml_backend_buffer_type_context_t context;
+    };
+
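For context, here is how this vtable is typically dispatched by callers. This is a sketch of my reading, not code from this PR: the helper name is invented, and the assumption that `get_alloc_size` may be left NULL (with `ggml_nbytes` as the fallback) is based on how the accompanying ggml-backend.c treats the optional callbacks.

```c
// Illustrative dispatch through ggml_backend_buffer_type_i.
// Assumption: get_alloc_size (like get_max_size and is_host) is optional
// and may be NULL, in which case the raw tensor size is used.
static size_t example_buft_alloc_size(ggml_backend_buffer_type_t buft,
                                      const struct ggml_tensor * tensor) {
    if (buft->iface.get_alloc_size != NULL) {
        // backend-specific size, e.g. padded to the buffer type's alignment
        return buft->iface.get_alloc_size(buft, tensor);
    }
    return ggml_nbytes(tensor); // fallback: plain tensor size in bytes
}
```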
+    // buffer
     typedef void * ggml_backend_buffer_context_t;
 
     struct ggml_backend_buffer_i {
-        void   (*free_buffer)   (ggml_backend_buffer_t buffer);
-        void * (*get_base)      (ggml_backend_buffer_t buffer); // get base pointer
-        size_t (*get_alloc_size)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // pre-allocation callback
-        void   (*init_tensor)   (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // post-allocation callback
-        void   (*free_tensor)   (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // pre-free callback
+        const char * (*GGML_CALL get_name)   (ggml_backend_buffer_t buffer);
+        void         (*GGML_CALL free_buffer)(ggml_backend_buffer_t buffer);
+        void *       (*GGML_CALL get_base)   (ggml_backend_buffer_t buffer);
+        void         (*GGML_CALL init_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
+        void         (*GGML_CALL set_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
+        void         (*GGML_CALL get_tensor) (ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
+        bool         (*GGML_CALL cpy_tensor) (ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst); // dst is in the buffer, src may be in any buffer
+        void         (*GGML_CALL clear)      (ggml_backend_buffer_t buffer, uint8_t value);
+        void         (*GGML_CALL reset)      (ggml_backend_buffer_t buffer); // reset any internal state due to tensor initialization, such as tensor extras
     };
 
     struct ggml_backend_buffer {
-        struct ggml_backend_buffer_i iface;
-
-        ggml_backend_t backend;
+        struct ggml_backend_buffer_i  iface;
+        ggml_backend_buffer_type_t    buft;
         ggml_backend_buffer_context_t context;
-
         size_t size;
+        enum ggml_backend_buffer_usage usage;
     };
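The comment on `cpy_tensor` implies a fallback contract: the callback may be absent or may return false when it cannot handle the source buffer. A sketch of what a caller then does, assuming (my reading, not this PR's code) a bounce through host memory via the public synchronous accessors:

```c
#include <stdlib.h>          // malloc/free for the hypothetical slow path
#include "ggml-backend.h"    // ggml_backend_tensor_get/set, ggml_nbytes via ggml.h

// Illustrative only: try the buffer's single-copy path first, then fall
// back to a host round trip. The helper name is invented.
static void example_copy(const struct ggml_tensor * src, struct ggml_tensor * dst) {
    ggml_backend_buffer_t dst_buf = dst->buffer; // dst lives in this buffer
    if (dst_buf->iface.cpy_tensor == NULL ||
        !dst_buf->iface.cpy_tensor(dst_buf, src, dst)) {
        // slow path: stage through host memory
        size_t nbytes = ggml_nbytes(src);
        void * tmp = malloc(nbytes);
        ggml_backend_tensor_get(src, tmp, 0, nbytes);
        ggml_backend_tensor_set(dst, tmp, 0, nbytes);
        free(tmp);
    }
}
```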
 
-    GGML_API ggml_backend_buffer_t ggml_backend_buffer_init(
-            struct ggml_backend * backend,
+    GGML_CALL ggml_backend_buffer_t ggml_backend_buffer_init(
+            ggml_backend_buffer_type_t    buft,
             struct ggml_backend_buffer_i  iface,
             ggml_backend_buffer_context_t context,
             size_t size);
 
+    // do not use directly, use ggml_backend_tensor_copy instead
+    bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst);
+
+    // buffer that contains a collection of buffers
+    GGML_CALL ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(ggml_backend_buffer_t * buffers, size_t n_buffers);
+    GGML_CALL bool                  ggml_backend_buffer_is_multi_buffer(ggml_backend_buffer_t buffer);
+    GGML_CALL void                  ggml_backend_multi_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
+
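A minimal usage sketch for the multi-buffer API above, assuming the caller has already allocated two buffers (e.g. one per device) and that `GGML_BACKEND_BUFFER_USAGE_WEIGHTS` from ggml-backend.h is the intended usage tag; the helper name is invented:

```c
#include "ggml-backend-impl.h" // declares the multi-buffer functions used below

// Illustrative only: stitch two existing buffers into one handle that owns
// them, then tag the whole group as holding model weights.
static ggml_backend_buffer_t example_combine(ggml_backend_buffer_t buf0,
                                             ggml_backend_buffer_t buf1) {
    ggml_backend_buffer_t parts[2] = { buf0, buf1 };
    ggml_backend_buffer_t combined = ggml_backend_multi_buffer_alloc_buffer(parts, 2);
    if (ggml_backend_buffer_is_multi_buffer(combined)) { // true by construction
        ggml_backend_multi_buffer_set_usage(combined, GGML_BACKEND_BUFFER_USAGE_WEIGHTS);
    }
    return combined;
}
```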
 
     //
     // Backend
     //
 
     typedef void * ggml_backend_context_t;
 
     struct ggml_backend_i {
-        const char * (*get_name)(ggml_backend_t backend);
+        const char * (*GGML_CALL get_name)(ggml_backend_t backend);
 
-        void (*free)(ggml_backend_t backend);
+        void (*GGML_CALL free)(ggml_backend_t backend);
 
         // buffer allocation
-        ggml_backend_buffer_t (*alloc_buffer)(ggml_backend_t backend, size_t size);
+        ggml_backend_buffer_type_t (*GGML_CALL get_default_buffer_type)(ggml_backend_t backend);
 
-        // get buffer alignment
-        size_t (*get_alignment)(ggml_backend_t backend);
+        // (optional) asynchronous tensor data access
+        void (*GGML_CALL set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
+        void (*GGML_CALL get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
+        bool (*GGML_CALL cpy_tensor_async)(ggml_backend_t backend_src, ggml_backend_t backend_dst, const struct ggml_tensor * src, struct ggml_tensor * dst);
 
-        // tensor data access
-        // these functions can be asynchronous, helper functions are provided for synchronous access that automatically call synchronize
-        void (*set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
-        void (*get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
-        void (*synchronize)     (ggml_backend_t backend);
+        // (optional) complete all pending operations
+        void (*GGML_CALL synchronize)(ggml_backend_t backend);
 
-        // (optional) copy tensor between different backends, allow for single-copy tranfers
-        void (*cpy_tensor_from)(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst);
-        void (*cpy_tensor_to)  (ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst);
+        // compute graph with a plan (not used currently)
+        ggml_backend_graph_plan_t (*GGML_CALL graph_plan_create)(ggml_backend_t backend, const struct ggml_cgraph * cgraph);
+        void                      (*GGML_CALL graph_plan_free)  (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
 
         // compute graph with a plan
-        ggml_backend_graph_plan_t (*graph_plan_create) (ggml_backend_t backend, struct ggml_cgraph * cgraph);
-        void                      (*graph_plan_free)   (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
-        void                      (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
-
-        // compute graph without a plan
-        bool (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
+        enum ggml_status (*GGML_CALL graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
+        // compute graph without a plan (async)
+        enum ggml_status (*GGML_CALL graph_compute)     (ggml_backend_t backend, struct ggml_cgraph * cgraph);
 
         // check if the backend supports an operation
-        bool (*supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
+        bool (*GGML_CALL supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
+
+        // check if the backend wants to run an operation, even if the weights are allocated in a CPU buffer
+        // these should be expensive operations with large batch sizes that may benefit from running on this backend
+        // even if the weight has to be copied from the CPU temporarily
+        bool (*GGML_CALL offload_op)(ggml_backend_t backend, const struct ggml_tensor * op);
+
+        // (optional) event synchronization
+        ggml_backend_event_t (*GGML_CALL event_new)        (ggml_backend_t backend);
+        void                 (*GGML_CALL event_free)       (ggml_backend_event_t event);
+        void                 (*GGML_CALL event_record)     (ggml_backend_event_t event);
+        void                 (*GGML_CALL event_wait)       (ggml_backend_t backend, ggml_backend_event_t event);
+        void                 (*GGML_CALL event_synchronize)(ggml_backend_event_t event);
     };
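To make the optionality explicit: per the comments in the struct, a purely synchronous backend only needs `get_name`, `free`, `get_default_buffer_type`, `graph_compute` and `supports_op`; everything marked optional (and the unused plan API) can be NULL. A hypothetical minimal vtable, with every `my_*` name invented for illustration:

```c
static const char * GGML_CALL my_backend_name(ggml_backend_t backend) {
    (void)backend;
    return "MyBackend";
}

static void GGML_CALL my_backend_free(ggml_backend_t backend) {
    (void)backend; // nothing heap-allocated in this toy backend
}

static ggml_backend_buffer_type_t GGML_CALL my_backend_default_buft(ggml_backend_t backend) {
    (void)backend;
    return ggml_backend_cpu_buffer_type(); // toy backend keeps tensors in host memory
}

static enum ggml_status GGML_CALL my_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
    (void)backend; (void)cgraph; // a real backend would execute the graph here
    return GGML_STATUS_SUCCESS;
}

static bool GGML_CALL my_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
    (void)backend; (void)op;
    return false; // claim nothing; callers fall back to other backends
}

static struct ggml_backend_i my_backend_iface = {
    /* .get_name                = */ my_backend_name,
    /* .free                    = */ my_backend_free,
    /* .get_default_buffer_type = */ my_backend_default_buft,
    /* .set_tensor_async        = */ NULL, // optional: backend is synchronous
    /* .get_tensor_async        = */ NULL,
    /* .cpy_tensor_async        = */ NULL,
    /* .synchronize             = */ NULL, // nothing pending to wait on
    /* .graph_plan_create       = */ NULL, // plan API unused, per comment above
    /* .graph_plan_free         = */ NULL,
    /* .graph_plan_compute      = */ NULL,
    /* .graph_compute           = */ my_backend_graph_compute,
    /* .supports_op             = */ my_backend_supports_op,
    /* .offload_op              = */ NULL, // never requests offload
    /* .event_new               = */ NULL, // optional event API
    /* .event_free              = */ NULL,
    /* .event_record            = */ NULL,
    /* .event_wait              = */ NULL,
    /* .event_synchronize       = */ NULL,
};
```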
 
     struct ggml_backend {
-        struct ggml_backend_i iface;
+        ggml_guid_t guid;
 
+        struct ggml_backend_i iface;
         ggml_backend_context_t context;
     };
 
+    struct ggml_backend_event {
+        ggml_backend_t backend;
+        void * context;
+    };
+
+    //
+    // Backend registry
+    //
+
+    typedef ggml_backend_t (*GGML_CALL ggml_backend_init_fn)(const char * params, void * user_data);
+
+    GGML_CALL void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data);
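And the registration hook in use. A sketch under stated assumptions: the `my_*` names are invented, and `my_backend_init` is a hypothetical constructor the backend would provide elsewhere:

```c
extern ggml_backend_t my_backend_init(void); // hypothetical constructor

// Adapter matching ggml_backend_init_fn; this toy backend ignores params.
static ggml_backend_t GGML_CALL my_backend_reg_init(const char * params, void * user_data) {
    (void)params; (void)user_data;
    return my_backend_init();
}

// e.g. called once at startup:
// ggml_backend_register("MyBackend", my_backend_reg_init,
//                       ggml_backend_cpu_buffer_type(), NULL);
```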
+
 #ifdef __cplusplus
 }
 #endif