Commit 91c818c

taf2 authored and iThalay committed

ruby : update bindings (ggml-org#2154)

* update library files
* update whispercpp
* not needed for gem
1 parent 6fe075a commit 91c818c

15 files changed, +13247 -4461 lines

bindings/ruby/Rakefile

+12
@@ -0,0 +1,12 @@
+require 'rake/clean'
+require 'rubygems/package'
+
+desc 'Build gem'
+task :package do
+  spec_source = File.read File.join(File.dirname(__FILE__),'whispercpp.gemspec')
+  spec = nil
+  # see: http://gist.github.com/16215
+  Thread.new { spec = eval("#{spec_source}") }.join
+  spec.validate
+  Gem::Package.build(spec)
+end
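
With this task in place, running rake package from bindings/ruby should build the gem described by whispercpp.gemspec: the gemspec source is evaluated in a short-lived thread (the trick referenced in the gist link above), then validated and packed with Gem::Package.build.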

bindings/ruby/ext/ggml-backend-impl.h

+86 -32
@@ -12,76 +12,130 @@ extern "C" {
     // Backend buffer
     //
 
+    // buffer type
+    typedef void * ggml_backend_buffer_type_context_t;
+
+    struct ggml_backend_buffer_type_i {
+        const char *          (*GGML_CALL get_name)        (ggml_backend_buffer_type_t buft);
+        ggml_backend_buffer_t (*GGML_CALL alloc_buffer)    (ggml_backend_buffer_type_t buft, size_t size);
+        size_t                (*GGML_CALL get_alignment)   (ggml_backend_buffer_type_t buft); // tensor alignment
+        size_t                (*GGML_CALL get_max_size)    (ggml_backend_buffer_type_t buft); // allocation max size
+        size_t                (*GGML_CALL get_alloc_size)  (ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor); // data size needed to allocate the tensor, including padding
+        bool                  (*GGML_CALL supports_backend)(ggml_backend_buffer_type_t buft, ggml_backend_t backend); // check if the buffer type is usable by the backend
+        // check if tensor data is in host memory
+        // should be equivalent to supports_backend(buft, ggml_backend_cpu_init())
+        bool                  (*GGML_CALL is_host)         (ggml_backend_buffer_type_t buft);
+    };
+
+    struct ggml_backend_buffer_type {
+        struct ggml_backend_buffer_type_i  iface;
+        ggml_backend_buffer_type_context_t context;
+    };
+
+    // buffer
     typedef void * ggml_backend_buffer_context_t;
 
     struct ggml_backend_buffer_i {
-        void   (*free_buffer)   (ggml_backend_buffer_t buffer);
-        void * (*get_base)      (ggml_backend_buffer_t buffer); // get base pointer
-        size_t (*get_alloc_size)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // pre-allocation callback
-        void   (*init_tensor)   (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // post-allocation callback
-        void   (*free_tensor)   (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // pre-free callback
+        const char * (*GGML_CALL get_name)   (ggml_backend_buffer_t buffer);
+        void         (*GGML_CALL free_buffer)(ggml_backend_buffer_t buffer);
+        void *       (*GGML_CALL get_base)   (ggml_backend_buffer_t buffer);
+        void         (*GGML_CALL init_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
+        void         (*GGML_CALL set_tensor) (ggml_backend_buffer_t buffer,       struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
+        void         (*GGML_CALL get_tensor) (ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor,       void * data, size_t offset, size_t size);
+        bool         (*GGML_CALL cpy_tensor) (ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst); // dst is in the buffer, src may be in any buffer
+        void         (*GGML_CALL clear)      (ggml_backend_buffer_t buffer, uint8_t value);
+        void         (*GGML_CALL reset)      (ggml_backend_buffer_t buffer); // reset any internal state due to tensor initialization, such as tensor extras
     };
 
     struct ggml_backend_buffer {
-        struct ggml_backend_buffer_i iface;
-
-        ggml_backend_t backend;
+        struct ggml_backend_buffer_i  iface;
+        ggml_backend_buffer_type_t    buft;
         ggml_backend_buffer_context_t context;
-
         size_t size;
+        enum ggml_backend_buffer_usage usage;
     };
 
-    GGML_API ggml_backend_buffer_t ggml_backend_buffer_init(
-            struct ggml_backend * backend,
+    GGML_CALL ggml_backend_buffer_t ggml_backend_buffer_init(
+            ggml_backend_buffer_type_t    buft,
             struct ggml_backend_buffer_i  iface,
             ggml_backend_buffer_context_t context,
             size_t                        size);
 
+    // do not use directly, use ggml_backend_tensor_copy instead
+    bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst);
+
+    // buffer that contains a collection of buffers
+    GGML_CALL ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(ggml_backend_buffer_t * buffers, size_t n_buffers);
+    GGML_CALL bool                  ggml_backend_buffer_is_multi_buffer(ggml_backend_buffer_t buffer);
+    GGML_CALL void                  ggml_backend_multi_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
+
     //
     // Backend
     //
 
     typedef void * ggml_backend_context_t;
 
     struct ggml_backend_i {
-        const char * (*get_name)(ggml_backend_t backend);
+        const char * (*GGML_CALL get_name)(ggml_backend_t backend);
 
-        void (*free)(ggml_backend_t backend);
+        void (*GGML_CALL free)(ggml_backend_t backend);
 
         // buffer allocation
-        ggml_backend_buffer_t (*alloc_buffer)(ggml_backend_t backend, size_t size);
+        ggml_backend_buffer_type_t (*GGML_CALL get_default_buffer_type)(ggml_backend_t backend);
 
-        // get buffer alignment
-        size_t (*get_alignment)(ggml_backend_t backend);
+        // (optional) asynchronous tensor data access
+        void (*GGML_CALL set_tensor_async)(ggml_backend_t backend,       struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
+        void (*GGML_CALL get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor,       void * data, size_t offset, size_t size);
+        bool (*GGML_CALL cpy_tensor_async)(ggml_backend_t backend_src, ggml_backend_t backend_dst, const struct ggml_tensor * src, struct ggml_tensor * dst);
 
-        // tensor data access
-        // these functions can be asynchronous, helper functions are provided for synchronous access that automatically call synchronize
-        void (*set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
-        void (*get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
-        void (*synchronize)     (ggml_backend_t backend);
+        // (optional) complete all pending operations
+        void (*GGML_CALL synchronize)(ggml_backend_t backend);
 
-        // (optional) copy tensor between different backends, allow for single-copy tranfers
-        void (*cpy_tensor_from)(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst);
-        void (*cpy_tensor_to)  (ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst);
+        // compute graph with a plan (not used currently)
+        ggml_backend_graph_plan_t (*GGML_CALL graph_plan_create) (ggml_backend_t backend, const struct ggml_cgraph * cgraph);
+        void                      (*GGML_CALL graph_plan_free)   (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
 
         // compute graph with a plan
-        ggml_backend_graph_plan_t (*graph_plan_create) (ggml_backend_t backend, struct ggml_cgraph * cgraph);
-        void (*graph_plan_free)   (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
-        void (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
-
-        // compute graph without a plan
-        bool (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
+        enum ggml_status (*GGML_CALL graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
+        // compute graph without a plan (async)
+        enum ggml_status (*GGML_CALL graph_compute)     (ggml_backend_t backend, struct ggml_cgraph * cgraph);
 
         // check if the backend supports an operation
-        bool (*supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
+        bool (*GGML_CALL supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
+
+        // check if the backend wants to run an operation, even if the weights are allocated in a CPU buffer
+        // these should be expensive operations with large batch sizes that may benefit from running on this backend
+        // even if the weight has to be copied from the CPU temporarily
+        bool (*GGML_CALL offload_op)(ggml_backend_t backend, const struct ggml_tensor * op);
+
+        // (optional) event synchronization
+        ggml_backend_event_t (*GGML_CALL event_new)         (ggml_backend_t backend);
+        void                 (*GGML_CALL event_free)        (ggml_backend_event_t event);
+        void                 (*GGML_CALL event_record)      (ggml_backend_event_t event);
+        void                 (*GGML_CALL event_wait)        (ggml_backend_t backend, ggml_backend_event_t event);
+        void                 (*GGML_CALL event_synchronize) (ggml_backend_event_t event);
     };
 
     struct ggml_backend {
-        struct ggml_backend_i iface;
+        ggml_guid_t guid;
 
+        struct ggml_backend_i iface;
         ggml_backend_context_t context;
     };
 
+    struct ggml_backend_event {
+        ggml_backend_t backend;
+        void * context;
+    };
+
+    //
+    // Backend registry
+    //
+
+    typedef ggml_backend_t (*GGML_CALL ggml_backend_init_fn)(const char * params, void * user_data);
+
+    GGML_CALL void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data);
+
 #ifdef __cplusplus
 }
 #endif
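
Taken together, the registry entry points added at the bottom of the header show how a backend becomes discoverable at runtime. Below is a minimal sketch, not code from this commit: my_backend_init and my_backend_register are hypothetical names, the default buffer type is stubbed with NULL, and the declarations above are assumed to be in scope via ggml-backend-impl.h.

    #include "ggml-backend-impl.h"

    // Hypothetical init function matching the ggml_backend_init_fn typedef above.
    // A real backend would allocate its context and return a filled-in ggml_backend.
    GGML_CALL static ggml_backend_t my_backend_init(const char * params, void * user_data) {
        (void) params;
        (void) user_data;
        return NULL; // stub only: a real implementation returns an initialized backend
    }

    // Hypothetical one-time registration; the third argument would normally be the
    // backend's default buffer type rather than NULL.
    static void my_backend_register(void) {
        ggml_backend_register("MyBackend", my_backend_init, NULL, NULL);
    }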

0 commit comments