@@ -35,6 +35,13 @@ bool ggml_backend_buft_supports_backend(ggml_backend_buffer_type_t buft, ggml_ba
     return buft->iface.supports_backend(buft, backend);
 }
 
+bool ggml_backend_buft_is_host(ggml_backend_buffer_type_t buft) {
+    if (buft->iface.is_host) {
+        return buft->iface.is_host(buft);
+    }
+    return false;
+}
+
 // backend buffer
 
 ggml_backend_buffer_t ggml_backend_buffer_init(
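Note the fallback: if a buffer type does not implement the `is_host` callback, `ggml_backend_buft_is_host` reports `false`, so callers conservatively take the non-host path. A minimal sketch of such a caller, assuming the matching declaration in `ggml-backend.h`; `upload_via_backend()` is a hypothetical helper used only for illustration, not a ggml API:

```c
#include <string.h>
#include "ggml-backend.h"

// Sketch only: copy raw bytes into a tensor that lives in a buffer of type `buft`.
// `upload_via_backend` is a placeholder for a backend-specific copy path.
static void copy_data_in(ggml_backend_buffer_type_t buft, struct ggml_tensor * t,
                         const void * data, size_t size) {
    if (ggml_backend_buft_is_host(buft)) {
        memcpy(t->data, data, size);        // host-visible memory: plain memcpy is safe
    } else {
        upload_via_backend(t, data, size);  // non-host (or unknown): go through the backend
    }
}
```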
@@ -94,6 +101,14 @@ size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct g
     return ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type(buffer), tensor);
 }
 
+void ggml_backend_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
+    buffer->iface.clear(buffer, value);
+}
+
+bool ggml_backend_buffer_is_host(ggml_backend_buffer_t buffer) {
+    return ggml_backend_buft_is_host(ggml_backend_buffer_type(buffer));
+}
+
 ggml_backend_buffer_type_t ggml_backend_buffer_type(ggml_backend_buffer_t buffer) {
     return buffer->buft;
 }
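The two buffer-level wrappers are thin: `clear` dispatches to the buffer's `iface.clear`, and `is_host` defers to the buffer's type. A short usage sketch, assuming a CPU backend and that the new functions are declared in `ggml-backend.h` as part of this change:

```c
#include <assert.h>
#include "ggml-backend.h"

static void demo_clear(void) {
    ggml_backend_t backend = ggml_backend_cpu_init();
    ggml_backend_buffer_t buf = ggml_backend_alloc_buffer(backend, 1024 * 1024);

    ggml_backend_buffer_clear(buf, 0); // zero the whole buffer via iface.clear

    if (ggml_backend_buffer_is_host(buf)) {
        // host-visible memory: the base pointer can be inspected directly
        const uint8_t * base = ggml_backend_buffer_get_base(buf);
        assert(base[0] == 0);
    }

    ggml_backend_buffer_free(buf);
    ggml_backend_free(backend);
}
```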
@@ -378,7 +393,6 @@ static void * ggml_backend_cpu_buffer_get_base(ggml_backend_buffer_t buffer) {
 
 static void ggml_backend_cpu_buffer_free_buffer(ggml_backend_buffer_t buffer) {
     free(buffer->context);
-    GGML_UNUSED(buffer);
 }
 
 static void ggml_backend_cpu_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
@@ -411,6 +425,10 @@ static void ggml_backend_cpu_buffer_cpy_tensor_to(ggml_backend_buffer_t buffer,
     GGML_UNUSED(buffer);
 }
 
+static void ggml_backend_cpu_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
+    memset(buffer->context, value, buffer->size);
+}
+
 static struct ggml_backend_buffer_i cpu_backend_buffer_i = {
     /* .free_buffer     = */ ggml_backend_cpu_buffer_free_buffer,
     /* .get_base        = */ ggml_backend_cpu_buffer_get_base,
@@ -419,6 +437,7 @@ static struct ggml_backend_buffer_i cpu_backend_buffer_i = {
     /* .get_tensor      = */ ggml_backend_cpu_buffer_get_tensor,
     /* .cpy_tensor_from = */ ggml_backend_cpu_buffer_cpy_tensor_from,
     /* .cpy_tensor_to   = */ ggml_backend_cpu_buffer_cpy_tensor_to,
+    /* .clear           = */ ggml_backend_cpu_buffer_clear,
 };
 
 // for buffers from ptr, free is not called
@@ -430,6 +449,7 @@ static struct ggml_backend_buffer_i cpu_backend_buffer_i_from_ptr = {
     /* .get_tensor      = */ ggml_backend_cpu_buffer_get_tensor,
     /* .cpy_tensor_from = */ ggml_backend_cpu_buffer_cpy_tensor_from,
     /* .cpy_tensor_to   = */ ggml_backend_cpu_buffer_cpy_tensor_to,
+    /* .clear           = */ ggml_backend_cpu_buffer_clear,
 };
 
 static const size_t TENSOR_ALIGNMENT = 64; // should be enough for AVX 512
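Since `clear` is a new entry in `struct ggml_backend_buffer_i`, every backend's buffer interface gets a corresponding callback, as the two CPU vtables above show. A sketch of what that looks like for a hypothetical out-of-tree backend; the `my_*` names and `my_device_fill()` are placeholders, not ggml APIs:

```c
// Sketch: adding the new clear() callback to a hypothetical backend.
// my_device_fill() stands in for whatever fill primitive the backend provides.
static void my_device_fill(void * ptr, uint8_t value, size_t size); // placeholder

static void my_backend_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
    my_device_fill(buffer->context, value, buffer->size);
}

// ... and the backend's buffer interface gains one more entry, mirroring
// cpu_backend_buffer_i above:
//     /* .clear           = */ my_backend_buffer_clear,
```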
@@ -455,20 +475,70 @@ static bool ggml_backend_cpu_buffer_type_supports_backend(ggml_backend_buffer_ty
     GGML_UNUSED(buft);
 }
 
+static bool ggml_backend_cpu_buffer_type_is_host(ggml_backend_buffer_type_t buft) {
+    return true;
+
+    GGML_UNUSED(buft);
+}
+
 ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void) {
-    static struct ggml_backend_buffer_type ggml_backend_buffer_type_cpu = {
+    static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type = {
         /* .iface = */ {
             /* .alloc_buffer     = */ ggml_backend_cpu_buffer_type_alloc_buffer,
             /* .get_alignment    = */ ggml_backend_cpu_buffer_type_get_alignment,
             /* .get_alloc_size   = */ NULL, // defaults to ggml_nbytes
             /* .supports_backend = */ ggml_backend_cpu_buffer_type_supports_backend,
+            /* .is_host          = */ ggml_backend_cpu_buffer_type_is_host,
         },
         /* .context = */ NULL,
     };
 
-    return &ggml_backend_buffer_type_cpu;
+    return &ggml_backend_cpu_buffer_type;
 }
 
+#ifdef GGML_USE_CPU_HBM
+
+// buffer type HBM
+
+#include <hbwmalloc.h>
+
+static void ggml_backend_cpu_hbm_buffer_free_buffer(ggml_backend_buffer_t buffer) {
+    hbw_free(buffer->context);
+}
+
+static ggml_backend_buffer_t ggml_backend_cpu_hbm_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
+    //void * ptr = hbw_malloc(size);
+    void * ptr;
+    int result = hbw_posix_memalign(&ptr, ggml_backend_cpu_buffer_type_get_alignment(buft), size);
+    if (result != 0) {
+        fprintf(stderr, "failed to allocate HBM buffer of size %zu\n", size);
+        return NULL;
+    }
+
+    // FIXME: this is a hack to avoid having to implement a new buffer type
+    ggml_backend_buffer_t buffer = ggml_backend_cpu_buffer_from_ptr(ptr, size);
+    buffer->buft = buft;
+    buffer->iface.free_buffer = ggml_backend_cpu_hbm_buffer_free_buffer;
+
+    return buffer;
+}
+
+ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type() {
+    static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type_hbm = {
+        /* .iface = */ {
+            /* .alloc_buffer     = */ ggml_backend_cpu_hbm_buffer_type_alloc_buffer,
+            /* .get_alignment    = */ ggml_backend_cpu_buffer_type_get_alignment,
+            /* .get_alloc_size   = */ NULL, // defaults to ggml_nbytes
+            /* .supports_backend = */ ggml_backend_cpu_buffer_type_supports_backend,
+            /* .is_host          = */ ggml_backend_cpu_buffer_type_is_host,
+        },
+        /* .context = */ NULL,
+    };
+
+    return &ggml_backend_cpu_buffer_type_hbm;
+}
+#endif
+
 struct ggml_backend_cpu_context {
     int n_threads;
     void * work_data;
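When ggml is built with `GGML_USE_CPU_HBM`, the HBM buffer type is a drop-in alternative to the default CPU one: `hbw_posix_memalign` returns ordinary host-addressable memory, so both report `is_host`. A usage sketch, assuming `ggml_backend_cpu_hbm_buffer_type()` is also declared in `ggml-backend.h` under the same ifdef (the buffer size is arbitrary):

```c
#include <assert.h>
#include "ggml-backend.h"

static ggml_backend_buffer_t alloc_host_buffer(size_t n_bytes) {
#ifdef GGML_USE_CPU_HBM
    ggml_backend_buffer_type_t buft = ggml_backend_cpu_hbm_buffer_type();
#else
    ggml_backend_buffer_type_t buft = ggml_backend_cpu_buffer_type();
#endif
    ggml_backend_buffer_t buf = ggml_backend_buft_alloc_buffer(buft, n_bytes);
    // either way this is plain host memory, so direct access is allowed
    assert(ggml_backend_buffer_is_host(buf));
    return buf;
}
```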
@@ -505,7 +575,7 @@ static ggml_backend_graph_plan_t ggml_backend_cpu_graph_plan_create(ggml_backend
     struct ggml_backend_plan_cpu * cpu_plan = malloc(sizeof(struct ggml_backend_plan_cpu));
 
     cpu_plan->cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads);
-    cpu_plan->cgraph = *cgraph;
+    cpu_plan->cgraph = *cgraph; // FIXME: deep copy
 
     if (cpu_plan->cplan.work_size > 0) {
         cpu_plan->cplan.work_data = malloc(cpu_plan->cplan.work_size);
@@ -1180,7 +1250,7 @@ void ggml_backend_sched_set_node_backend(ggml_backend_sched_t sched, struct ggml
 // utils
 void ggml_backend_view_init(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
     GGML_ASSERT(tensor->buffer == NULL);
-    GGML_ASSERT(tensor->data == NULL);
+    //GGML_ASSERT(tensor->data == NULL); // views of pre-allocated tensors may have the data set, but still need to be initialized
     GGML_ASSERT(tensor->view_src != NULL);
     GGML_ASSERT(tensor->view_src->buffer != NULL);
     GGML_ASSERT(tensor->view_src->data != NULL);