Commit 20c1b27

Update llama : add option to override model tensor buffers
1 parent fc9ca43 commit 20c1b27

1 file changed: +20 / -1 lines changed

llama_cpp/llama_cpp.py (+20 / -1)

@@ -645,11 +645,27 @@ class llama_model_kv_override(ctypes.Structure):
         key: bytes
         value: Union[int, float, bool, bytes]

+# struct llama_model_tensor_buft_override {
+#     const char * pattern;
+#     ggml_backend_buffer_type_t buft;
+# };
+class llama_model_tensor_buft_override(ctypes.Structure):
+    _fields_ = [
+        ("pattern", ctypes.c_char_p),
+        ("buft", ctypes.c_void_p),
+    ]
+
+    if TYPE_CHECKING:
+        pattern: ctypes.c_char_p
+        buft: ctypes.c_void_p

 # struct llama_model_params {
 #     // NULL-terminated list of devices to use for offloading (if NULL, all available devices are used)
 #     ggml_backend_dev_t * devices;
-
+#
+#     // NULL-terminated list of buffer types to use for tensors that match a pattern
+#     const struct llama_model_tensor_buft_override * tensor_buft_overrides;
+#
 #     int32_t n_gpu_layers; // number of layers to store in VRAM
 #     enum llama_split_mode split_mode; // how to split the model across multiple GPUs

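The new class mirrors llama.cpp's llama_model_tensor_buft_override struct and, per the "NULL-terminated list" comment in the llama_model_params header above, is consumed as a NULL-terminated array. A minimal sketch of building such an array with these bindings; the import path, the regex pattern, and the NULL buffer type are illustrative assumptions rather than values taken from this commit:

    from llama_cpp.llama_cpp import llama_model_tensor_buft_override

    # Two entries: one real override plus a zero-initialized terminator whose
    # pattern is NULL (assumed to mark the end of the NULL-terminated list).
    overrides = (llama_model_tensor_buft_override * 2)()
    overrides[0].pattern = b"blk\\..*\\.ffn_.*"  # regex matched against tensor names (placeholder)
    overrides[0].buft = None  # would be a real ggml_backend_buffer_type_t handle; NULL here for illustration
    # overrides[1] stays zeroed, terminating the list.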

@@ -684,6 +700,7 @@ class llama_model_params(ctypes.Structure):
     """Parameters for llama_model

     Attributes:
+        tensor_buft_overrides(llama_model_tensor_buft_override): NULL-terminated list of buffer types to use for tensors that match a pattern
         n_gpu_layers (int): number of layers to store in VRAM
         split_mode (int): how to split the model across multiple GPUs
         main_gpu (int): the GPU that is used for the entire model. main_gpu interpretation depends on split_mode: LLAMA_SPLIT_NONE: the GPU that is used for the entire model LLAMA_SPLIT_ROW: the GPU that is used for small tensors and intermediate results LLAMA_SPLIT_LAYER: ignored
@@ -697,6 +714,7 @@ class llama_model_params(ctypes.Structure):
         check_tensors (bool): validate model tensor data"""

     if TYPE_CHECKING:
+        tensor_buft_overrides: ctypes.POINTER(llama_model_tensor_buft_override)
         n_gpu_layers: int
         split_mode: int
         main_gpu: int
@@ -711,6 +729,7 @@ class llama_model_params(ctypes.Structure):

     _fields_ = [
         ("devices", ctypes.c_void_p), # NOTE: unnused
+        ("tensor_buft_overrides", ctypes.POINTER(llama_model_tensor_buft_override)),
         ("n_gpu_layers", ctypes.c_int32),
         ("split_mode", ctypes.c_int),
         ("main_gpu", ctypes.c_int32),
