
Commit 1eb130a: Update llama.cpp

Parent: ba3959e

File tree: 2 files changed (+7, -4 lines)

Diff for: llama_cpp/llama_cpp.py (+6, -3)
@@ -117,6 +117,8 @@ class llama_context_params(Structure):
 LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = ctypes.c_int(
     4
 )  # tok_embeddings.weight and output.weight are F16
+LLAMA_FTYPE_MOSTLY_Q4_2 = ctypes.c_int(5)  # except 1d tensors
+LLAMA_FTYPE_MOSTYL_Q4_3 = ctypes.c_int(6)  # except 1d tensors

 # Functions

@@ -169,11 +171,12 @@ def llama_free(ctx: llama_context_p):

 # TODO: not great API - very likely to change
 # Returns 0 on success
-def llama_model_quantize(fname_inp: bytes, fname_out: bytes, itype: c_int) -> c_int:
-    return _lib.llama_model_quantize(fname_inp, fname_out, itype)
+# nthread - how many threads to use. If <=0, will use std::thread::hardware_concurrency(), else the number given
+def llama_model_quantize(fname_inp: bytes, fname_out: bytes, ftype: c_int, nthread: c_int) -> c_int:
+    return _lib.llama_model_quantize(fname_inp, fname_out, ftype, nthread)


-_lib.llama_model_quantize.argtypes = [c_char_p, c_char_p, c_int]
+_lib.llama_model_quantize.argtypes = [c_char_p, c_char_p, c_int, c_int]
 _lib.llama_model_quantize.restype = c_int


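For reference, a minimal usage sketch of the updated binding (not part of the commit). It assumes the low-level bindings import as llama_cpp.llama_cpp, and the model paths are placeholders; passing nthread <= 0 defers the thread count to std::thread::hardware_concurrency(), per the new comment in the diff above.

    # Hedged sketch: module path and file names are assumptions for illustration.
    from ctypes import c_int
    from llama_cpp import llama_cpp

    fname_inp = b"./models/7B/ggml-model-f16.bin"   # placeholder input model
    fname_out = b"./models/7B/ggml-model-q4_2.bin"  # placeholder output model

    ret = llama_cpp.llama_model_quantize(
        fname_inp,
        fname_out,
        llama_cpp.LLAMA_FTYPE_MOSTLY_Q4_2,  # new ftype constant added in this commit
        c_int(0),                           # nthread <= 0 -> use hardware_concurrency()
    )
    assert ret == 0  # llama_model_quantize returns 0 on success
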
Diff for: vendor/llama.cpp (+1, -1: submodule commit reference updated)
