
Commit 4522ed7

convert-hf : allow converting the weird BitNet 1.3B

Its FFN size is 5460, which is not convenient: it is not a multiple of the Q1_3 block size, so those tensors cannot be quantized to Q1_3. The offending tensors are instead kept in F16, which makes the final model 5.01 bpw.
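To see why 5460 is inconvenient: Q1_3 packs weights in fixed-size blocks along a tensor's last dimension, so that dimension must be a multiple of the block size. A quick check (a sketch; the block size of 64 is an assumption based on Q1_3's 1.625 bpw, i.e. 64 ternary weights per 13-byte block, not a value read from this branch):

```python
Q1_3_BLOCK_SIZE = 64  # assumed Q1_3 block size (64 weights per 13-byte block)

print(5460 % Q1_3_BLOCK_SIZE)  # 20 -> BitNet 1.3B's FFN size is not block-aligned
print(4096 % Q1_3_BLOCK_SIZE)  # 0  -> a power-of-two FFN size would divide evenly
```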
Parent: 58b9064

2 files changed, +14 −6 lines


Diff for: convert-hf-to-gguf.py (+10 −6)
```diff
@@ -299,12 +299,16 @@ def write_tensors(self):
                 if self.ftype != gguf.LlamaFileType.ALL_F32 and extra_f16 and not extra_f32:
                     # TODO: cleaner model-specific per-tensor types
                     # NOTE: Q1_3 is only relevant for BitNet 1.58b
-                    if self.ftype == gguf.LlamaFileType.MOSTLY_Q1_3 and not any(
-                        self.match_model_tensor_name(new_name, key, None)
-                        for key in [
-                            gguf.MODEL_TENSOR.TOKEN_EMBD,
-                            gguf.MODEL_TENSOR.OUTPUT,
-                        ]
+                    if (
+                        self.ftype == gguf.LlamaFileType.MOSTLY_Q1_3
+                        and gguf.can_quantize_to_q1_3(data)
+                        and not any(
+                            self.match_model_tensor_name(new_name, key, None)
+                            for key in [
+                                gguf.MODEL_TENSOR.TOKEN_EMBD,
+                                gguf.MODEL_TENSOR.OUTPUT,
+                            ]
+                        )
                     ):
                         data = gguf.quantize_q1_3(data)
                         assert data.dtype == np.uint8
```
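The hunk above narrows the Q1_3 path: a tensor is quantized to Q1_3 only if the target file type asks for it, the tensor is block-aligned (the new `gguf.can_quantize_to_q1_3` check), and it is neither the token embedding nor the output tensor; anything failing the check falls through to the converter's existing F16 path (per the commit message). A minimal standalone sketch of that decision, with the block size assumed to be 64 and the converter's name matching replaced by a plain flag (both stand-ins, not the real API):

```python
import numpy as np

Q1_3_BLOCK_SIZE = 64  # assumed; the real value comes from GGML_QUANT_SIZES

def can_quantize_to_q1_3(data: np.ndarray) -> bool:
    # Blocks are packed along the last dimension, so it must divide evenly.
    return data.shape[-1] % Q1_3_BLOCK_SIZE == 0

def choose_type(data: np.ndarray, is_embd_or_output: bool) -> str:
    # Mirrors the new guard: Q1_3 only for block-aligned tensors that are
    # neither the token embedding nor the output; everything else stays F16.
    if can_quantize_to_q1_3(data) and not is_embd_or_output:
        return "Q1_3"
    return "F16"

# Illustrative shapes: only the last dimension matters for the check.
ffn_down = np.zeros((2048, 5460), dtype=np.float32)  # 5460 % 64 == 20
attn_q = np.zeros((2048, 2048), dtype=np.float32)    # 2048 % 64 == 0

print(choose_type(ffn_down, is_embd_or_output=False))  # F16 (the offending case)
print(choose_type(attn_q, is_embd_or_output=False))    # Q1_3
```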

Diff for: gguf-py/gguf/quants.py (+4 −0)
```diff
@@ -126,6 +126,10 @@ def quantize_q8_0(data: np.ndarray):
 __q1_3_block_size, __q1_3_type_size = GGML_QUANT_SIZES[GGMLQuantizationType.Q1_3]
 
 
+def can_quantize_to_q1_3(n: np.ndarray) -> bool:
+    return n.shape[-1] % __q1_3_block_size == 0
+
+
 def __quantize_q1_3_shape_change(s: tuple[int, ...]) -> tuple[int, ...]:
     return (*s[:-1], s[-1] // __q1_3_block_size * __q1_3_type_size)
 
```
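The new predicate pairs with the neighboring `__quantize_q1_3_shape_change` helper, which computes the packed byte shape for tensors that do pass the check. A sketch of that shape change under an assumed `(block size, type size)` of `(64, 13)` (64 ternary weights in 13 bytes, i.e. 1.625 bpw; the real pair comes from `GGML_QUANT_SIZES`):

```python
# Assumed Q1_3 sizes; in gguf-py they are read from GGML_QUANT_SIZES.
__q1_3_block_size, __q1_3_type_size = 64, 13

def shape_change(s: tuple[int, ...]) -> tuple[int, ...]:
    # Every 64-float block along the last dimension packs into 13 bytes.
    return (*s[:-1], s[-1] // __q1_3_block_size * __q1_3_type_size)

print(shape_change((2048, 4096)))  # (2048, 832): 4096 // 64 * 13 == 832
print(shape_change((2048, 8192)))  # (2048, 1664)
```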
