Commit 2328b67

ggerganov authored and akawrykow committed
k-quants : remove unnecessary tensor shape restrictions (ggml-org#2811)
1 parent dc43064 commit 2328b67

File tree

1 file changed: +3 -4 lines changed

Diff for: llama.cpp

@@ -4762,8 +4762,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s

         if (name == tn(LLM_TENSOR_OUTPUT, "weight")) {
             int nx = tensor->ne[0];
-            int ny = tensor->ne[1];
-            if (nx % QK_K == 0 && ny % QK_K == 0) {
+            if (nx % QK_K == 0) {
                 new_type = GGML_TYPE_Q6_K;
             }
         } else if (name.find("attn_v.weight") != std::string::npos) {
@@ -4812,8 +4811,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
             new_type == GGML_TYPE_Q5_K || new_type == GGML_TYPE_Q6_K) {
             int nx = tensor->ne[0];
             int ny = tensor->ne[1];
-            if (nx % QK_K != 0 || ny % QK_K != 0) {
-                LLAMA_LOG_INFO("\n\nTensor sizes %d x %d are not divisible by %d, required for k-quants.\n",nx,ny,QK_K);
+            if (nx % QK_K != 0) {
+                LLAMA_LOG_WARN("\n\n%s : tensor cols %d x %d are not divisible by %d, required for k-quants\n", __func__, nx, ny, QK_K);
                 convert_incompatible_tensor = true;
             }
         }
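Context for the change, as a minimal sketch (not part of the commit): ggml's k-quant formats pack QK_K consecutive values of a single row into one super-block, so each row is quantized independently and only the row length (ne[0], the number of columns) needs to be a multiple of QK_K; the row count (ne[1]) imposes no constraint. The sketch below assumes the default QK_K = 256 and uses illustrative tensor shapes.

// Sketch: why only ne[0] is checked against QK_K (assumption: default QK_K = 256).
#include <cstdio>

static const int QK_K = 256;

// True if a row of nx values splits into whole k-quant super-blocks.
static bool row_is_k_quantizable(int nx) {
    return nx % QK_K == 0;   // nx / QK_K blocks per row, no partial block allowed
}

int main() {
    // e.g. a hypothetical 4096 x 32001 output.weight: rows of 4096 values quantize
    // fine even though the row count 32001 is not a multiple of QK_K.
    printf("nx = 4096 -> %s\n", row_is_k_quantizable(4096) ? "k-quant ok" : "fallback type");
    printf("nx = 4001 -> %s\n", row_is_k_quantizable(4001) ? "k-quant ok" : "fallback type");
    return 0;
}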
