Commit 2328b67

ggerganov authored and akawrykow committed
k-quants : remove unnecessary tensor shape restrictions (ggml-org#2811)
1 parent dc43064 commit 2328b67

File tree

1 file changed: +3 -4 lines changed

Diff for: llama.cpp

@@ -4762,8 +4762,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s

         if (name == tn(LLM_TENSOR_OUTPUT, "weight")) {
             int nx = tensor->ne[0];
-            int ny = tensor->ne[1];
-            if (nx % QK_K == 0 && ny % QK_K == 0) {
+            if (nx % QK_K == 0) {
                 new_type = GGML_TYPE_Q6_K;
             }
         } else if (name.find("attn_v.weight") != std::string::npos) {
@@ -4812,8 +4811,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
             new_type == GGML_TYPE_Q5_K || new_type == GGML_TYPE_Q6_K) {
             int nx = tensor->ne[0];
             int ny = tensor->ne[1];
-            if (nx % QK_K != 0 || ny % QK_K != 0) {
-                LLAMA_LOG_INFO("\n\nTensor sizes %d x %d are not divisible by %d, required for k-quants.\n",nx,ny,QK_K);
+            if (nx % QK_K != 0) {
+                LLAMA_LOG_WARN("\n\n%s : tensor cols %d x %d are not divisible by %d, required for k-quants\n", __func__, nx, ny, QK_K);
                 convert_incompatible_tensor = true;
             }
         }
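Context for the change, as a minimal sketch (not part of the commit): ggml's k-quant formats pack QK_K consecutive values of a single row into one super-block, so each row is quantized independently and only the row length (ne[0], the number of columns) needs to be a multiple of QK_K; the row count (ne[1]) imposes no constraint. The sketch below assumes the default QK_K = 256 and uses illustrative tensor shapes.

// Sketch: why only ne[0] is checked against QK_K (assumption: default QK_K = 256).
#include <cstdio>

static const int QK_K = 256;

// True if a row of nx values splits into whole k-quant super-blocks.
static bool row_is_k_quantizable(int nx) {
    return nx % QK_K == 0;   // nx / QK_K blocks per row, no partial block allowed
}

int main() {
    // e.g. a hypothetical 4096 x 32001 output.weight: rows of 4096 values quantize
    // fine even though the row count 32001 is not a multiple of QK_K.
    printf("nx = 4096 -> %s\n", row_is_k_quantizable(4096) ? "k-quant ok" : "fallback type");
    printf("nx = 4001 -> %s\n", row_is_k_quantizable(4001) ? "k-quant ok" : "fallback type");
    return 0;
}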
