Skip to content

Commit 28bd7cd

Browse files
committed
Address review comments
1 parent b5c5b4b commit 28bd7cd

File tree

2 files changed

+4
-6
lines changed

2 files changed

+4
-6
lines changed

ggml-opencl.cpp

+1-2
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
 #define CL_TARGET_OPENCL_VERSION 110
 #include <clblast.h>

-#include <assert.h>
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
@@ -1033,5 +1032,5 @@ void ggml_cl_transform_tensor(const void * data, ggml_tensor * tensor) {
     CL_CHECK(clFinish(queue));

     tensor->data = dst;
-    assert(tensor->backend == GGML_BACKEND_CL);
+    GGML_ASSERT(tensor->backend == GGML_BACKEND_CL);
 }

llama.cpp

+3-4
Original file line numberDiff line numberDiff line change
@@ -730,8 +730,7 @@ struct llama_model_loader {
         switch(lt.ggml_tensor->backend) {
             case GGML_BACKEND_CPU:
                 lt.ggml_tensor->data = lt.data;
-                if (use_mmap && lmlock)
-                {
+                if (use_mmap && lmlock) {
                     lock_size += lt.size;
                     lmlock->grow_to(lock_size);
                 }
@@ -1075,7 +1074,7 @@ static void llama_model_load_internal(

            std::string layers_i = "layers." + std::to_string(i);

-           // TODO: Normalize this after OpenCL supports mat mul with repeat
+           // TODO: Update this after OpenCL supports multiply with repeat
            if (backend == GGML_BACKEND_CUDA) {
                layer.attention_norm = ml->get_tensor(layers_i + ".attention_norm.weight", {n_embd}, backend);
            } else {
@@ -1103,7 +1102,7 @@ static void llama_model_load_internal(
                    ggml_nbytes(layer.wv) + ggml_nbytes(layer.wo) + ggml_nbytes(layer.ffn_norm) +
                    ggml_nbytes(layer.w1) + ggml_nbytes(layer.w2) + ggml_nbytes(layer.w3);
            } else if (backend == GGML_BACKEND_CL) {
-               // TODO: Until OpenCL supports mat mul with repeat
+               // TODO: Until OpenCL supports multiply with repeat
                vram_total +=
                    ggml_nbytes(layer.wq) + ggml_nbytes(layer.wk) +
                    ggml_nbytes(layer.wv) + ggml_nbytes(layer.wo) +

0 commit comments

Comments (0)