vllm-project · simon-mo · May 16, 2025 · May 16, 2025 · May 16, 2025 · May 16, 2025
diff --git a/csrc/moe/marlin_moe_wna16/marlin_template.h b/csrc/moe/marlin_moe_wna16/marlin_template.h
@@ -1767,17 +1767,20 @@ __global__ void Marlin(
 
       if constexpr (has_act_order) {
         slice_k_start += tb_k * stages;
-        slice_k_start_shared_fetch += tb_k * stages;
-        int first_group_id = g_idx[slice_k_start];
-        int last_g_idx = slice_k_start + stages * tb_k * 2;
-        if (last_g_idx >= prob_k) {
-          last_g_idx = prob_k - 1;
-        }
-        int last_group_id = g_idx[last_g_idx];
-        if (last_group_id >= sh_first_group_id + sh_num_groups) {
-          fetch_act_order_scales_to_shared(false, first_group_id,
-                                           last_group_id);
-          __syncthreads();
+
+        if (slice_k_start < prob_k) {
+          slice_k_start_shared_fetch += tb_k * stages;
+          int first_group_id = g_idx[slice_k_start];
+          int last_g_idx = slice_k_start + stages * tb_k * 2;
+          if (last_g_idx >= prob_k) {
+            last_g_idx = prob_k - 1;
+          }
+          int last_group_id = g_idx[last_g_idx];
+          if (last_group_id >= sh_first_group_id + sh_num_groups) {
+            fetch_act_order_scales_to_shared(false, first_group_id,
+                                             last_group_id);
+            __syncthreads();
+          }
         }
       }
       if (slice_iters == 0) {

diff --git a/csrc/quantization/gptq_marlin/marlin_template.h b/csrc/quantization/gptq_marlin/marlin_template.h
@@ -1588,16 +1588,20 @@ __global__ void Marlin(
 
     if constexpr (has_act_order) {
       slice_k_start += tb_k * stages;
-      slice_k_start_shared_fetch += tb_k * stages;
-      int first_group_id = g_idx[slice_k_start];
-      int last_g_idx = slice_k_start + stages * tb_k * 2;
-      if (last_g_idx >= prob_k) {
-        last_g_idx = prob_k - 1;
-      }
-      int last_group_id = g_idx[last_g_idx];
-      if (last_group_id >= sh_first_group_id + sh_num_groups) {
-        fetch_act_order_scales_to_shared(false, first_group_id, last_group_id);
-        __syncthreads();
+
+      if (slice_k_start < prob_k) {
+        slice_k_start_shared_fetch += tb_k * stages;
+        int first_group_id = g_idx[slice_k_start];
+        int last_g_idx = slice_k_start + stages * tb_k * 2;
+        if (last_g_idx >= prob_k) {
+          last_g_idx = prob_k - 1;
+        }
+        int last_group_id = g_idx[last_g_idx];
+        if (last_group_id >= sh_first_group_id + sh_num_groups) {
+          fetch_act_order_scales_to_shared(false, first_group_id,
+                                           last_group_id);
+          __syncthreads();
+        }
       }
     }
 

@@ -2,7 +2,7 @@ gptq_marlin, robertgshaw2/zephyr-7b-beta-channelwise-gptq, main
 gptq_marlin, TheBloke/Llama-2-7B-GPTQ, main
 gptq_marlin, TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ, main
 gptq_marlin, TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ, gptq-8bit--1g-actorder_True
-#gptq_marlin, TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ, gptq-8bit-32g-actorder_True
+gptq_marlin, TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ, gptq-8bit-32g-actorder_True
 gptq_marlin, TechxGenus/gemma-1.1-2b-it-GPTQ, main
 gptq, robertgshaw2/zephyr-7b-beta-channelwise-gptq, main
 gptq, TheBloke/Llama-2-7B-GPTQ, main