Fix ep (#814): forward multimodal params through the DP-backend micro-batch prefill/decode paths

shihaobai · baishihao · web-flow · commit 6c464158c014 · 2025-04-09T13:35:12.000+08:00
Co-authored-by: baishihao <baishihao@sensetime.com>
diff --git a/lightllm/common/basemodel/basemodel.py b/lightllm/common/basemodel/basemodel.py
@@ -460,7 +460,7 @@ def create_inferstate(cur_batch: PrefillMicroBatch, batch_index):
                 infer_state.b_ready_cache_len = torch.zeros_like(
                     cur_batch.b_seq_len, dtype=cur_batch.b_seq_len.dtype, device=cur_batch.b_seq_len.device
                 )
-            infer_state.multimodal_params = None
+            infer_state.multimodal_params = cur_batch.multimodal_params
             infer_state.microbatch_index = batch_index
 
             infer_state.mem_manager = self.mem_manager
diff --git a/lightllm/common/basemodel/microbatch_overlap_objs.py b/lightllm/common/basemodel/microbatch_overlap_objs.py
@@ -25,3 +25,4 @@ class PrefillMicroBatch:
     b_start_loc: torch.Tensor
     b_seq_len: torch.Tensor
     b_ready_cache_len: torch.Tensor
+    multimodal_params: list
diff --git a/lightllm/server/router/model_infer/mode_backend/dp_backend/impl.py b/lightllm/server/router/model_infer/mode_backend/dp_backend/impl.py
@@ -92,7 +92,7 @@ def normal_decode(self, decode_reqs: List[InferReq], max_decode_num: int, uninit
         from .pre_process import padded_prepare_decode_inputs
 
         kwargs, run_reqs, padded_req_num = padded_prepare_decode_inputs(
-            decode_reqs, max_decode_num, is_multimodal=False
+            decode_reqs, max_decode_num, is_multimodal=self.is_multimodal
         )
         logits = self.model.forward(**kwargs)
 
@@ -118,7 +118,7 @@ def overlap_decode(self, decode_reqs: List[InferReq], max_decode_num: int, unini
             micro_batch1,
             run_reqs1,
             padded_req_num1,
-        ) = padded_overlap_prepare_decode_inputs(decode_reqs, max_decode_num, is_multimodal=False)
+        ) = padded_overlap_prepare_decode_inputs(decode_reqs, max_decode_num, is_multimodal=self.is_multimodal)
         logits, logits1 = self.model.microbatch_overlap_decode(micro_batch, micro_batch1)
         self._overlap_req_init_and_filter(uninit_reqs=uninit_reqs, ok_finished_reqs=ok_finished_reqs, clear_list=True)
         req_num, req_num1 = len(run_reqs), len(run_reqs1)
@@ -147,7 +147,7 @@ def overlap_prefill_reqs(self, prefill_reqs: List[InferReq], max_prefill_num: in
             micro_batch1,
             run_reqs1,
             padded_req_num1,
-        ) = padded_overlap_prepare_prefill_inputs(prefill_reqs, max_prefill_num, is_multimodal=False)
+        ) = padded_overlap_prepare_prefill_inputs(prefill_reqs, max_prefill_num, is_multimodal=self.is_multimodal)
         logits, logits1 = self.model.microbatch_overlap_prefill(micro_batch, micro_batch1)
         self._overlap_req_init_and_filter(uninit_reqs=uninit_reqs, ok_finished_reqs=ok_finished_reqs, clear_list=True)
         req_num, req_num1 = len(run_reqs), len(run_reqs1)
diff --git a/lightllm/server/router/model_infer/mode_backend/dp_backend/pre_process.py b/lightllm/server/router/model_infer/mode_backend/dp_backend/pre_process.py
@@ -336,6 +336,7 @@ def _padded_prepare_prefill_micro_batch(req_objs: List[InferReq], is_multimodal=
         b_start_loc=nopad_b_start_loc,
         b_seq_len=nopad_b_seq_len,
         b_ready_cache_len=b_ready_cache_len,
+        multimodal_params=batch_multimodal_params,
     )
 
     return micro_batch, run_reqs, padded_req_num

Original file line number	Diff line number	Diff line change
`@@ -460,7 +460,7 @@ def create_inferstate(cur_batch: PrefillMicroBatch, batch_index):`
`460`	`460`	`infer_state.b_ready_cache_len = torch.zeros_like(`
`461`	`461`	`cur_batch.b_seq_len, dtype=cur_batch.b_seq_len.dtype, device=cur_batch.b_seq_len.device`
`462`	`462`	`)`
`463`		`- infer_state.multimodal_params = None`
	`463`	`+ infer_state.multimodal_params = cur_batch.multimodal_params`
`464`	`464`	`infer_state.microbatch_index = batch_index`
`465`	`465`
`466`	`466`	`infer_state.mem_manager = self.mem_manager`
Original file line number	Diff line number	Diff line change
`@@ -336,6 +336,7 @@ def _padded_prepare_prefill_micro_batch(req_objs: List[InferReq], is_multimodal=`
`336`	`336`	`b_start_loc=nopad_b_start_loc,`
`337`	`337`	`b_seq_len=nopad_b_seq_len,`
`338`	`338`	`b_ready_cache_len=b_ready_cache_len,`
	`339`	`+ multimodal_params=batch_multimodal_params,`
`339`	`340`	`)`
`340`	`341`
`341`	`342`	`return micro_batch, run_reqs, padded_req_num`