@@ -719,12 +719,22 @@ Stub::ProcessRequests(RequestBatch* request_batch_shm_ptr)
719
719
ResponseBatch* response_batch_shm_ptr = reinterpret_cast <ResponseBatch*>(
720
720
response_batch.value ().data_ .get () + sizeof (IPCMessageShm));
721
721
722
-
723
- // If the response sender is already closed, notify the backend NOT to
722
+ // The backend will clean up the response factory if there is an error in
723
+ // the response batch. It is necessary to handle cases where the response
724
+ // sender has already cleaned it up, ensuring the backend does not
724
725
// delete the response factory again during error handling.
725
726
if (err_message.find (" Response sender has been closed" ) !=
726
727
std::string::npos) {
727
728
response_batch_shm_ptr->is_response_factory_deleted = true ;
729
+ } else if (
730
+ err_message.find (" is using the decoupled mode and the execute function "
731
+ " must return None" ) != std::string::npos) {
732
+ for (py::handle py_request : py_request_list) {
733
+ InferRequest* request = py_request.cast <InferRequest*>();
734
+ if (request->GetResponseSender ()->IsClosed ()) {
735
+ response_batch_shm_ptr->is_response_factory_deleted = true ;
736
+ }
737
+ }
728
738
}
729
739
730
740
response_batch_shm_ptr->has_error = true ;
@@ -788,27 +798,6 @@ Stub::ProcessReturnedResponses(
788
798
}
789
799
// Only non-decoupled may return responses.
790
800
if (IsDecoupled ()) {
791
- // For decoupled mode, if before returning from this error, there was
792
- // already a response sent from the response sender, along with the complete
793
- // final flag, then use the `is_response_factory_deleted` flag to notify the
794
- // backend to NOT to delete the response factory again during error
795
- // handling.
796
- for (py::handle py_request : py_requests) {
797
- InferRequest* request = py_request.cast <InferRequest*>();
798
- if (request->GetResponseSender ()->IsClosed ()) {
799
- // Notify the backend to NOT to delete the response factory again during
800
- // error handling.
801
- if (!response_batch) {
802
- response_batch = std::move (shm_pool_->Construct <char >(
803
- sizeof (ResponseBatch) + sizeof (IPCMessageShm)));
804
- }
805
- ResponseBatch* response_batch_shm_ptr =
806
- reinterpret_cast <ResponseBatch*>(
807
- response_batch.value ().data_ .get () + sizeof (IPCMessageShm));
808
- response_batch_shm_ptr->is_response_factory_deleted = true ;
809
- }
810
- }
811
-
812
801
throw PythonBackendException (
813
802
" Python model '" + name_ +
814
803
" ' is using the decoupled mode and the execute function must return "
0 commit comments