@@ -654,7 +654,6 @@ Stub::ProcessRequests(RequestBatch* request_batch_shm_ptr)
654
654
py::list py_request_list =
655
655
LoadRequestsFromSharedMemory (request_batch_shm_ptr);
656
656
std::unique_ptr<IPCMessage> execute_response;
657
- // IPCMessage::Create(shm_pool_, false /* Inline response */);
658
657
659
658
std::optional<AllocatedSharedMemory<char >> response_batch;
660
659
bool has_exception = false ;
@@ -675,8 +674,7 @@ Stub::ProcessRequests(RequestBatch* request_batch_shm_ptr)
675
674
{
676
675
NVTX_RANGE (nvtx_, " PyExecute " + name_);
677
676
678
- execute_return =
679
- model_instance_.attr (" execute" )(py_request_list);
677
+ execute_return = model_instance_.attr (" execute" )(py_request_list);
680
678
681
679
bool is_coroutine = py::module::import (" asyncio" )
682
680
.attr (" iscoroutine" )(execute_return)
@@ -688,10 +686,12 @@ Stub::ProcessRequests(RequestBatch* request_batch_shm_ptr)
688
686
} else {
689
687
py::object coroutine_return =
690
688
RunCoroutine (execute_return, false /* in_background */ );
691
- ProcessReturnedResponses (py_request_list, coroutine_return, response_batch);
689
+ ProcessReturnedResponses (
690
+ py_request_list, coroutine_return, response_batch);
692
691
}
693
692
} else {
694
- ProcessReturnedResponses (py_request_list, execute_return, response_batch);
693
+ ProcessReturnedResponses (
694
+ py_request_list, execute_return, response_batch);
695
695
}
696
696
}
697
697
}
@@ -712,11 +712,14 @@ Stub::ProcessRequests(RequestBatch* request_batch_shm_ptr)
712
712
error_string;
713
713
LOG_ERROR << err_message.c_str ();
714
714
if (!response_batch) {
715
- response_batch = shm_pool_->Construct <char >(sizeof (ResponseBatch) + sizeof (IPCMessageShm));
716
- }
717
- ResponseBatch* response_batch_shm_ptr = reinterpret_cast <ResponseBatch*>(response_batch.value ().data_ .get () + sizeof (IPCMessageShm));
715
+ response_batch = shm_pool_->Construct <char >(
716
+ sizeof (ResponseBatch) + sizeof (IPCMessageShm));
717
+ }
718
+ ResponseBatch* response_batch_shm_ptr = reinterpret_cast <ResponseBatch*>(
719
+ response_batch.value ().data_ .get () + sizeof (IPCMessageShm));
718
720
719
- response_batch_shm_ptr = reinterpret_cast <ResponseBatch*>(response_batch.value ().data_ .get ());
721
+ response_batch_shm_ptr =
722
+ reinterpret_cast <ResponseBatch*>(response_batch.value ().data_ .get ());
720
723
response_batch_shm_ptr->has_error = true ;
721
724
error_string_shm = PbString::Create (shm_pool_, err_message);
722
725
response_batch_shm_ptr->error = error_string_shm->ShmHandle ();
@@ -732,14 +735,19 @@ Stub::ProcessRequests(RequestBatch* request_batch_shm_ptr)
732
735
}
733
736
734
737
if (!response_batch) {
735
- response_batch = shm_pool_->Construct <char >(sizeof (ResponseBatch) + sizeof (IPCMessageShm));
736
- ResponseBatch* response_batch_shm_ptr =reinterpret_cast <ResponseBatch*>(response_batch.value ().data_ .get () + sizeof (IPCMessageShm));
737
- response_batch_shm_ptr->batch_size = 0 ;
738
- }
739
- ResponseBatch* response_batch_shm_ptr = reinterpret_cast <ResponseBatch*>(response_batch.value ().data_ .get () + sizeof (IPCMessageShm));
738
+ response_batch = shm_pool_->Construct <char >(
739
+ sizeof (ResponseBatch) + sizeof (IPCMessageShm));
740
+ ResponseBatch* response_batch_shm_ptr = reinterpret_cast <ResponseBatch*>(
741
+ response_batch.value ().data_ .get () + sizeof (IPCMessageShm));
742
+ response_batch_shm_ptr->batch_size = 0 ;
743
+ }
744
+ ResponseBatch* response_batch_shm_ptr = reinterpret_cast <ResponseBatch*>(
745
+ response_batch.value ().data_ .get () + sizeof (IPCMessageShm));
740
746
response_batch_shm_ptr->has_error = false ;
741
747
response_batch_shm_ptr->is_error_set = false ;
742
- execute_response = IPCMessage::Create (reinterpret_cast <IPCMessageShm*>(response_batch.value ().data_ .get ()), response_batch.value ().handle_ );
748
+ execute_response = IPCMessage::Create (
749
+ reinterpret_cast <IPCMessageShm*>(response_batch.value ().data_ .get ()),
750
+ response_batch.value ().handle_ );
743
751
execute_response->Args () = response_batch.value ().handle_ ;
744
752
execute_response->InlineResponse () = false ;
745
753
execute_response->Command () = PYTHONSTUB_ExecuteResponse;
@@ -761,7 +769,8 @@ Stub::ProcessResponse(InferResponse* response)
761
769
762
770
void
763
771
Stub::ProcessReturnedResponses (
764
- py::list py_requests, py::object py_responses_obj, std::optional<AllocatedSharedMemory<char >>& response_batch)
772
+ py::list py_requests, py::object py_responses_obj,
773
+ std::optional<AllocatedSharedMemory<char >>& response_batch)
765
774
{
766
775
// Return if there is nothing to process.
767
776
if (py::isinstance<py::none>(py_responses_obj)) {
@@ -812,29 +821,34 @@ Stub::ProcessReturnedResponses(
812
821
813
822
std::shared_ptr<InferResponse> response =
814
823
py_responses[i].cast <std::shared_ptr<InferResponse>>();
815
- request->GetResponseSender ()->UpdateStateAndCounters (response, TRITONSERVER_RESPONSE_COMPLETE_FINAL);
824
+ request->GetResponseSender ()->UpdateStateAndCounters (
825
+ response, TRITONSERVER_RESPONSE_COMPLETE_FINAL);
816
826
}
817
827
}
818
- response_batch = std::move (shm_pool_->Construct <char >(sizeof (IPCMessageShm) +
828
+ // Return all the created responses using response_batch. The reason
829
+ // that both of the paths are available is that sending the responses
830
+ // using response_batch is faster than using `response_sender`.
831
+ response_batch = std::move (shm_pool_->Construct <char >(
832
+ sizeof (IPCMessageShm) +
819
833
requests_size * sizeof (bi::managed_external_buffer::handle_t ) +
820
834
sizeof (ResponseBatch)));
821
- ResponseBatch* response_batch_shm_ptr =
822
- reinterpret_cast <ResponseBatch*>( response_batch.value ().data_ .get () + sizeof (IPCMessageShm));
835
+ ResponseBatch* response_batch_shm_ptr = reinterpret_cast <ResponseBatch*>(
836
+ response_batch.value ().data_ .get () + sizeof (IPCMessageShm));
823
837
824
838
bi::managed_external_buffer::handle_t * responses_shm_handle =
825
839
reinterpret_cast <bi::managed_external_buffer::handle_t *>(
826
- response_batch.value ().data_ .get () + sizeof (ResponseBatch) + sizeof (IPCMessageShm));
827
-
828
- for ( size_t i = 0 ; i < responses_size; i++) {
829
- // Check the return type of execute function.
830
- InferRequest* infer_request = py_requests[i]. cast <InferRequest*>();
831
- InferResponse* infer_response = py_responses [i].cast <InferResponse *>();
832
- infer_response-> PruneOutputTensors (
833
- infer_request->RequestedOutputNames ());
834
- ProcessResponse (infer_response);
835
- responses_shm_handle[i] = infer_response->ShmHandle ();
836
- }
837
- response_batch_shm_ptr->batch_size = requests_size;
840
+ response_batch.value ().data_ .get () + sizeof (ResponseBatch) +
841
+ sizeof (IPCMessageShm));
842
+
843
+ for ( size_t i = 0 ; i < responses_size; i++) {
844
+ // Check the return type of execute function.
845
+ InferRequest* infer_request = py_requests [i].cast <InferRequest *>();
846
+ InferResponse* infer_response = py_responses[i]. cast <InferResponse*>();
847
+ infer_response-> PruneOutputTensors ( infer_request->RequestedOutputNames ());
848
+ ProcessResponse (infer_response);
849
+ responses_shm_handle[i] = infer_response->ShmHandle ();
850
+ }
851
+ response_batch_shm_ptr->batch_size = requests_size;
838
852
}
839
853
840
854
py::object
0 commit comments