Skip to content

Commit 1282598

Browse files
committed
Fix up, add comments
1 parent 95519a1 commit 1282598

File tree

3 files changed

+16
-25
lines changed

3 files changed

+16
-25
lines changed

src/pb_stub.cc

Lines changed: 12 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -719,12 +719,22 @@ Stub::ProcessRequests(RequestBatch* request_batch_shm_ptr)
719719
ResponseBatch* response_batch_shm_ptr = reinterpret_cast<ResponseBatch*>(
720720
response_batch.value().data_.get() + sizeof(IPCMessageShm));
721721

722-
723-
// If the response sender is already closed, notify the backend NOT to
722+
// The backend will clean up the response factory if there is an error in
723+
// the response batch. It is necessary to handle cases where the response
724+
// sender should have already cleaned up, ensuring the backend does not
724725
// delete the response factory again during error handling.
725726
if (err_message.find("Response sender has been closed") !=
726727
std::string::npos) {
727728
response_batch_shm_ptr->is_response_factory_deleted = true;
729+
} else if (
730+
err_message.find("is using the decoupled mode and the execute function "
731+
"must return None") != std::string::npos) {
732+
for (py::handle py_request : py_request_list) {
733+
InferRequest* request = py_request.cast<InferRequest*>();
734+
if (request->GetResponseSender()->IsClosed()) {
735+
response_batch_shm_ptr->is_response_factory_deleted = true;
736+
}
737+
}
728738
}
729739

730740
response_batch_shm_ptr->has_error = true;
@@ -788,27 +798,6 @@ Stub::ProcessReturnedResponses(
788798
}
789799
// Only non-decoupled may return responses.
790800
if (IsDecoupled()) {
791-
// For decoupled mode, if before returning from this error, there was
792-
// already a response sent from the response sender, along with the complete
793-
// final flag, then use the `is_response_factory_deleted` flag to notify the
794-
// backend to NOT to delete the response factory again during error
795-
// handling.
796-
for (py::handle py_request : py_requests) {
797-
InferRequest* request = py_request.cast<InferRequest*>();
798-
if (request->GetResponseSender()->IsClosed()) {
799-
// Notify the backend to NOT to delete the response factory again during
800-
// error handling.
801-
if (!response_batch) {
802-
response_batch = std::move(shm_pool_->Construct<char>(
803-
sizeof(ResponseBatch) + sizeof(IPCMessageShm)));
804-
}
805-
ResponseBatch* response_batch_shm_ptr =
806-
reinterpret_cast<ResponseBatch*>(
807-
response_batch.value().data_.get() + sizeof(IPCMessageShm));
808-
response_batch_shm_ptr->is_response_factory_deleted = true;
809-
}
810-
}
811-
812801
throw PythonBackendException(
813802
"Python model '" + name_ +
814803
"' is using the decoupled mode and the execute function must return "

src/python_be.cc

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1362,6 +1362,9 @@ ModelInstanceState::ProcessRequests(
13621362
reporter.SetBatchStatistics(total_batch_size);
13631363

13641364
if (response_batch_shm_ptr->has_error) {
1365+
// Clean up the response factory if an error occurred. The
1366+
// `is_response_factory_deleted` flag indicates whether the response factory
1367+
// has been deleted for some corner cases.
13651368
if (!response_batch_shm_ptr->is_response_factory_deleted) {
13661369
for (uint32_t r = 0; r < request_count; r++) {
13671370
TRITONBACKEND_ResponseFactory* response_factory =
@@ -1396,7 +1399,7 @@ ModelInstanceState::ProcessRequests(
13961399
// It is possible to have multiple responses batched together in a single
13971400
// response batch shm, where some of the responses are None due to the
13981401
// usage of response sender, so only create a TRITONBACKEND_Response
1399-
// object for the valid responses, and skip the None responses later.
1402+
// object for the valid responses.
14001403
if (response_shm_handle[i] == 0) {
14011404
responses->emplace_back(nullptr);
14021405
} else {

src/stub_launcher.cc

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -728,7 +728,6 @@ TRITONSERVER_Error*
728728
StubLauncher::ReceiveMessageFromStub(
729729
bi::managed_external_buffer::handle_t& message)
730730
{
731-
// message = parent_message_queue_->Pop();
732731
bool success = false;
733732
while (!success) {
734733
uint64_t timeout_miliseconds = 1000;

0 commit comments

Comments (0)