@@ -1089,6 +1089,17 @@ ModelInstanceState::ResponseSendDecoupled(
1089
1089
ResponseSendMessage* send_message_payload =
1090
1090
reinterpret_cast <ResponseSendMessage*>(send_message.data_ .get ());
1091
1091
std::unique_ptr<PbString> error_message;
1092
+ ScopedDefer response_factory_deleter ([send_message_payload] {
1093
+ if (send_message_payload->flags == TRITONSERVER_RESPONSE_COMPLETE_FINAL) {
1094
+ TRITONBACKEND_ResponseFactory* response_factory =
1095
+ reinterpret_cast <TRITONBACKEND_ResponseFactory*>(
1096
+ send_message_payload->response_factory_address );
1097
+ std::unique_ptr<
1098
+ TRITONBACKEND_ResponseFactory, backend::ResponseFactoryDeleter>
1099
+ lresponse_factory (reinterpret_cast <TRITONBACKEND_ResponseFactory*>(
1100
+ response_factory));
1101
+ }
1102
+ });
1092
1103
ScopedDefer _ ([send_message_payload] {
1093
1104
{
1094
1105
bi::scoped_lock<bi::interprocess_mutex> guard{send_message_payload->mu };
@@ -1214,13 +1225,6 @@ ModelInstanceState::ResponseSendDecoupled(
1214
1225
SetErrorForResponseSendMessage (
1215
1226
send_message_payload, WrapTritonErrorInSharedPtr (error), error_message);
1216
1227
}
1217
-
1218
- if (send_message_payload->flags == TRITONSERVER_RESPONSE_COMPLETE_FINAL) {
1219
- std::unique_ptr<
1220
- TRITONBACKEND_ResponseFactory, backend::ResponseFactoryDeleter>
1221
- lresponse_factory (
1222
- reinterpret_cast <TRITONBACKEND_ResponseFactory*>(response_factory));
1223
- }
1224
1228
}
1225
1229
1226
1230
TRITONSERVER_Error*
@@ -1291,6 +1295,15 @@ ModelInstanceState::ProcessRequests(
1291
1295
1292
1296
if (response_batch_shm_ptr->has_error ) {
1293
1297
if (response_batch_shm_ptr->is_error_set ) {
1298
+ for (uint32_t r = 0 ; r < request_count; r++) {
1299
+ TRITONBACKEND_ResponseFactory* response_factory =
1300
+ reinterpret_cast <TRITONBACKEND_ResponseFactory*>(
1301
+ pb_infer_requests[r]->GetResponseFactoryAddress ());
1302
+ std::unique_ptr<
1303
+ TRITONBACKEND_ResponseFactory, backend::ResponseFactoryDeleter>
1304
+ lresponse_factory (reinterpret_cast <TRITONBACKEND_ResponseFactory*>(
1305
+ response_factory));
1306
+ }
1294
1307
auto error = PbString::LoadFromSharedMemory (
1295
1308
Stub ()->ShmPool (), response_batch_shm_ptr->error );
1296
1309
return TRITONSERVER_ErrorNew (
@@ -1357,6 +1370,16 @@ ModelInstanceState::ProcessRequests(
1357
1370
(*responses)[r] = nullptr ;
1358
1371
continue ;
1359
1372
}
1373
+ {
1374
+ TRITONBACKEND_ResponseFactory* response_factory =
1375
+ reinterpret_cast <TRITONBACKEND_ResponseFactory*>(
1376
+ pb_infer_requests[r]->GetResponseFactoryAddress ());
1377
+ std::unique_ptr<
1378
+ TRITONBACKEND_ResponseFactory, backend::ResponseFactoryDeleter>
1379
+ lresponse_factory (
1380
+ reinterpret_cast <TRITONBACKEND_ResponseFactory*>(
1381
+ response_factory));
1382
+ }
1360
1383
infer_response = InferResponse::LoadFromSharedMemory (
1361
1384
Stub ()->ShmPool (), response_shm_handle[r],
1362
1385
false /* open_cuda_handle */ );
0 commit comments