Commit f61d423

feat: Extend response parameters support to BLS in python backend (#395)
1 parent 1ea48a6 commit f61d423

3 files changed: +43 −6 lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
@@ -138,3 +138,5 @@ dmypy.json
 # pytype static type analyzer
 .pytype/
 
+# vscode
+.vscode/settings.json

README.md

Lines changed: 5 additions & 2 deletions
@@ -803,8 +803,11 @@ You can read more about the inference response parameters in the [parameters
 extension](https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_parameters.md)
 documentation.
 
-Inference response parameters is currently not supported on BLS inference
-responses received by BLS models.
+The parameters associated with an inference response can be retrieved using the
+`inference_response.parameters()` function. This function returns a JSON string
+where the keys are the keys of the parameters object and the values are the
+values for the parameters field. Note that you need to parse this string using
+`json.loads` to convert it to a dictionary.
 
 ## Managing Python Runtime and Libraries
 
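With this change, the same `parameters()` accessor works on responses returned from BLS calls inside a Python model. Below is a minimal sketch of reading them in a BLS model's `execute` function; the composing model name (`composing_model`) and tensor names (`INPUT0`, `OUTPUT0`) are hypothetical placeholders, and the `json.loads` parsing follows the README text above:

```python
import json

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    def execute(self, requests):
        responses = []
        for request in requests:
            # Hypothetical composing model and tensor names; adjust to
            # match your own model repository.
            infer_request = pb_utils.InferenceRequest(
                model_name="composing_model",
                requested_output_names=["OUTPUT0"],
                inputs=[pb_utils.get_input_tensor_by_name(request, "INPUT0")],
            )
            infer_response = infer_request.exec()
            if infer_response.has_error():
                raise pb_utils.TritonModelException(
                    infer_response.error().message())

            # parameters() returns a JSON string (possibly empty); parse it
            # into a dictionary before use.
            params = json.loads(infer_response.parameters() or "{}")
            print("BLS response parameters:", params)

            responses.append(pb_utils.InferenceResponse(
                output_tensors=infer_response.output_tensors()))
        return responses
```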

src/request_executor.cc

Lines changed: 36 additions & 4 deletions
@@ -84,6 +84,7 @@ InferResponseComplete(
   std::unique_ptr<InferResponse> infer_response;
   std::vector<std::shared_ptr<PbTensor>> output_tensors;
   std::shared_ptr<PbError> pb_error;
+  std::string parameters_string;
 
   if (response != nullptr) {
     try {
@@ -140,6 +141,38 @@
           output_tensors.push_back(pb_tensor);
         }
       }
+
+      triton::common::TritonJson::Value parameters_json(
+          triton::common::TritonJson::ValueType::OBJECT);
+      uint32_t parameter_count;
+      THROW_IF_TRITON_ERROR(TRITONSERVER_InferenceResponseParameterCount(
+          response, &parameter_count));
+
+      for (size_t i = 0; i < parameter_count; i++) {
+        const char* name;
+        TRITONSERVER_ParameterType type;
+        const void* vvalue;
+        THROW_IF_TRITON_ERROR(TRITONSERVER_InferenceResponseParameter(
+            response, i, &name, &type, &vvalue));
+        if (type == TRITONSERVER_PARAMETER_INT) {
+          THROW_IF_TRITON_ERROR(parameters_json.AddInt(
+              name, *(reinterpret_cast<const int64_t*>(vvalue))));
+        } else if (type == TRITONSERVER_PARAMETER_BOOL) {
+          THROW_IF_TRITON_ERROR(parameters_json.AddBool(
+              name, *(reinterpret_cast<const bool*>(vvalue))));
+        } else if (type == TRITONSERVER_PARAMETER_STRING) {
+          std::string string = reinterpret_cast<const char*>(vvalue);
+          THROW_IF_TRITON_ERROR(parameters_json.AddString(name, string));
+        } else {
+          throw PythonBackendException(
+              (std::string("Unsupported parameter type for parameter '") +
+               name + "'."));
+        }
+      }
+
+      triton::common::TritonJson::WriteBuffer buffer;
+      THROW_IF_TRITON_ERROR(parameters_json.Write(&buffer));
+      parameters_string = buffer.Contents();
     }
     catch (const PythonBackendException& pb_exception) {
       if (response != nullptr) {
@@ -153,21 +186,20 @@
     output_tensors.clear();
   }
 
-  // TODO: [DLIS-7864] Pass response parameters from BLS response.
   if (!infer_payload->IsDecoupled()) {
     infer_response = std::make_unique<InferResponse>(
-        output_tensors, pb_error, "" /* parameters */,
+        output_tensors, pb_error, parameters_string,
         true /* is_last_response */);
   } else {
     if ((flags & TRITONSERVER_RESPONSE_COMPLETE_FINAL) == 0) {
       // Not the last response.
       infer_response = std::make_unique<InferResponse>(
-          output_tensors, pb_error, "" /* parameters */,
+          output_tensors, pb_error, parameters_string,
           false /* is_last_response */, userp /* id */);
     } else {
       // The last response.
       infer_response = std::make_unique<InferResponse>(
-          output_tensors, pb_error, "" /* parameters */,
+          output_tensors, pb_error, parameters_string,
           true /* is_last_response */, userp /* id */);
     }
   }
