diff --git a/.gitignore b/.gitignore
index 293f6455..bafd2974 100644
--- a/.gitignore
+++ b/.gitignore
@@ -138,3 +138,5 @@ dmypy.json
 
 # pytype static type analyzer
 .pytype/
+# vscode
+.vscode/settings.json
diff --git a/README.md b/README.md
index a6242a44..b00dc0bf 100644
--- a/README.md
+++ b/README.md
@@ -803,8 +803,11 @@
 You can read more about the inference response parameters in the [parameters
 extension](https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_parameters.md)
 documentation.
 
-Inference response parameters is currently not supported on BLS inference
-responses received by BLS models.
+The parameters associated with an inference response can be retrieved using the
+`inference_response.parameters()` function. This function returns a JSON string
+in which the keys and values are the keys and values of the response's
+parameters object. Note that you need to parse this string with `json.loads` to
+convert it to a dictionary.
 
 ## Managing Python Runtime and Libraries
diff --git a/src/request_executor.cc b/src/request_executor.cc
index c197948d..3c51e626 100644
--- a/src/request_executor.cc
+++ b/src/request_executor.cc
@@ -84,6 +84,7 @@ InferResponseComplete(
   std::unique_ptr<InferResponse> infer_response;
   std::vector<std::shared_ptr<PbTensor>> output_tensors;
   std::shared_ptr<PbError> pb_error;
+  std::string parameters_string;
 
   if (response != nullptr) {
     try {
@@ -140,6 +141,38 @@
           output_tensors.push_back(pb_tensor);
         }
       }
+
+      triton::common::TritonJson::Value parameters_json(
+          triton::common::TritonJson::ValueType::OBJECT);
+      uint32_t parameter_count;
+      THROW_IF_TRITON_ERROR(TRITONSERVER_InferenceResponseParameterCount(
+          response, &parameter_count));
+
+      for (size_t i = 0; i < parameter_count; i++) {
+        const char* name;
+        TRITONSERVER_ParameterType type;
+        const void* vvalue;
+        THROW_IF_TRITON_ERROR(TRITONSERVER_InferenceResponseParameter(
+            response, i, &name, &type, &vvalue));
+        if (type == TRITONSERVER_PARAMETER_INT) {
+          THROW_IF_TRITON_ERROR(parameters_json.AddInt(
+              name, *(reinterpret_cast<const int64_t*>(vvalue))));
+        } else if (type == TRITONSERVER_PARAMETER_BOOL) {
+          THROW_IF_TRITON_ERROR(parameters_json.AddBool(
+              name, *(reinterpret_cast<const bool*>(vvalue))));
+        } else if (type == TRITONSERVER_PARAMETER_STRING) {
+          std::string string = reinterpret_cast<const char*>(vvalue);
+          THROW_IF_TRITON_ERROR(parameters_json.AddString(name, string));
+        } else {
+          throw PythonBackendException(
+              (std::string("Unsupported parameter type for parameter '") +
+               name + "'."));
+        }
+      }
+
+      triton::common::TritonJson::WriteBuffer buffer;
+      THROW_IF_TRITON_ERROR(parameters_json.Write(&buffer));
+      parameters_string = buffer.Contents();
     }
     catch (const PythonBackendException& pb_exception) {
       if (response != nullptr) {
@@ -153,21 +186,20 @@
       output_tensors.clear();
     }
 
-    // TODO: [DLIS-7864] Pass response parameters from BLS response.
     if (!infer_payload->IsDecoupled()) {
       infer_response = std::make_unique<InferResponse>(
-          output_tensors, pb_error, "" /* parameters */,
+          output_tensors, pb_error, parameters_string,
           true /* is_last_response */);
     } else {
       if ((flags & TRITONSERVER_RESPONSE_COMPLETE_FINAL) == 0) {
         // Not the last response.
         infer_response = std::make_unique<InferResponse>(
-            output_tensors, pb_error, "" /* parameters */,
+            output_tensors, pb_error, parameters_string,
             false /* is_last_response */, userp /* id */);
       } else {
         // The last response.
         infer_response = std::make_unique<InferResponse>(
-            output_tensors, pb_error, "" /* parameters */,
+            output_tensors, pb_error, parameters_string,
             true /* is_last_response */, userp /* id */);
       }
     }
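
Usage note for this change: below is a minimal sketch of how a BLS model could consume the response parameters that the backend now serializes. The model name `composing_model` and the tensor names `INPUT0`/`OUTPUT0` are hypothetical; the call pattern follows the Python backend's documented BLS API (`pb_utils.InferenceRequest`, `exec()`, and the `parameters()` accessor described in the README hunk above).

```python
import json

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Sketch of a BLS model that reads parameters off a BLS response."""

    def execute(self, requests):
        responses = []
        for request in requests:
            # Build a BLS request against a composing model. The model and
            # tensor names here are hypothetical.
            input_tensor = pb_utils.Tensor(
                "INPUT0", np.array([1, 2, 3, 4], dtype=np.float32))
            bls_request = pb_utils.InferenceRequest(
                model_name="composing_model",
                requested_output_names=["OUTPUT0"],
                inputs=[input_tensor])
            bls_response = bls_request.exec()

            if bls_response.has_error():
                raise pb_utils.TritonModelException(
                    bls_response.error().message())

            # parameters() returns the JSON string serialized in
            # InferResponseComplete above; parse it into a dict. The
            # `or "{}"` guard covers responses carrying no parameters.
            params = json.loads(bls_response.parameters() or "{}")
            # `params` now maps parameter names to int/bool/string values,
            # matching the three TRITONSERVER_PARAMETER_* types handled in
            # request_executor.cc.

            output_tensor = pb_utils.get_output_tensor_by_name(
                bls_response, "OUTPUT0")
            responses.append(
                pb_utils.InferenceResponse(output_tensors=[output_tensor]))
        return responses
```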