
feat: Extend response parameters support to BLS in python backend #395

Merged · 9 commits · Feb 6, 2025
Changes from all commits
2 changes: 2 additions & 0 deletions .gitignore
@@ -138,3 +138,5 @@ dmypy.json
# pytype static type analyzer
.pytype/

+# vscode
+.vscode/settings.json
7 changes: 5 additions & 2 deletions README.md
@@ -803,8 +803,11 @@ You can read more about the inference response parameters in the [parameters
extension](https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_parameters.md)
documentation.

-Inference response parameters is currently not supported on BLS inference
-responses received by BLS models.
+The parameters associated with an inference response can be retrieved using the
+`inference_response.parameters()` function. This function returns a JSON string
+in which the keys and values are those of the response's parameters object.
+Note that you need to parse this string with `json.loads` to convert it to a
+dictionary.
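
A minimal sketch of the usage documented above, assuming it runs inside a BLS model's `execute` function; the composing model name and tensor names (`my_model`, `INPUT0`, `OUTPUT0`) are hypothetical placeholders:

```python
import json

import numpy as np
import triton_python_backend_utils as pb_utils

# Issue a BLS request to a composing model (names are placeholders).
infer_request = pb_utils.InferenceRequest(
    model_name="my_model",
    requested_output_names=["OUTPUT0"],
    inputs=[pb_utils.Tensor("INPUT0", np.array([1.0], dtype=np.float32))])
infer_response = infer_request.exec()

# `parameters()` returns a JSON string; parse it to get a dictionary.
# The `or "{}"` guards against an empty parameters string.
params = json.loads(infer_response.parameters() or "{}")
```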

## Managing Python Runtime and Libraries

40 changes: 36 additions & 4 deletions src/request_executor.cc
@@ -84,6 +84,7 @@ InferResponseComplete(
  std::unique_ptr<InferResponse> infer_response;
  std::vector<std::shared_ptr<PbTensor>> output_tensors;
  std::shared_ptr<PbError> pb_error;
+  std::string parameters_string;

  if (response != nullptr) {
    try {
@@ -140,6 +141,38 @@
          output_tensors.push_back(pb_tensor);
        }
      }
+
+      triton::common::TritonJson::Value parameters_json(
+          triton::common::TritonJson::ValueType::OBJECT);
+      uint32_t parameter_count;
+      THROW_IF_TRITON_ERROR(TRITONSERVER_InferenceResponseParameterCount(
+          response, &parameter_count));
+
+      for (size_t i = 0; i < parameter_count; i++) {
+        const char* name;
+        TRITONSERVER_ParameterType type;
+        const void* vvalue;
+        THROW_IF_TRITON_ERROR(TRITONSERVER_InferenceResponseParameter(
+            response, i, &name, &type, &vvalue));
+        if (type == TRITONSERVER_PARAMETER_INT) {
+          THROW_IF_TRITON_ERROR(parameters_json.AddInt(
+              name, *(reinterpret_cast<const int64_t*>(vvalue))));
+        } else if (type == TRITONSERVER_PARAMETER_BOOL) {
+          THROW_IF_TRITON_ERROR(parameters_json.AddBool(
+              name, *(reinterpret_cast<const bool*>(vvalue))));
+        } else if (type == TRITONSERVER_PARAMETER_STRING) {
+          std::string string = reinterpret_cast<const char*>(vvalue);
+          THROW_IF_TRITON_ERROR(parameters_json.AddString(name, string));
+        } else {
+          throw PythonBackendException(
+              (std::string("Unsupported parameter type for parameter '") +
+               name + "'."));
+        }
+      }
+
+      triton::common::TritonJson::WriteBuffer buffer;
+      THROW_IF_TRITON_ERROR(parameters_json.Write(&buffer));
+      parameters_string = buffer.Contents();
    }
    catch (const PythonBackendException& pb_exception) {
      if (response != nullptr) {
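
The added loop maps Triton's INT, BOOL, and STRING parameter types onto JSON number, boolean, and string values, and rejects any other type. For illustration only (the names and values below are made up), the resulting `parameters_string` could be consumed on the Python side like this:

```python
import json

# Hypothetical serialized parameters string as produced by the C++ code above.
parameters_string = '{"sequence_id": 42, "final": false, "stage": "decode"}'

params = json.loads(parameters_string)
assert params == {"sequence_id": 42, "final": False, "stage": "decode"}
```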
@@ -153,21 +186,20 @@
      output_tensors.clear();
    }
  }

-  // TODO: [DLIS-7864] Pass response parameters from BLS response.
  if (!infer_payload->IsDecoupled()) {
    infer_response = std::make_unique<InferResponse>(
-        output_tensors, pb_error, "" /* parameters */,
+        output_tensors, pb_error, parameters_string,
        true /* is_last_response */);
  } else {
    if ((flags & TRITONSERVER_RESPONSE_COMPLETE_FINAL) == 0) {
      // Not the last response.
      infer_response = std::make_unique<InferResponse>(
-          output_tensors, pb_error, "" /* parameters */,
+          output_tensors, pb_error, parameters_string,
          false /* is_last_response */, userp /* id */);
    } else {
      // The last response.
      infer_response = std::make_unique<InferResponse>(
-          output_tensors, pb_error, "" /* parameters */,
+          output_tensors, pb_error, parameters_string,
          true /* is_last_response */, userp /* id */);
    }
  }
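
With this change, `parameters_string` is passed on all three construction paths, so BLS models receive response parameters both for regular responses and for every response of a decoupled composing model. A sketch of consuming them in Python, where the decoupled model and tensor names are hypothetical placeholders:

```python
import json

import numpy as np
import triton_python_backend_utils as pb_utils

# Decoupled BLS: `exec(decoupled=True)` returns an iterator of responses
# (the model and tensor names are placeholders).
infer_request = pb_utils.InferenceRequest(
    model_name="my_decoupled_model",
    requested_output_names=["OUTPUT0"],
    inputs=[pb_utils.Tensor("INPUT0", np.array([1.0], dtype=np.float32))])

for infer_response in infer_request.exec(decoupled=True):
    # Each response carries its own parameters string.
    params = json.loads(infer_response.parameters() or "{}")
```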