Skip to content

Commit 3753f5c

Browse files
[QNN EP] Dump QNN json graph (microsoft#22843)
### Description Adds QNN provider options to save the generated QNN graphs into JSON files. These JSON files contain information about the QNN nodes, tensors, data types used in the corresponding graph. These JSON files can optionally be loaded by [QNN Netron](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/tools.html#qnn-netron-beta) to visualize the QNN graphs generated by QNN EP. New QNN provider options: - `"dump_json_qnn_graph"`: Set to "1" to dump QNN graphs generated by QNN EP as JSON files. Each graph partition assigned to QNN EP is dumped to a separate file. - `"json_qnn_graph_dir"`: Directory in which to dump QNN JSON graphs. If not specified, QNN graphs are dumped in the program's current working directory. Ignored if `"dump_json_qnn_graph"` is not set. ### Motivation and Context Makes it easier to debug, visualize, and understand the QNN graphs generated by QNN EP. --------- Co-authored-by: Hector Li <[email protected]>
1 parent 5803a14 commit 3753f5c

14 files changed

+453
-19
lines changed

cmake/onnxruntime_providers_qnn.cmake

+4-2
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@
2222
onnxruntime_add_static_library(onnxruntime_providers_qnn ${onnxruntime_providers_qnn_srcs})
2323
onnxruntime_add_include_to_target(onnxruntime_providers_qnn onnxruntime_common onnxruntime_framework onnx
2424
onnx_proto protobuf::libprotobuf-lite
25-
flatbuffers::flatbuffers Boost::mp11)
25+
flatbuffers::flatbuffers Boost::mp11
26+
nlohmann_json::nlohmann_json)
2627
add_dependencies(onnxruntime_providers_qnn onnx ${onnxruntime_EXTERNAL_DEPENDENCIES})
2728
set_target_properties(onnxruntime_providers_qnn PROPERTIES CXX_STANDARD_REQUIRED ON)
2829
set_target_properties(onnxruntime_providers_qnn PROPERTIES FOLDER "ONNXRuntime")
@@ -50,7 +51,8 @@
5051
source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_qnn_srcs})
5152
onnxruntime_add_shared_library_module(onnxruntime_providers_qnn ${onnxruntime_providers_qnn_srcs})
5253
onnxruntime_add_include_to_target(onnxruntime_providers_qnn ${ONNXRUNTIME_PROVIDERS_SHARED} ${GSL_TARGET} onnx
53-
onnxruntime_common Boost::mp11 safeint_interface)
54+
onnxruntime_common Boost::mp11 safeint_interface
55+
nlohmann_json::nlohmann_json)
5456
target_link_libraries(onnxruntime_providers_qnn PRIVATE ${ONNXRUNTIME_PROVIDERS_SHARED} ${ABSEIL_LIBS} ${CMAKE_DL_LIBS})
5557
add_dependencies(onnxruntime_providers_qnn onnxruntime_providers_shared ${onnxruntime_EXTERNAL_DEPENDENCIES})
5658
target_include_directories(onnxruntime_providers_qnn PRIVATE ${ONNXRUNTIME_ROOT}

include/onnxruntime/core/session/onnxruntime_c_api.h

+4
Original file line numberDiff line numberDiff line change
@@ -3674,6 +3674,10 @@ struct OrtApi {
36743674
* be available.
36753675
* - "0": Default. Disabled.
36763676
* - "1": Enabled.
3677+
* "dump_json_qnn_graph": Set to "1" to dump QNN graphs generated by QNN EP as JSON files. Each graph partition
3678+
* assigned to QNN EP is dumped to a separate file.
3679+
* "json_qnn_graph_dir": Directory in which to dump QNN JSON graphs. If not specified, QNN graphs are dumped in the
3680+
* program's current working directory. Ignored if "dump_json_qnn_graph" is not set.
36773681
*
36783682
* SNPE supported keys:
36793683
* "runtime": SNPE runtime engine, options: "CPU", "CPU_FLOAT32", "GPU", "GPU_FLOAT32_16_HYBRID", "GPU_FLOAT16",

onnxruntime/core/providers/qnn/builder/qnn_def.cc

+11-6
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
#include "core/providers/qnn/builder/qnn_def.h"
55
#include "core/providers/qnn/builder/qnn_utils.h"
6+
#include <functional>
67
#include <memory>
78
#include <ostream>
89
#include <cstring>
@@ -432,6 +433,15 @@ Status CompareQnnQuantParams(const Qnn_QuantizeParams_t& qparam0, const Qnn_Quan
432433
return Status::OK();
433434
}
434435

436+
uint32_t CalcQnnTensorNumElems(const Qnn_Tensor_t& qnn_tensor) {
437+
uint32_t* qnn_tensor_dims = GetQnnTensorDims(qnn_tensor);
438+
uint32_t qnn_tensor_rank = GetQnnTensorRank(qnn_tensor);
439+
return std::accumulate(qnn_tensor_dims,
440+
qnn_tensor_dims + qnn_tensor_rank,
441+
1,
442+
std::multiplies<uint32_t>());
443+
}
444+
435445
bool CreateTensorInQnnGraph(const QNN_INTERFACE_VER_TYPE& qnn_interface,
436446
const Qnn_GraphHandle_t& graph,
437447
const std::string& node_name,
@@ -466,12 +476,7 @@ bool CreateTensorInQnnGraph(const QNN_INTERFACE_VER_TYPE& qnn_interface,
466476
return false;
467477
}
468478
// verify size expressed by the dims matches the raw tensor size
469-
auto qnn_tensor_dims = GetQnnTensorDims(qnn_tensor);
470-
auto qnn_tensor_rank = GetQnnTensorRank(qnn_tensor);
471-
uint32_t qnn_tensor_size = std::accumulate(qnn_tensor_dims,
472-
qnn_tensor_dims + qnn_tensor_rank,
473-
static_cast<uint32_t>(data_size),
474-
std::multiplies<uint32_t>());
479+
uint32_t qnn_tensor_size = CalcQnnTensorNumElems(qnn_tensor) * gsl::narrow_cast<uint32_t>(data_size);
475480
auto qnn_tensor_buf_size = GetQnnTensorClientBuf(qnn_tensor).dataSize;
476481
if (qnn_tensor_size != qnn_tensor_buf_size) {
477482
ss << "Data length mismatch for static tensor. node_name: " << node_name

onnxruntime/core/providers/qnn/builder/qnn_def.h

+1
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ Qnn_DataType_t GetQnnTensorDataType(const Qnn_Tensor_t& qnn_tensor);
122122
Qnn_TensorMemType_t GetQnnTensorMemType(const Qnn_Tensor_t& qnn_tensor);
123123
uint32_t GetQnnTensorRank(const Qnn_Tensor_t& qnn_tensor);
124124
uint32_t* GetQnnTensorDims(const Qnn_Tensor_t& qnn_tensor);
125+
uint32_t CalcQnnTensorNumElems(const Qnn_Tensor_t& qnn_tensor);
125126
const Qnn_ClientBuffer_t& GetQnnTensorClientBuf(const Qnn_Tensor_t& qnn_tensor);
126127
Qnn_MemHandle_t GetQnnTensorMemHandle(const Qnn_Tensor_t& qnn_tensor);
127128
const Qnn_QuantizeParams_t& GetQnnTensorQParams(const Qnn_Tensor_t& qnn_tensor);

onnxruntime/core/providers/qnn/builder/qnn_model.cc

+17-2
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include "qnn_model.h"
55

66
#include <iostream>
7+
#include <fstream>
78
#include <gsl/gsl>
89
#include "QnnOpDef.h"
910

@@ -94,7 +95,8 @@ Status QnnModel::ComposeGraph(const GraphViewer& graph_viewer,
9495
const onnxruntime::Node& fused_node,
9596
const qnn::ModelSettings& model_settings,
9697
const logging::Logger& logger,
97-
const QnnGraph_Config_t** graph_configs) {
98+
const QnnGraph_Config_t** graph_configs,
99+
const std::string& json_qnn_graph_path) {
98100
LOGS(logger, VERBOSE) << "ComposeGraph Graph name: " << graph_viewer.Name();
99101

100102
// Holder for the NodeUnits in the graph, this will guarantee the NodeUnits is
@@ -137,7 +139,20 @@ Status QnnModel::ComposeGraph(const GraphViewer& graph_viewer,
137139
}
138140
}
139141

140-
ORT_RETURN_IF_NOT(qnn_model_wrapper.ComposeQnnGraph(), "Failed to compose Qnn graph.");
142+
const bool build_json_graph = !json_qnn_graph_path.empty();
143+
ORT_RETURN_IF_NOT(qnn_model_wrapper.ComposeQnnGraph(build_json_graph), "Failed to compose Qnn graph.");
144+
145+
if (build_json_graph) {
146+
const nlohmann::json& json_graph = qnn_model_wrapper.GetQnnJSONGraph();
147+
std::ofstream ofs(json_qnn_graph_path);
148+
149+
if (ofs.is_open()) {
150+
ofs << json_graph.dump();
151+
ofs.close();
152+
} else {
153+
LOGS(logger, WARNING) << "Could not open JSON graph file: " << json_qnn_graph_path;
154+
}
155+
}
141156

142157
rt = GetGraphInfoFromModel(qnn_model_wrapper, logger);
143158
if (!rt) {

onnxruntime/core/providers/qnn/builder/qnn_model.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,8 @@ class QnnModel {
3535
const onnxruntime::Node& fused_node,
3636
const qnn::ModelSettings& model_settings,
3737
const logging::Logger& logger,
38-
const QnnGraph_Config_t** graph_configs = nullptr);
38+
const QnnGraph_Config_t** graph_configs = nullptr,
39+
const std::string& json_qnn_graph_path = "");
3940

4041
Status FinalizeGraphs(const logging::Logger& logger);
4142

onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc

+5-1
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,7 @@ bool QnnModelWrapper::CreateQnnNode(const std::string& qnn_node_name,
267267
}
268268
}
269269

270-
bool QnnModelWrapper::ComposeQnnGraph() {
270+
bool QnnModelWrapper::ComposeQnnGraph(bool build_json_qnn_graph) {
271271
LOGS(logger_, VERBOSE) << "Compose Qnn Graph.";
272272
// ORT_RETURN_IF(qnn_op_property_list_.empty(), "Empty Qnn op list, no graph to compose.");
273273
if (qnn_op_property_list_.empty()) {
@@ -306,6 +306,10 @@ bool QnnModelWrapper::ComposeQnnGraph() {
306306
LOGS(logger_, ERROR) << error_msg;
307307
return false;
308308
}
309+
310+
if (build_json_qnn_graph) {
311+
json_qnn_graph_.AddOp(op_config_wrapper);
312+
}
309313
}
310314

311315
return true;

onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h

+9-2
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,12 @@
88
#include <vector>
99

1010
#include "QnnInterface.h"
11-
#include "qnn_def.h"
11+
#include "nlohmann/json.hpp"
1212

1313
#include "core/providers/qnn/ort_api.h"
14+
#include "core/providers/qnn/builder/qnn_def.h"
1415
#include "core/providers/qnn/builder/qnn_quant_params_wrapper.h"
16+
#include "core/providers/qnn/builder/qnn_utils.h"
1517

1618
namespace onnxruntime {
1719
namespace qnn {
@@ -91,7 +93,7 @@ class QnnModelWrapper {
9193
std::vector<std::string>&& param_tensor_names,
9294
bool do_op_validation = false);
9395

94-
bool ComposeQnnGraph();
96+
bool ComposeQnnGraph(bool build_json_qnn_graph = false);
9597

9698
Qnn_GraphHandle_t GetQnnGraph() const { return graph_; }
9799

@@ -129,6 +131,10 @@ class QnnModelWrapper {
129131
return input_index_map_.find(tensor_name) != input_index_map_.end();
130132
}
131133

134+
const nlohmann::json& GetQnnJSONGraph() {
135+
return json_qnn_graph_.Finalize();
136+
}
137+
132138
Qnn_TensorType_t GetTensorType(const std::string& tensor_name) const {
133139
if (IsInitializerInput(tensor_name)) {
134140
return QNN_TENSOR_TYPE_STATIC;
@@ -321,6 +327,7 @@ class QnnModelWrapper {
321327
const std::unordered_set<std::string>& initializer_lookup_;
322328
QnnBackendType qnn_backend_type_ = QnnBackendType::CPU;
323329
ModelSettings model_settings_ = {};
330+
utils::QnnJSONGraph json_qnn_graph_;
324331
}; // QnnModelWrapper
325332

326333
} // namespace qnn

0 commit comments

Comments (0)