From eb9ab2a73fb9a7d3a5355afce3cf08fddbbbecf7 Mon Sep 17 00:00:00 2001 From: Edward Chen <18449977+edgchen1@users.noreply.github.com> Date: Sun, 23 Feb 2025 22:20:09 -0800 Subject: [PATCH 1/4] [QNN EP] Use absolute path of libcdsprpc.dll on Windows so it doesn't need to be copied anywhere. (#23791) ### Description Look up and use absolute path of libcdsprpc.dll on Windows. ### Motivation and Context The QNN EP's HTP shared memory allocator requires use of the libcdsprpc shared library. On Windows, this previously required copying libcdsprpc.dll from some driver-specific path to somewhere the running code could find it. After this change, libcdsprpc.dll no longer needs to be copied. --- .../core/providers/qnn/rpcmem_library.cc | 159 +++++++++++++++--- .../core/providers/qnn/rpcmem_library.h | 8 +- .../test/providers/qnn/qnn_basic_test.cc | 14 +- onnxruntime/test/shared_lib/test_inference.cc | 15 +- 4 files changed, 146 insertions(+), 50 deletions(-) diff --git a/onnxruntime/core/providers/qnn/rpcmem_library.cc b/onnxruntime/core/providers/qnn/rpcmem_library.cc index 93c5ed54ab371..20918f8bc6de1 100644 --- a/onnxruntime/core/providers/qnn/rpcmem_library.cc +++ b/onnxruntime/core/providers/qnn/rpcmem_library.cc @@ -2,44 +2,157 @@ // Licensed under the MIT License #include "core/providers/qnn/rpcmem_library.h" + +#if defined(_WIN32) +#include + +#include +#include +#endif // defined(_WIN32) + #include "core/providers/qnn/ort_api.h" namespace onnxruntime::qnn { +// Unload the dynamic library referenced by `library_handle`. +// Avoid throwing because this may run from a dtor. +void DynamicLibraryHandleDeleter::operator()(void* library_handle) noexcept { + if (library_handle == nullptr) { + return; + } + + const auto& env = GetDefaultEnv(); + const auto unload_status = env.UnloadDynamicLibrary(library_handle); + + if (!unload_status.IsOK()) { + LOGS_DEFAULT(WARNING) << "Failed to unload dynamic library. Error: " << unload_status.ErrorMessage(); + } +} + namespace { -const PathChar* GetRpcMemSharedLibraryPath() { #if defined(_WIN32) - return ORT_TSTR("libcdsprpc.dll"); -#else - return ORT_TSTR("libcdsprpc.so"); -#endif + +struct ServiceHandleDeleter { + void operator()(SC_HANDLE handle) { ::CloseServiceHandle(handle); } +}; + +using UniqueServiceHandle = std::unique_ptr, ServiceHandleDeleter>; + +Status ReadEnvironmentVariable(const wchar_t* name, std::wstring& value_out) { + const DWORD value_size = ::GetEnvironmentVariableW(name, nullptr, 0); + ORT_RETURN_IF(value_size == 0, + "Failed to get environment variable length. GetEnvironmentVariableW error: ", ::GetLastError()); + + std::vector value(value_size); + + ORT_RETURN_IF(::GetEnvironmentVariableW(name, value.data(), value_size) == 0, + "Failed to get environment variable value. GetEnvironmentVariableW error: ", ::GetLastError()); + + value_out = std::wstring{value.data()}; + return Status::OK(); } -DynamicLibraryHandle LoadDynamicLibrary(const PathString& path, bool global_symbols) { - // Custom deleter to unload the shared library. Avoid throwing from it because it may run in dtor. - const auto unload_library = [](void* library_handle) { - if (library_handle == nullptr) { - return; - } +Status GetServiceBinaryDirectoryPath(const wchar_t* service_name, + std::filesystem::path& service_binary_directory_path_out) { + SC_HANDLE scm_handle_raw = ::OpenSCManagerW(nullptr, // local computer + nullptr, // SERVICES_ACTIVE_DATABASE + STANDARD_RIGHTS_READ); + ORT_RETURN_IF(scm_handle_raw == nullptr, + "Failed to open handle to service control manager. OpenSCManagerW error: ", ::GetLastError()); + + auto scm_handle = UniqueServiceHandle{scm_handle_raw}; + + SC_HANDLE service_handle_raw = ::OpenServiceW(scm_handle.get(), + service_name, + SERVICE_QUERY_CONFIG); + ORT_RETURN_IF(service_handle_raw == nullptr, + "Failed to open service handle. OpenServiceW error: ", ::GetLastError()); + + auto service_handle = UniqueServiceHandle{service_handle_raw}; + + // get service config required buffer size + DWORD service_config_buffer_size{}; + ORT_RETURN_IF(!::QueryServiceConfigW(service_handle.get(), nullptr, 0, &service_config_buffer_size) && + ::GetLastError() != ERROR_INSUFFICIENT_BUFFER, + "Failed to query service configuration buffer size. QueryServiceConfigW error: ", ::GetLastError()); - const auto& env = GetDefaultEnv(); - const auto unload_status = env.UnloadDynamicLibrary(library_handle); + // get the service config + std::vector service_config_buffer(service_config_buffer_size); + QUERY_SERVICE_CONFIGW* service_config = reinterpret_cast(service_config_buffer.data()); + ORT_RETURN_IF(!::QueryServiceConfigW(service_handle.get(), service_config, service_config_buffer_size, + &service_config_buffer_size), + "Failed to query service configuration. QueryServiceConfigW error: ", ::GetLastError()); - if (!unload_status.IsOK()) { - LOGS_DEFAULT(WARNING) << "Failed to unload shared library. Error: " << unload_status.ErrorMessage(); - } - }; + std::wstring service_binary_path_name = service_config->lpBinaryPathName; + // replace system root placeholder with the value of the SYSTEMROOT environment variable + const std::wstring system_root_placeholder = L"\\SystemRoot"; + + ORT_RETURN_IF(service_binary_path_name.find(system_root_placeholder, 0) != 0, + "Service binary path '", ToUTF8String(service_binary_path_name), + "' does not start with expected system root placeholder value '", + ToUTF8String(system_root_placeholder), "'."); + + std::wstring system_root{}; + ORT_RETURN_IF_ERROR(ReadEnvironmentVariable(L"SYSTEMROOT", system_root)); + service_binary_path_name.replace(0, system_root_placeholder.size(), system_root); + + const auto service_binary_path = std::filesystem::path{service_binary_path_name}; + auto service_binary_directory_path = service_binary_path.parent_path(); + + ORT_RETURN_IF(!std::filesystem::exists(service_binary_directory_path), + "Service binary directory path does not exist: ", service_binary_directory_path.string()); + + service_binary_directory_path_out = std::move(service_binary_directory_path); + return Status::OK(); +} + +#endif // defined(_WIN32) + +Status GetRpcMemDynamicLibraryPath(PathString& path_out) { +#if defined(_WIN32) + + std::filesystem::path qcnspmcdm_dir_path{}; + ORT_RETURN_IF_ERROR(GetServiceBinaryDirectoryPath(L"qcnspmcdm", qcnspmcdm_dir_path)); + const auto libcdsprpc_path = qcnspmcdm_dir_path / L"libcdsprpc.dll"; + path_out = libcdsprpc_path.wstring(); + return Status::OK(); + +#else // ^^^ defined(_WIN32) / vvv !defined(_WIN32) + + path_out = ORT_TSTR("libcdsprpc.so"); + return Status::OK(); + +#endif // !defined(_WIN32) +} + +Status LoadDynamicLibrary(const PathString& path, bool global_symbols, + UniqueDynamicLibraryHandle& library_handle_out) { const auto& env = GetDefaultEnv(); - void* library_handle = nullptr; + void* library_handle_raw = nullptr; + ORT_RETURN_IF_ERROR(env.LoadDynamicLibrary(path, global_symbols, &library_handle_raw)); + + library_handle_out = UniqueDynamicLibraryHandle{library_handle_raw}; + return Status::OK(); +} + +UniqueDynamicLibraryHandle GetRpcMemDynamicLibraryHandle() { + std::string_view error_message_prefix = "Failed to initialize RPCMEM dynamic library handle: "; + + PathString rpcmem_library_path{}; + auto status = GetRpcMemDynamicLibraryPath(rpcmem_library_path); + if (!status.IsOK()) { + ORT_THROW(error_message_prefix, status.ErrorMessage()); + } - const auto load_status = env.LoadDynamicLibrary(path, global_symbols, &library_handle); - if (!load_status.IsOK()) { - ORT_THROW("Failed to load ", ToUTF8String(path), ": ", load_status.ErrorMessage()); + UniqueDynamicLibraryHandle library_handle{}; + status = LoadDynamicLibrary(rpcmem_library_path, /* global_symbols */ false, library_handle); + if (!status.IsOK()) { + ORT_THROW(error_message_prefix, status.ErrorMessage()); } - return DynamicLibraryHandle{library_handle, unload_library}; + return library_handle; } RpcMemApi CreateApi(void* library_handle) { @@ -58,7 +171,7 @@ RpcMemApi CreateApi(void* library_handle) { } // namespace RpcMemLibrary::RpcMemLibrary() - : library_handle_(LoadDynamicLibrary(GetRpcMemSharedLibraryPath(), /* global_symbols */ false)), + : library_handle_(GetRpcMemDynamicLibraryHandle()), api_{CreateApi(library_handle_.get())} { } diff --git a/onnxruntime/core/providers/qnn/rpcmem_library.h b/onnxruntime/core/providers/qnn/rpcmem_library.h index 0642c96798188..2746e147373bb 100644 --- a/onnxruntime/core/providers/qnn/rpcmem_library.h +++ b/onnxruntime/core/providers/qnn/rpcmem_library.h @@ -10,7 +10,11 @@ namespace onnxruntime::qnn { -using DynamicLibraryHandle = std::unique_ptr; +struct DynamicLibraryHandleDeleter { + void operator()(void* library_handle) noexcept; +}; + +using UniqueDynamicLibraryHandle = std::unique_ptr; // This namespace contains constants and typedefs corresponding to functions from rpcmem.h. // https://github.com/quic/fastrpc/blob/v0.1.1/inc/rpcmem.h @@ -61,7 +65,7 @@ class RpcMemLibrary { const RpcMemApi& Api() const { return api_; } private: - DynamicLibraryHandle library_handle_; + UniqueDynamicLibraryHandle library_handle_; RpcMemApi api_; }; diff --git a/onnxruntime/test/providers/qnn/qnn_basic_test.cc b/onnxruntime/test/providers/qnn/qnn_basic_test.cc index 98d07fa06c009..0b51b6f8e503d 100644 --- a/onnxruntime/test/providers/qnn/qnn_basic_test.cc +++ b/onnxruntime/test/providers/qnn/qnn_basic_test.cc @@ -1166,18 +1166,8 @@ TEST_F(QnnHTPBackendTests, UseHtpSharedMemoryAllocatorForInputs) { try { qnn_ep = QnnExecutionProviderWithOptions(provider_options); } catch (const OnnxRuntimeException& e) { - // handle particular exception that indicates that the libcdsprpc.so / dll can't be loaded - // NOTE: To run this on a local Windows ARM64 device, you need to copy libcdsprpc.dll to the build directory: - // - Open File Explorer - // - Go to C:/Windows/System32/DriverStore/FileRepository/ - // - Search for a folder that begins with qcnspmcdm8380.inf_arm64_ and open it - // - Copy the libcdsprpc.dll into the build/[PATH CONTAINING onnxruntime.dll] directory of the application. - // TODO(adrianlizarraga): Update CMake build for unittests to automatically copy libcdsprpc.dll into build directory -#if defined(_WIN32) - constexpr const char* expected_error_message = "Failed to load libcdsprpc.dll"; -#else - constexpr const char* expected_error_message = "Failed to load libcdsprpc.so"; -#endif + // handle exception that indicates that the libcdsprpc.so / dll can't be loaded + constexpr const char* expected_error_message = "Failed to initialize RPCMEM dynamic library handle"; ASSERT_THAT(e.what(), testing::HasSubstr(expected_error_message)); GTEST_SKIP() << "HTP shared memory allocator is unavailable."; } diff --git a/onnxruntime/test/shared_lib/test_inference.cc b/onnxruntime/test/shared_lib/test_inference.cc index 59920487a7248..ca9ca0f82a25a 100644 --- a/onnxruntime/test/shared_lib/test_inference.cc +++ b/onnxruntime/test/shared_lib/test_inference.cc @@ -1960,20 +1960,9 @@ static bool CreateSessionWithQnnEpAndQnnHtpSharedMemoryAllocator(PATH_TYPE model session = Ort::Session{*ort_env, model_path, session_options}; return true; } catch (const Ort::Exception& e) { - // handle particular exception that indicates that the libcdsprpc.so / dll can't be loaded - // NOTE: To run this on a local Windows ARM64 device, you need to copy libcdsprpc.dll to the build directory: - // - Open File Explorer - // - Go to C:/Windows/System32/DriverStore/FileRepository/ - // - Search for a folder that begins with qcnspmcdm8380.inf_arm64_ and open it - // - Copy the libcdsprpc.dll into the build/[PATH CONTAINING onnxruntime.dll] directory of the application. - // TODO(adrianlizarraga): Update CMake build for unittests to automatically copy libcdsprpc.dll into build directory + // handle exception that indicates that the libcdsprpc.so / dll can't be loaded std::string_view error_message = e.what(); - -#if defined(_WIN32) - std::string_view expected_error_message = "Failed to load libcdsprpc.dll"; -#else - std::string_view expected_error_message = "Failed to load libcdsprpc.so"; -#endif + std::string_view expected_error_message = "Failed to initialize RPCMEM dynamic library handle"; if (e.GetOrtErrorCode() == ORT_FAIL && error_message.find(expected_error_message) != std::string_view::npos) { From aca623a2fc8427a6d072b4d31e55881abf3ef1e0 Mon Sep 17 00:00:00 2001 From: Yifan Li <109183385+yf711@users.noreply.github.com> Date: Wed, 26 Feb 2025 17:25:44 -0800 Subject: [PATCH 2/4] [TensorRT EP] update oss parser to latest (#23710) ### Description * Update oss parser version to latest commit of 10.8-GA branch ### Motivation and Context * Action needed to adapt latest onnx-tensorrt 10.8-GA branch to fix scatterND attribute issue and `plugin.h` not found issue --- cmake/deps.txt | 2 +- .../azure-pipelines/linux-gpu-tensorrt-daily-perf-pipeline.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/deps.txt b/cmake/deps.txt index d2e0fd63215f4..d0bab93d3c16f 100644 --- a/cmake/deps.txt +++ b/cmake/deps.txt @@ -37,7 +37,7 @@ mimalloc;https://github.com/microsoft/mimalloc/archive/refs/tags/v2.1.1.zip;d5ee mp11;https://github.com/boostorg/mp11/archive/refs/tags/boost-1.82.0.zip;9bc9e01dffb64d9e0773b2e44d2f22c51aace063 onnx;https://github.com/onnx/onnx/archive/refs/tags/v1.17.0.zip;13a60ac5217c104139ce0fd024f48628e7bcf5bc # Use the latest commit of 10.8-GA -onnx_tensorrt;https://github.com/onnx/onnx-tensorrt/archive/118ed0aea197fa9a7d3ea66180a1d5ddb9deecc3.zip;b78aed3728ad4daf6dc47ea10c1d243dee1d95b1 +onnx_tensorrt;https://github.com/onnx/onnx-tensorrt/archive/c5ca8912f30e9ad630a0ef565e3d5f4bd5e91563.zip;588b294aaa9e84679ed5815cea1d399210ac98c2 protobuf;https://github.com/protocolbuffers/protobuf/archive/refs/tags/v21.12.zip;7cf2733949036c7d52fda017badcab093fe73bfa protoc_win64;https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip;b4521f7ada5b260380f94c4bd7f1b7684c76969a protoc_win32;https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win32.zip;3688010318192c46ce73213cdfb6b3e5656da874 diff --git a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-daily-perf-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-daily-perf-pipeline.yml index 4417ebce60989..c44d3cff09e96 100644 --- a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-daily-perf-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-daily-perf-pipeline.yml @@ -83,7 +83,7 @@ jobs: - name: parser ${{ if eq(parameters.UseTensorrtOssParser, true) }}: - value: --use_tensorrt_oss_parser $(parameters.UseTensorrtOssParser) }} + value: --use_tensorrt_oss_parser $(parameters.UseTensorrtOssParser) steps: - ${{ if and(eq(parameters.TrtVersion, 'BIN'), eq(parameters.UseTensorrtOssParser, false)) }}: From 090e43bdd08dfe29f2346f47ee25370a36bf0fdc Mon Sep 17 00:00:00 2001 From: Ankit Maheshkar Date: Thu, 27 Feb 2025 21:11:06 +0530 Subject: [PATCH 3/4] [OVEP] Update support for Contrib Ops (#23789) ### Description This PR enables Contrib Ops support in OVEP namely below - DynamicQuantizeMatMul, FusedMatMul, QuickGelu, SkipSimplifiedLayerNormalization Co-authored-by: n1harika --- onnxruntime/core/providers/openvino/ov_versions/data_ops.cc | 4 ++++ onnxruntime/test/contrib_ops/dynamic_quantize_matmul_test.cc | 2 +- onnxruntime/test/contrib_ops/embed_layer_norm_op_test.cc | 4 ---- onnxruntime/test/contrib_ops/fused_matmul_op_test.cc | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc index 548fe6b156329..f7326642a5544 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc +++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc @@ -121,6 +121,7 @@ std::vector supported_op_mode = { {"DepthToSpace", V_2020_4, {"CPU", "GPU"}}, {"DequantizeLinear", V_2021_4, {"CPU", "GPU"}}, {"DequantizeLinear", V_2024_4, {"NPU"}}, + {"DynamicQuantizeMatMul", V_2025_0, {"CPU", "GPU"}}, {"Div", V_2020_4, {"CPU", "GPU"}}, {"Dropout", V_2020_4, {"CPU", "GPU"}}, {"Elu", V_2020_4, {"CPU", "GPU"}}, @@ -136,6 +137,7 @@ std::vector supported_op_mode = { {"Floor", V_2020_4, {"CPU", "GPU"}}, {"FusedConv", V_2023_0, {"CPU", "GPU"}}, {"FusedGemm", V_2023_0, {"CPU", "GPU"}}, + {"FusedMatMul", V_2025_0, {"CPU", "GPU"}}, {"Gather", V_2020_4, {"CPU", "GPU"}}, {"GatherElements", V_2022_2, {"CPU", "GPU"}}, {"GatherND", V_2021_4, {"CPU", "GPU"}}, @@ -190,6 +192,7 @@ std::vector supported_op_mode = { {"PRelu", V_2020_4, {"CPU", "GPU"}}, {"QLinearMatMul", V_2022_3, {"CPU"}}, {"QuantizeLinear", V_2021_4, {"CPU", "GPU"}}, + {"QuickGelu", V_2025_0, {"CPU", "GPU"}}, {"RNN", V_2023_1, {"CPU", "GPU"}}, {"RandomNormalLike", V_2023_0, {"CPU", "GPU"}}, {"RandomNormalLike", V_2023_0, {"CPU", "GPU"}}, @@ -229,6 +232,7 @@ std::vector supported_op_mode = { {"Sinh", V_2020_4, {"CPU"}}, {"Size", V_2022_1, {"CPU", "GPU"}}, {"SkipLayerNormalization", V_2024_5, {"CPU", "GPU"}}, + {"SkipSimplifiedLayerNormalization", V_2025_0, {"CPU", "GPU"}}, {"Slice", V_2020_4, {"CPU", "GPU"}}, {"Softmax", V_2020_4, {"CPU", "GPU"}}, {"Softplus", V_2022_1, {"CPU", "GPU"}}, diff --git a/onnxruntime/test/contrib_ops/dynamic_quantize_matmul_test.cc b/onnxruntime/test/contrib_ops/dynamic_quantize_matmul_test.cc index 0b64ea3de8ded..c9a7116bf8052 100644 --- a/onnxruntime/test/contrib_ops/dynamic_quantize_matmul_test.cc +++ b/onnxruntime/test/contrib_ops/dynamic_quantize_matmul_test.cc @@ -141,7 +141,7 @@ void TestDynamicQuantizeMatMul(bool is_matrix_b_constant, per_column, has_zp, has_bias); test.AddOutput("Y", Y_dims, Y_data); test.SetOutputRelErr("Y", 0.02f); - test.Run(); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider}); } template diff --git a/onnxruntime/test/contrib_ops/embed_layer_norm_op_test.cc b/onnxruntime/test/contrib_ops/embed_layer_norm_op_test.cc index 043717a9f6e92..9ecbb04ebccca 100644 --- a/onnxruntime/test/contrib_ops/embed_layer_norm_op_test.cc +++ b/onnxruntime/test/contrib_ops/embed_layer_norm_op_test.cc @@ -158,11 +158,7 @@ static void RunTest(const embedlayernorm::OpData& data, execution_providers.push_back(DefaultDmlExecutionProvider()); tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); } else { -#if defined(USE_OPENVINO) tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider}); -#else - tester.Run(); -#endif } } } diff --git a/onnxruntime/test/contrib_ops/fused_matmul_op_test.cc b/onnxruntime/test/contrib_ops/fused_matmul_op_test.cc index db5ce1742e37c..b1762d16795d1 100644 --- a/onnxruntime/test/contrib_ops/fused_matmul_op_test.cc +++ b/onnxruntime/test/contrib_ops/fused_matmul_op_test.cc @@ -212,8 +212,8 @@ void RunFusedMatMulTest(const char* op_name, int32_t opset_version = 7, bool tra test.AddOutput("Y", t.expected_dims, t.expected_vals); - // Disable TensorRT because of unsupported data type - test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); + // Disable OpenVINO, TensorRT because of unsupported data type + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kOpenVINOExecutionProvider}); } } From 899ee83c1f68ed8a7d338ed1ff89796668d0566b Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Thu, 27 Feb 2025 07:43:39 -0800 Subject: [PATCH 4/4] Update onnxruntime_external_deps.cmake: add missing EXCLUDE_FROM_ALL (#23829) ### Description To resolve #23821 ### Motivation and Context Similar to #23641 . --- cmake/external/onnxruntime_external_deps.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/cmake/external/onnxruntime_external_deps.cmake b/cmake/external/onnxruntime_external_deps.cmake index 1b1e11c9772f9..ebf20ab21bbd2 100644 --- a/cmake/external/onnxruntime_external_deps.cmake +++ b/cmake/external/onnxruntime_external_deps.cmake @@ -301,6 +301,7 @@ if(NOT TARGET Boost::mp11) onnxruntime_fetchcontent_declare( mp11 URL ${DEP_URL_mp11} + EXCLUDE_FROM_ALL FIND_PACKAGE_ARGS NAMES Boost ) onnxruntime_fetchcontent_makeavailable(mp11)