diff --git a/CHANGELOG.md b/CHANGELOG.md index e15d41f23c..6f57941c65 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ All notable changes to this project will be documented in this file. ## [Unreleased] ### Added - Device descriptors "max_compute_units", "max_work_item_dimensions", "max_work_item_sizes", "max_work_group_size", "max_num_sub_groups" and "aspects" for int64 atomics inside dpctl C API and inside the dpctl.SyclDevice class. +- MemoryUSM* classes moved to `dpctl.memory` module, added support for aligned allocation, added support for `prefetch` and `mem_advise` (sychronous) methods, implemented `copy_to_host`, `copy_from_host` and `copy_from_device` methods, pickling support, and zero-copy interoperability with Python objects which implement `__sycl_usm_array_inerface__` protocol. ### Removed - The Legacy OpenCL interface. diff --git a/backends/include/dppl_sycl_device_interface.h b/backends/include/dppl_sycl_device_interface.h index f677c642fc..1a5a43ab84 100644 --- a/backends/include/dppl_sycl_device_interface.h +++ b/backends/include/dppl_sycl_device_interface.h @@ -203,4 +203,15 @@ DPPLDevice_GetVendorName (__dppl_keep const DPPLSyclDeviceRef DRef); DPPL_API bool DPPLDevice_IsHostUnifiedMemory (__dppl_keep const DPPLSyclDeviceRef DRef); +/*! + * @brief Checks if two DPPLSyclDeviceRef objects point to the same + * sycl::device. + * + * @param DevRef1 First opaque pointer to the sycl device. + * @param DevRef2 Second opaque pointer to the sycl device. + * @return True if the underlying sycl::device are same, false otherwise. + */ +DPPL_API +bool DPPLDevice_AreEq (__dppl_keep const DPPLSyclDeviceRef DevRef1, + __dppl_keep const DPPLSyclDeviceRef DevRef2); DPPL_C_EXTERN_C_END diff --git a/backends/include/dppl_sycl_queue_interface.h b/backends/include/dppl_sycl_queue_interface.h index 5ba2011907..2272858181 100644 --- a/backends/include/dppl_sycl_queue_interface.h +++ b/backends/include/dppl_sycl_queue_interface.h @@ -199,4 +199,30 @@ DPPL_API void DPPLQueue_Memcpy (__dppl_keep const DPPLSyclQueueRef QRef, void *Dest, const void *Src, size_t Count); +/*! + * @brief C-API wrapper for sycl::queue::prefetch, the function waits on an event + * till the prefetch operation completes. + * + * @param QRef An opaque pointer to the sycl queue. + * @param Ptr An USM pointer to memory. + * @param Count A number of bytes to prefetch. + */ +DPPL_API +void DPPLQueue_Prefetch (__dppl_keep DPPLSyclQueueRef QRef, + const void *Ptr, size_t Count); + +/*! + * @brief C-API wrapper for sycl::queue::mem_advise, the function waits on an event + * till the operation completes. + * + * @param QRef An opaque pointer to the sycl queue. + * @param Ptr An USM pointer to memory. + * @param Count A number of bytes to prefetch. + * @param Advice Device-defined advice for the specified allocation. + * A value of 0 reverts the advice for Ptr to the default behavior. + */ +DPPL_API +void DPPLQueue_MemAdvise (__dppl_keep DPPLSyclQueueRef QRef, + const void *Ptr, size_t Count, int Advice); + DPPL_C_EXTERN_C_END diff --git a/backends/include/dppl_sycl_queue_manager.h b/backends/include/dppl_sycl_queue_manager.h index 73822146d4..4f072d9c03 100644 --- a/backends/include/dppl_sycl_queue_manager.h +++ b/backends/include/dppl_sycl_queue_manager.h @@ -158,4 +158,25 @@ DPPLQueueMgr_PushQueue (DPPLSyclBackendType BETy, DPPL_API void DPPLQueueMgr_PopQueue (); + +/*! + * @brief Creates a new instance of SYCL queue from SYCL context and + * SYCL device. + * + * The instance is not placed into queue manager. The user assumes + * ownership of the queue reference and should deallocate it using + * DPPLQueue_Delete. + * + * @param CRef Sycl context reference + * @param DRef Sycl device reference + * + * @return A copy of the sycl::queue created from given context and device + * references. + */ +DPPL_API +__dppl_give DPPLSyclQueueRef +DPPLQueueMgr_GetQueueFromContextAndDevice(__dppl_keep DPPLSyclContextRef CRef, + __dppl_keep DPPLSyclDeviceRef DRef); + + DPPL_C_EXTERN_C_END diff --git a/backends/include/dppl_sycl_usm_interface.h b/backends/include/dppl_sycl_usm_interface.h index 6b771d7c2d..608a0da020 100644 --- a/backends/include/dppl_sycl_usm_interface.h +++ b/backends/include/dppl_sycl_usm_interface.h @@ -34,35 +34,94 @@ DPPL_C_EXTERN_C_BEGIN /*! - * @brief Crete USM shared memory. + * @brief Create USM shared memory. * - * @return The pointer to USM shared memory. + * @param size Number of bytes to allocate + * @param QRef Sycl queue reference to use in allocation + * + * @return The pointer to USM shared memory. On failure, returns nullptr. */ DPPL_API __dppl_give DPPLSyclUSMRef DPPLmalloc_shared (size_t size, __dppl_keep const DPPLSyclQueueRef QRef); /*! - * @brief Crete USM host memory. + * @brief Create USM shared memory. + * + * @param alignment Allocation's byte alignment + * @param size Number of bytes to allocate + * @param QRef Sycl queue reference to use in allocation * - * @return The pointer to USM host memory. + * @return The pointer to USM shared memory with the requested alignment. + * On failure, returns nullptr. + */ +DPPL_API +__dppl_give DPPLSyclUSMRef +DPPLaligned_alloc_shared (size_t alignment, size_t size, + __dppl_keep const DPPLSyclQueueRef QRef); + +/*! + * @brief Create USM host memory. + * + * @param size Number of bytes to allocate + * @param QRef Sycl queue reference to use in allocation + * + * @return The pointer to USM host memory. On failure, returns nullptr. */ DPPL_API __dppl_give DPPLSyclUSMRef DPPLmalloc_host (size_t size, __dppl_keep const DPPLSyclQueueRef QRef); /*! - * @brief Crete USM device memory. + * @brief Create USM host memory. + * + * @param alignment Allocation's byte alignment + * @param size Number of bytes to allocate + * @param QRef Sycl queue reference to use in allocation * - * @return The pointer to USM device memory. + * @return The pointer to USM host memory with the requested alignment. + * On failure, returns nullptr. + */ +DPPL_API +__dppl_give DPPLSyclUSMRef +DPPLaligned_alloc_host (size_t alignment, size_t size, + __dppl_keep const DPPLSyclQueueRef QRef); + +/*! + * @brief Create USM device memory. + * + * @param size Number of bytes to allocate + * @param QRef Sycl queue reference to use in allocation + * + * @return The pointer to USM device memory. On failure, returns nullptr. */ DPPL_API __dppl_give DPPLSyclUSMRef DPPLmalloc_device (size_t size, __dppl_keep const DPPLSyclQueueRef QRef); +/*! + * @brief Create USM device memory. + * + * @param alignment Allocation's byte alignment + * @param size Number of bytes to allocate + * @param QRef Sycl queue reference to use in allocation + * + * @return The pointer to USM device memory with requested alignment. + * On failure, returns nullptr. + */ +DPPL_API +__dppl_give DPPLSyclUSMRef +DPPLaligned_alloc_device (size_t alignment, size_t size, + __dppl_keep const DPPLSyclQueueRef QRef); + /*! * @brief Free USM memory. * + * @param MRef USM pointer to free + * @param QRef Sycl queue reference to use. + * + * USM pointer must have been allocated using the same context as the one + * used to construct the queue. */ DPPL_API void DPPLfree_with_queue (__dppl_take DPPLSyclUSMRef MRef, @@ -79,6 +138,9 @@ void DPPLfree_with_context (__dppl_take DPPLSyclUSMRef MRef, /*! * @brief Get pointer type. * + * @param MRef USM Memory + * @param CRef Sycl context reference associated with the pointer + * * @return "host", "device", "shared" or "unknown" */ DPPL_API @@ -86,4 +148,16 @@ const char * DPPLUSM_GetPointerType (__dppl_keep const DPPLSyclUSMRef MRef, __dppl_keep const DPPLSyclContextRef CRef); +/*! + * @brief Get the device associated with USM pointer. + * + * @param MRef USM pointer + * @param CRef Sycl context reference associated with the pointer + * + * @return A DPPLSyclDeviceRef pointer to the sycl device. + */ +DPPL_API +DPPLSyclDeviceRef +DPPLUSM_GetPointerDevice (__dppl_keep const DPPLSyclUSMRef MRef, + __dppl_keep const DPPLSyclContextRef CRef); DPPL_C_EXTERN_C_END diff --git a/backends/source/dppl_sycl_device_interface.cpp b/backends/source/dppl_sycl_device_interface.cpp index 0dbf2affe1..8c14bfd38e 100644 --- a/backends/source/dppl_sycl_device_interface.cpp +++ b/backends/source/dppl_sycl_device_interface.cpp @@ -261,3 +261,12 @@ bool DPPLDevice_IsHostUnifiedMemory (__dppl_keep const DPPLSyclDeviceRef DRef) } return false; } + +bool DPPLDevice_AreEq(__dppl_keep const DPPLSyclDeviceRef DevRef1, + __dppl_keep const DPPLSyclDeviceRef DevRef2) +{ + if(!(DevRef1 && DevRef2)) + // \todo handle error + return false; + return (*unwrap(DevRef1) == *unwrap(DevRef2)); +} diff --git a/backends/source/dppl_sycl_platform_interface.cpp b/backends/source/dppl_sycl_platform_interface.cpp index 2aa0af7ed4..1db0987145 100644 --- a/backends/source/dppl_sycl_platform_interface.cpp +++ b/backends/source/dppl_sycl_platform_interface.cpp @@ -41,7 +41,7 @@ get_set_of_non_hostbackends () { std::set be_set; for (auto p : platform::get_platforms()) { - if(p.is_host()) + if(p.is_host()) continue; auto be = p.get_backend(); switch (be) @@ -155,12 +155,12 @@ void DPPLPlatform_DumpInfo () */ size_t DPPLPlatform_GetNumNonHostPlatforms () { - auto nNonHostPlatforms = 0ul; - for (auto &p : platform::get_platforms()) { - if (p.is_host()) - continue; - ++nNonHostPlatforms; - } + auto nNonHostPlatforms = 0ul; + for (auto &p : platform::get_platforms()) { + if (p.is_host()) + continue; + ++nNonHostPlatforms; + } return nNonHostPlatforms; } diff --git a/backends/source/dppl_sycl_queue_interface.cpp b/backends/source/dppl_sycl_queue_interface.cpp index 0231df8cf8..7e66b9eb8b 100644 --- a/backends/source/dppl_sycl_queue_interface.cpp +++ b/backends/source/dppl_sycl_queue_interface.cpp @@ -290,10 +290,28 @@ DPPLQueue_Wait (__dppl_keep DPPLSyclQueueRef QRef) SyclQueue->wait(); } -void DPPLQueue_Memcpy (__dppl_take const DPPLSyclQueueRef QRef, +void DPPLQueue_Memcpy (__dppl_keep const DPPLSyclQueueRef QRef, void *Dest, const void *Src, size_t Count) { auto Q = unwrap(QRef); auto event = Q->memcpy(Dest, Src, Count); event.wait(); } + +void +DPPLQueue_Prefetch (__dppl_keep DPPLSyclQueueRef QRef, + const void *Ptr, size_t Count) +{ + auto Q = unwrap(QRef); + auto event = Q->prefetch(Ptr, Count); + event.wait(); +} + +void +DPPLQueue_MemAdvise (__dppl_keep DPPLSyclQueueRef QRef, + const void *Ptr, size_t Count, int Advice) +{ + auto Q = unwrap(QRef); + auto event = Q->mem_advise(Ptr, Count, static_cast(Advice)); + event.wait(); +} diff --git a/backends/source/dppl_sycl_queue_manager.cpp b/backends/source/dppl_sycl_queue_manager.cpp index f708b4aea2..805ce5a8e2 100644 --- a/backends/source/dppl_sycl_queue_manager.cpp +++ b/backends/source/dppl_sycl_queue_manager.cpp @@ -40,6 +40,8 @@ namespace // Create wrappers for C Binding types (see CBindingWrapping.h). DEFINE_SIMPLE_CONVERSION_FUNCTIONS(queue, DPPLSyclQueueRef) +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(device, DPPLSyclDeviceRef) +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(context, DPPLSyclContextRef) /*! * @brief A helper class to support the DPPLSyclQueuemanager. @@ -99,7 +101,7 @@ class QMgrHelper { QVec *active_queues; try { - auto def_device = std::move(default_selector().select_device()); + auto def_device { default_selector().select_device() }; auto BE = def_device.get_platform().get_backend(); auto DevTy = def_device.get_info(); @@ -534,3 +536,17 @@ void DPPLQueueMgr_PopQueue () { QMgrHelper::popSyclQueue(); } + +/*! + * The function constructs a new SYCL queue instance from SYCL conext and + * SYCL device. + */ +DPPLSyclQueueRef +DPPLQueueMgr_GetQueueFromContextAndDevice (__dppl_keep DPPLSyclContextRef CRef, + __dppl_keep DPPLSyclDeviceRef DRef) +{ + auto dev = unwrap(DRef); + auto ctx = unwrap(CRef); + + return wrap(new queue(*ctx, *dev)); +} diff --git a/backends/source/dppl_sycl_usm_interface.cpp b/backends/source/dppl_sycl_usm_interface.cpp index 959398f462..dd79a45bb1 100644 --- a/backends/source/dppl_sycl_usm_interface.cpp +++ b/backends/source/dppl_sycl_usm_interface.cpp @@ -25,6 +25,7 @@ //===----------------------------------------------------------------------===// #include "dppl_sycl_usm_interface.h" +#include "dppl_sycl_device_interface.h" #include "Support/CBindingWrapping.h" #include /* SYCL headers */ @@ -35,6 +36,7 @@ namespace { // Create wrappers for C Binding types (see CBindingWrapping.h). DEFINE_SIMPLE_CONVERSION_FUNCTIONS(queue, DPPLSyclQueueRef) +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(device, DPPLSyclDeviceRef) DEFINE_SIMPLE_CONVERSION_FUNCTIONS(context, DPPLSyclContextRef) DEFINE_SIMPLE_CONVERSION_FUNCTIONS(void, DPPLSyclUSMRef) @@ -48,6 +50,15 @@ DPPLmalloc_shared (size_t size, __dppl_keep const DPPLSyclQueueRef QRef) return wrap(Ptr); } +__dppl_give DPPLSyclUSMRef +DPPLaligned_alloc_shared (size_t alignment, size_t size, + __dppl_keep const DPPLSyclQueueRef QRef) +{ + auto Q = unwrap(QRef); + auto Ptr = aligned_alloc_shared(alignment, size, *Q); + return wrap(Ptr); +} + __dppl_give DPPLSyclUSMRef DPPLmalloc_host (size_t size, __dppl_keep const DPPLSyclQueueRef QRef) { @@ -56,6 +67,15 @@ DPPLmalloc_host (size_t size, __dppl_keep const DPPLSyclQueueRef QRef) return wrap(Ptr); } +__dppl_give DPPLSyclUSMRef +DPPLaligned_alloc_host (size_t alignment, size_t size, + __dppl_keep const DPPLSyclQueueRef QRef) +{ + auto Q = unwrap(QRef); + auto Ptr = aligned_alloc_host(alignment, size, *Q); + return wrap(Ptr); +} + __dppl_give DPPLSyclUSMRef DPPLmalloc_device (size_t size, __dppl_keep const DPPLSyclQueueRef QRef) { @@ -64,6 +84,15 @@ DPPLmalloc_device (size_t size, __dppl_keep const DPPLSyclQueueRef QRef) return wrap(Ptr); } +__dppl_give DPPLSyclUSMRef +DPPLaligned_alloc_device (size_t alignment, size_t size, + __dppl_keep const DPPLSyclQueueRef QRef) +{ + auto Q = unwrap(QRef); + auto Ptr = aligned_alloc_device(alignment, size, *Q); + return wrap(Ptr); +} + void DPPLfree_with_queue (__dppl_take DPPLSyclUSMRef MRef, __dppl_keep const DPPLSyclQueueRef QRef) { @@ -99,3 +128,15 @@ DPPLUSM_GetPointerType (__dppl_keep const DPPLSyclUSMRef MRef, return "unknown"; } } + +DPPLSyclDeviceRef +DPPLUSM_GetPointerDevice (__dppl_keep const DPPLSyclUSMRef MRef, + __dppl_keep const DPPLSyclContextRef CRef) +{ + auto Ptr = unwrap(MRef); + auto C = unwrap(CRef); + + auto Dev = get_pointer_device(Ptr, *C); + + return wrap(new device(Dev)); +} diff --git a/backends/tests/CMakeLists.txt b/backends/tests/CMakeLists.txt index d0efc5ebfe..0e24753dc4 100644 --- a/backends/tests/CMakeLists.txt +++ b/backends/tests/CMakeLists.txt @@ -29,6 +29,7 @@ else() test_sycl_program_interface test_sycl_queue_interface test_sycl_queue_manager + test_sycl_usm_interface ) # Copy the spir-v input files to test build directory diff --git a/backends/tests/test_sycl_queue_manager.cpp b/backends/tests/test_sycl_queue_manager.cpp index 55f8cb725d..600c78e8e7 100644 --- a/backends/tests/test_sycl_queue_manager.cpp +++ b/backends/tests/test_sycl_queue_manager.cpp @@ -244,6 +244,23 @@ TEST_F (TestDPPLSyclQueueManager, CheckIsCurrentQueue2) DPPLQueueMgr_PopQueue(); } +TEST_F (TestDPPLSyclQueueManager, CreateQueueFromDeviceAndContext) +{ + auto Q = DPPLQueueMgr_GetCurrentQueue(); + auto D = DPPLQueue_GetDevice(Q); + auto C = DPPLQueue_GetContext(Q); + + auto Q2 = DPPLQueueMgr_GetQueueFromContextAndDevice(C, D); + auto D2 = DPPLQueue_GetDevice(Q2); + auto C2 = DPPLQueue_GetContext(Q2); + + EXPECT_TRUE(DPPLDevice_AreEq(D, D2)); + EXPECT_TRUE(DPPLContext_AreEq(C, C2)); + + DPPLQueue_Delete(Q2); + DPPLQueue_Delete(Q); +} + int main (int argc, char** argv) { diff --git a/backends/tests/test_sycl_usm_interface.cpp b/backends/tests/test_sycl_usm_interface.cpp new file mode 100644 index 0000000000..d07157029f --- /dev/null +++ b/backends/tests/test_sycl_usm_interface.cpp @@ -0,0 +1,192 @@ +//===-------- test_sycl_usm_interface.cpp - dpctl-C_API ---*--- C++ --*--===// +// +// Data Parallel Control Library (dpCtl) +// +// Copyright 2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file has unit test cases for functions defined in +/// dppl_sycl_usm_interface.h. +/// +//===----------------------------------------------------------------------===// + +#include "dppl_sycl_context_interface.h" +#include "dppl_sycl_device_interface.h" +#include "dppl_sycl_event_interface.h" +#include "dppl_sycl_queue_interface.h" +#include "dppl_sycl_queue_manager.h" +#include "dppl_sycl_usm_interface.h" +#include "Support/CBindingWrapping.h" +#include +#include + +using namespace cl::sycl; + +namespace +{ +constexpr size_t SIZE = 1024; + +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(void, DPPLSyclUSMRef); + +bool has_devices () +{ + bool ret = false; + for (auto &p : platform::get_platforms()) { + if (p.is_host()) + continue; + if(!p.get_devices().empty()) { + ret = true; + break; + } + } + return ret; +} + +void +common_test_body (size_t nbytes, const DPPLSyclUSMRef Ptr, + const DPPLSyclQueueRef Q, const char *expected) +{ + auto Ctx = DPPLQueue_GetContext(Q); + + auto kind = DPPLUSM_GetPointerType(Ptr, Ctx); + EXPECT_TRUE(0 == std::strncmp(kind, expected, 4)); + + auto Dev = DPPLUSM_GetPointerDevice(Ptr, Ctx); + auto QueueDev = DPPLQueue_GetDevice(Q); + EXPECT_TRUE(DPPLDevice_AreEq(Dev, QueueDev)); + + EXPECT_NO_FATAL_FAILURE(DPPLQueue_Prefetch(Q, Ptr, nbytes)); + + DPPLDevice_Delete(QueueDev); + DPPLDevice_Delete(Dev); + DPPLContext_Delete(Ctx); +} + +} // end of namespace + +struct TestDPPLSyclUSMInterface : public ::testing::Test +{ + + TestDPPLSyclUSMInterface () + { } + + ~TestDPPLSyclUSMInterface () + { } +}; + +TEST_F (TestDPPLSyclUSMInterface, MallocShared) +{ + if (!has_devices()) + GTEST_SKIP_("Skipping: No Sycl Devices.\n"); + + auto Q = DPPLQueueMgr_GetCurrentQueue(); + const size_t nbytes = 1024; + + auto Ptr = DPPLmalloc_shared(nbytes, Q); + EXPECT_TRUE(bool(Ptr)); + + common_test_body(nbytes, Ptr, Q, "shared"); + DPPLfree_with_queue(Ptr, Q); + DPPLQueue_Delete(Q); +} + +TEST_F (TestDPPLSyclUSMInterface, MallocDevice) +{ + if (!has_devices()) + GTEST_SKIP_("Skipping: No Sycl Devices.\n"); + + auto Q = DPPLQueueMgr_GetCurrentQueue(); + const size_t nbytes = 1024; + + auto Ptr = DPPLmalloc_device(nbytes, Q); + EXPECT_TRUE(bool(Ptr)); + + common_test_body(nbytes, Ptr, Q, "device"); + DPPLfree_with_queue(Ptr, Q); + DPPLQueue_Delete(Q); +} + +TEST_F (TestDPPLSyclUSMInterface, MallocHost) +{ + if (!has_devices()) + GTEST_SKIP_("Skipping: No Sycl Devices.\n"); + + auto Q = DPPLQueueMgr_GetCurrentQueue(); + const size_t nbytes = 1024; + + auto Ptr = DPPLmalloc_host(nbytes, Q); + EXPECT_TRUE(bool(Ptr)); + + common_test_body(nbytes, Ptr, Q, "host"); + DPPLfree_with_queue(Ptr, Q); + DPPLQueue_Delete(Q); +} + +TEST_F (TestDPPLSyclUSMInterface, AlignedAllocShared) +{ + if (!has_devices()) + GTEST_SKIP_("Skipping: No Sycl Devices.\n"); + + auto Q = DPPLQueueMgr_GetCurrentQueue(); + const size_t nbytes = 1024; + + auto Ptr = DPPLaligned_alloc_shared(64, nbytes, Q); + EXPECT_TRUE(bool(Ptr)); + + common_test_body(nbytes, Ptr, Q, "shared"); + DPPLfree_with_queue(Ptr, Q); + DPPLQueue_Delete(Q); +} + +TEST_F (TestDPPLSyclUSMInterface, AlignedAllocDevice) +{ + if (!has_devices()) + GTEST_SKIP_("Skipping: No Sycl Devices.\n"); + + auto Q = DPPLQueueMgr_GetCurrentQueue(); + const size_t nbytes = 1024; + + auto Ptr = DPPLaligned_alloc_device(64, nbytes, Q); + EXPECT_TRUE(bool(Ptr)); + + common_test_body(nbytes, Ptr, Q, "device"); + DPPLfree_with_queue(Ptr, Q); + DPPLQueue_Delete(Q); +} + +TEST_F (TestDPPLSyclUSMInterface, AlignedAllocHost) +{ + if (!has_devices()) + GTEST_SKIP_("Skipping: No Sycl Devices.\n"); + + auto Q = DPPLQueueMgr_GetCurrentQueue(); + const size_t nbytes = 1024; + + auto Ptr = DPPLaligned_alloc_host(64, nbytes, Q); + EXPECT_TRUE(bool(Ptr)); + + common_test_body(nbytes, Ptr, Q, "host"); + DPPLfree_with_queue(Ptr, Q); +} + +int +main (int argc, char** argv) +{ + ::testing::InitGoogleTest(&argc, argv); + int ret = RUN_ALL_TESTS(); + return ret; +} diff --git a/conda-recipe/bld.bat b/conda-recipe/bld.bat index 1d811447a8..f6868a945b 100644 --- a/conda-recipe/bld.bat +++ b/conda-recipe/bld.bat @@ -1,5 +1,5 @@ call "%ONEAPI_ROOT%compiler\latest\env\vars.bat" -IF ERRORLEVEL 1 exit 1 +IF ERRORLEVEL 1 exit /b 1 REM conda uses %ERRORLEVEL% but FPGA scripts can set it. So it should be reseted. set ERRORLEVEL= @@ -10,7 +10,7 @@ rmdir /S /Q build_cmake mkdir build_cmake cd build_cmake -set "DPCPP_ROOT=%ONEAPI_ROOT%\compiler\latest\windows" +set "DPCPP_ROOT=%ONEAPI_ROOT%compiler\latest\windows" set "INSTALL_PREFIX=%cd%\..\install" rmdir /S /Q "%INSTALL_PREFIX%" @@ -21,11 +21,11 @@ cmake -G Ninja ^ "-DCMAKE_PREFIX_PATH=%LIBRARY_PREFIX%" ^ "-DDPCPP_ROOT=%DPCPP_ROOT%" ^ "%SRC_DIR%/backends" -IF %ERRORLEVEL% NEQ 0 exit 1 +IF %ERRORLEVEL% NEQ 0 exit /b 1 ninja -n ninja install -IF %ERRORLEVEL% NEQ 0 exit 1 +IF %ERRORLEVEL% NEQ 0 exit /b 1 cd .. xcopy install\lib\*.lib dpctl /E /Y @@ -41,4 +41,4 @@ set "DPPL_SYCL_INTERFACE_INCLDIR=dpctl\include" "%PYTHON%" setup.py clean --all "%PYTHON%" setup.py build install -IF %ERRORLEVEL% NEQ 0 exit 1 +IF %ERRORLEVEL% NEQ 0 exit /b 1 diff --git a/dpctl/__init__.pxd b/dpctl/__init__.pxd index 719ca53546..89f3dbe551 100644 --- a/dpctl/__init__.pxd +++ b/dpctl/__init__.pxd @@ -1,4 +1,4 @@ -##===------------- sycl_core.pxd - dpctl module --------*- Cython -*-------===## +##===------------- __init__.pxd - dpctl module --------*- Cython -*-------===## ## ## Data Parallel Control (dpCtl) ## @@ -28,3 +28,5 @@ # cython: language_level=3 from dpctl._sycl_core cimport * +from dpctl._memory import * + diff --git a/dpctl/__init__.py b/dpctl/__init__.py index af9aa93076..f425e4e6e9 100644 --- a/dpctl/__init__.py +++ b/dpctl/__init__.py @@ -1,4 +1,4 @@ -##===----------------- _memory.pyx - dpctl module -------*- Cython -*------===## +##===----------------- __init__.py - dpctl module -------*- Cython -*------===## ## ## Data Parallel Control (dpCtl) ## diff --git a/dpctl/_backend.pxd b/dpctl/_backend.pxd index 584ae79fd8..e354ae4187 100644 --- a/dpctl/_backend.pxd +++ b/dpctl/_backend.pxd @@ -186,6 +186,10 @@ cdef extern from "dppl_sycl_queue_interface.h": cdef void DPPLQueue_Wait (const DPPLSyclQueueRef QRef) cdef void DPPLQueue_Memcpy (const DPPLSyclQueueRef Q, void *Dest, const void *Src, size_t Count) + cdef void DPPLQueue_Prefetch (const DPPLSyclQueueRef Q, + const void *Src, size_t Count) + cdef void DPPLQueue_MemAdvise (const DPPLSyclQueueRef Q, + const void *Src, size_t Count, int Advice) cdef extern from "dppl_sycl_queue_manager.h": @@ -206,15 +210,26 @@ cdef extern from "dppl_sycl_queue_manager.h": DPPLSyclDeviceType DeviceTy, size_t DNum ) + cdef DPPLSyclQueueRef DPPLQueueMgr_GetQueueFromContextAndDevice( + DPPLSyclContextRef CRef, + DPPLSyclDeviceRef DRef) cdef extern from "dppl_sycl_usm_interface.h": cdef DPPLSyclUSMRef DPPLmalloc_shared (size_t size, DPPLSyclQueueRef QRef) cdef DPPLSyclUSMRef DPPLmalloc_host (size_t size, DPPLSyclQueueRef QRef) cdef DPPLSyclUSMRef DPPLmalloc_device (size_t size, DPPLSyclQueueRef QRef) + cdef DPPLSyclUSMRef DPPLaligned_alloc_shared (size_t alignment, + size_t size, DPPLSyclQueueRef QRef) + cdef DPPLSyclUSMRef DPPLaligned_alloc_host (size_t alignment, + size_t size, DPPLSyclQueueRef QRef) + cdef DPPLSyclUSMRef DPPLaligned_alloc_device (size_t alignment, + size_t size, DPPLSyclQueueRef QRef) cdef void DPPLfree_with_queue (DPPLSyclUSMRef MRef, DPPLSyclQueueRef QRef) cdef void DPPLfree_with_context (DPPLSyclUSMRef MRef, DPPLSyclContextRef CRef) cdef const char* DPPLUSM_GetPointerType (DPPLSyclUSMRef MRef, DPPLSyclContextRef CRef) + cdef DPPLSyclDeviceRef DPPLUSM_GetPointerDevice (DPPLSyclUSMRef MRef, + DPPLSyclContextRef CRef) diff --git a/dpctl/_memory.pyx b/dpctl/_memory.pyx deleted file mode 100644 index 96259b0451..0000000000 --- a/dpctl/_memory.pyx +++ /dev/null @@ -1,156 +0,0 @@ -##===--------------- _memory.pyx - dpctl module --------*- Cython -*-------===## -## -## Data Parallel Control (dpCtl) -## -## Copyright 2020 Intel Corporation -## -## Licensed under the Apache License, Version 2.0 (the "License"); -## you may not use this file except in compliance with the License. -## You may obtain a copy of the License at -## -## http://www.apache.org/licenses/LICENSE-2.0 -## -## Unless required by applicable law or agreed to in writing, software -## distributed under the License is distributed on an "AS IS" BASIS, -## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -## See the License for the specific language governing permissions and -## limitations under the License. -## -##===----------------------------------------------------------------------===## -## -## \file -## This file implements Python buffer protocol using Sycl USM shared and host -## allocators. The USM device allocator is also exposed through this module for -## use in other Python modules. -## -##===----------------------------------------------------------------------===## - -# distutils: language = c++ -# cython: language_level=3 - -import dpctl -from dpctl._backend cimport * -from ._sycl_core cimport SyclContext, SyclQueue - -from cpython cimport Py_buffer - - -cdef class Memory: - - cdef _cinit(self, Py_ssize_t nbytes, ptr_type, SyclQueue queue): - cdef DPPLSyclUSMRef p - - self.memory_ptr = NULL - self.nbytes = 0 - self.queue = None - - if (nbytes > 0): - if queue is None: - queue = dpctl.get_current_queue() - - if (ptr_type == "shared"): - p = DPPLmalloc_shared(nbytes, queue.get_queue_ref()) - elif (ptr_type == "host"): - p = DPPLmalloc_host(nbytes, queue.get_queue_ref()) - elif (ptr_type == "device"): - p = DPPLmalloc_device(nbytes, queue.get_queue_ref()) - else: - raise RuntimeError("Pointer type is unknown: {}" \ - .format(ptr_type)) - - if (p): - self.memory_ptr = p - self.nbytes = nbytes - self.queue = queue - else: - raise RuntimeError("Null memory pointer returned") - else: - raise ValueError("Non-positive number of bytes found.") - - def __dealloc__(self): - if (self.memory_ptr): - DPPLfree_with_queue(self.memory_ptr, - self.queue.get_queue_ref()) - self.memory_ptr = NULL - self.nbytes = 0 - self.queue = None - - cdef _getbuffer(self, Py_buffer *buffer, int flags): - # memory_ptr is Ref which is pointer to SYCL type. For USM it is void*. - buffer.buf = self.memory_ptr - buffer.format = 'B' # byte - buffer.internal = NULL # see References - buffer.itemsize = 1 - buffer.len = self.nbytes - buffer.ndim = 1 - buffer.obj = self - buffer.readonly = 0 - buffer.shape = &self.nbytes - buffer.strides = &buffer.itemsize - buffer.suboffsets = NULL # for pointer arrays only - - property nbytes: - def __get__(self): - return self.nbytes - - property _pointer: - def __get__(self): - return (self.memory_ptr) - - property _context: - def __get__(self): - return self.queue.get_sycl_context() - - property _queue: - def __get__(self): - return self.queue - - def __repr__(self): - return "" \ - .format(self.nbytes, hex((self.memory_ptr))) - - def _usm_type(self, syclobj=None): - cdef const char* kind - cdef SyclContext ctx - cdef SyclQueue q - if syclobj is None: - ctx = self._context - kind = DPPLUSM_GetPointerType(self.memory_ptr, - ctx.get_context_ref()) - elif isinstance(syclobj, SyclContext): - ctx = (syclobj) - kind = DPPLUSM_GetPointerType(self.memory_ptr, - ctx.get_context_ref()) - elif isinstance(syclobj, SyclQueue): - q = (syclobj) - ctx = q.get_sycl_context() - kind = DPPLUSM_GetPointerType(self.memory_ptr, - ctx.get_context_ref()) - else: - raise ValueError("syclobj keyword can be either None, " - "or an instance of SyclConext or SyclQueue") - return kind.decode('UTF-8') - - -cdef class MemoryUSMShared(Memory): - - def __cinit__(self, Py_ssize_t nbytes, SyclQueue queue=None): - self._cinit(nbytes, "shared", queue) - - def __getbuffer__(self, Py_buffer *buffer, int flags): - self._getbuffer(buffer, flags) - - -cdef class MemoryUSMHost(Memory): - - def __cinit__(self, Py_ssize_t nbytes, SyclQueue queue=None): - self._cinit(nbytes, "host", queue) - - def __getbuffer__(self, Py_buffer *buffer, int flags): - self._getbuffer(buffer, flags) - - -cdef class MemoryUSMDevice(Memory): - - def __cinit__(self, Py_ssize_t nbytes, SyclQueue queue=None): - self._cinit(nbytes, "device", queue) diff --git a/dpctl/_sycl_core.pxd b/dpctl/_sycl_core.pxd index 5a775de16c..0fe7d677cd 100644 --- a/dpctl/_sycl_core.pxd +++ b/dpctl/_sycl_core.pxd @@ -128,6 +128,8 @@ cdef class SyclQueue: @staticmethod cdef SyclQueue _create (DPPLSyclQueueRef qref) + @staticmethod + cdef SyclQueue _create_from_context_and_device (SyclContext ctx, SyclDevice dev) cpdef bool equals (self, SyclQueue q) cpdef SyclContext get_sycl_context (self) cpdef SyclDevice get_sycl_device (self) @@ -136,7 +138,9 @@ cdef class SyclQueue: list lS=*, list dEvents=*) cpdef void wait (self) cdef DPPLSyclQueueRef get_queue_ref (self) - cpdef memcpy (self, dest, src, int count) + cpdef memcpy (self, dest, src, size_t count) + cpdef prefetch (self, ptr, size_t count=*) + cpdef mem_advise (self, ptr, size_t count, int mem) cpdef SyclQueue get_current_queue() diff --git a/dpctl/_sycl_core.pyx b/dpctl/_sycl_core.pyx index 3da9d08ce2..3fa887d9a4 100644 --- a/dpctl/_sycl_core.pyx +++ b/dpctl/_sycl_core.pyx @@ -30,7 +30,7 @@ from __future__ import print_function from enum import Enum, auto import logging from ._backend cimport * -from ._memory cimport Memory +from .memory._memory cimport _Memory from libc.stdlib cimport malloc, free @@ -373,6 +373,21 @@ cdef class SyclQueue: ret._queue_ref = qref return ret + @staticmethod + cdef SyclQueue _create_from_context_and_device(SyclContext ctx, SyclDevice dev): + cdef SyclQueue ret = SyclQueue.__new__(SyclQueue) + cdef DPPLSyclContextRef cref = ctx.get_context_ref() + cdef DPPLSyclDeviceRef dref = dev.get_device_ref() + cdef DPPLSyclQueueRef qref = DPPLQueueMgr_GetQueueFromContextAndDevice( + cref, dref) + + if qref is NULL: + raise SyclQueueCreationError("Queue creation failed.") + ret._queue_ref = qref + ret._context = ctx + ret._device = dev + return ret + def __dealloc__ (self): DPPLQueue_Delete(self._queue_ref) @@ -430,7 +445,7 @@ cdef class SyclQueue: elif isinstance(arg, ctypes.c_double): kargs[idx] = (ctypes.addressof(arg)) kargty[idx] = _arg_data_type._DOUBLE - elif isinstance(arg, Memory): + elif isinstance(arg, _Memory): kargs[idx]= (arg._pointer) kargty[idx] = _arg_data_type._VOID_PTR else: @@ -601,22 +616,48 @@ cdef class SyclQueue: cpdef void wait (self): DPPLQueue_Wait(self._queue_ref) - cpdef memcpy (self, dest, src, int count): + cpdef memcpy (self, dest, src, size_t count): cdef void *c_dest cdef void *c_src - if isinstance(dest, Memory): - c_dest = (dest).memory_ptr + if isinstance(dest, _Memory): + c_dest = (<_Memory>dest).memory_ptr else: - raise TypeError("Parameter dest should be Memory.") + raise TypeError("Parameter `dest` should have type _Memory.") - if isinstance(src, Memory): - c_src = (src).memory_ptr + if isinstance(src, _Memory): + c_src = (<_Memory>src).memory_ptr else: - raise TypeError("Parameter src should be Memory.") + raise TypeError("Parameter `src` should have type _Memory.") DPPLQueue_Memcpy(self._queue_ref, c_dest, c_src, count) + cpdef prefetch (self, mem, size_t count=0): + cdef void *ptr + + if isinstance(mem, _Memory): + ptr = (<_Memory>mem).memory_ptr + else: + raise TypeError("Parameter `mem` should have type _Memory") + + if (count <=0 or count > self.nbytes): + count = self.nbytes + + DPPLQueue_Prefetch(self._queue_ref, ptr, count) + + cpdef mem_advise (self, mem, size_t count, int advice): + cdef void *ptr + + if isinstance(mem, _Memory): + ptr = (<_Memory>mem).memory_ptr + else: + raise TypeError("Parameter `mem` should have type _Memory") + + if (count <=0 or count > self.nbytes): + count = self.nbytes + + DPPLQueue_MemAdvise(self._queue_ref, ptr, count, advice) + cdef class _SyclRTManager: ''' Wrapper for the C API's sycl queue manager interface. diff --git a/dpctl/_memory.pxd b/dpctl/memory/__init__.pxd similarity index 60% rename from dpctl/_memory.pxd rename to dpctl/memory/__init__.pxd index 2ab5066c8d..1744802cf1 100644 --- a/dpctl/_memory.pxd +++ b/dpctl/memory/__init__.pxd @@ -1,4 +1,4 @@ -##===--------------- _memory.pxd - dpctl module --------*- Cython -*-------===## +##===------------- __init__.pxd - dpctl module --------*- Cython -*-------===## ## ## Data Parallel Control (dpCtl) ## @@ -17,30 +17,14 @@ ## limitations under the License. ## ##===----------------------------------------------------------------------===## +## +## \file +## This file declares the extension types and functions for the Cython API +## implemented in sycl_core.pyx. +## +##===----------------------------------------------------------------------===## # distutils: language = c++ # cython: language_level=3 -from ._backend cimport DPPLSyclUSMRef -from ._sycl_core cimport SyclQueue - - -cdef class Memory: - cdef DPPLSyclUSMRef memory_ptr - cdef Py_ssize_t nbytes - cdef SyclQueue queue - - cdef _cinit(self, Py_ssize_t nbytes, ptr_type, SyclQueue queue) - cdef _getbuffer(self, Py_buffer *buffer, int flags) - - -cdef class MemoryUSMShared(Memory): - pass - - -cdef class MemoryUSMHost(Memory): - pass - - -cdef class MemoryUSMDevice(Memory): - pass +from ._memory cimport * diff --git a/dpctl/memory/__init__.py b/dpctl/memory/__init__.py new file mode 100644 index 0000000000..3d7cd66e6d --- /dev/null +++ b/dpctl/memory/__init__.py @@ -0,0 +1,41 @@ +##===---------- memory/__init__.py - dpctl module -------*- Python -*------===## +## +## Data Parallel Control (dpCtl) +## +## Copyright 2020 Intel Corporation +## +## Licensed under the Apache License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. +## You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. +## +##===----------------------------------------------------------------------===## +## +## \file +## This top-level dpctl module. +## +##===----------------------------------------------------------------------===## +""" + Data Parallel Control Memory + + `dpctl.memory` provides Python objects for untyped USM memory + container of bytes for each kind of USM pointers: shared pointers, + device pointers and host pointers. + + Shared and host pointers are accessible from both host and a device, + while device pointers are only accessible from device. + + Python objects corresponding to shared and host pointers implement + Python simple buffer protocol. It is therefore possible to use these + objects to maniputalate USM memory using NumPy or `bytearray`, + `memoryview`, or `array.array` classes. + +""" +from ._memory import MemoryUSMShared, MemoryUSMDevice, MemoryUSMHost diff --git a/dpctl/memory/_memory.pxd b/dpctl/memory/_memory.pxd new file mode 100644 index 0000000000..b475627800 --- /dev/null +++ b/dpctl/memory/_memory.pxd @@ -0,0 +1,59 @@ +##===--------------- _memory.pxd - dpctl module --------*- Cython -*-------===## +## +## Data Parallel Control (dpCtl) +## +## Copyright 2020 Intel Corporation +## +## Licensed under the Apache License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. +## You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. +## +##===----------------------------------------------------------------------===## + +# distutils: language = c++ +# cython: language_level=3 + +from .._backend cimport DPPLSyclUSMRef +from .._sycl_core cimport SyclQueue, SyclDevice, SyclContext + + +cdef class _Memory: + cdef DPPLSyclUSMRef memory_ptr + cdef Py_ssize_t nbytes + cdef SyclQueue queue + cdef object refobj + + cdef _cinit_empty(self) + cdef _cinit_alloc(self, Py_ssize_t alignment, Py_ssize_t nbytes, + bytes ptr_type, SyclQueue queue) + cdef _cinit_other(self, object other) + cdef _getbuffer(self, Py_buffer *buffer, int flags) + + cpdef copy_to_host(self, object obj=*) + cpdef copy_from_host(self, object obj) + cpdef copy_from_device(self, object obj) + + cpdef bytes tobytes(self) + + @staticmethod + cdef SyclDevice get_pointer_device(DPPLSyclUSMRef p, SyclContext ctx) + + +cdef class MemoryUSMShared(_Memory): + pass + + +cdef class MemoryUSMHost(_Memory): + pass + + +cdef class MemoryUSMDevice(_Memory): + pass diff --git a/dpctl/memory/_memory.pyx b/dpctl/memory/_memory.pyx new file mode 100644 index 0000000000..b4f1848bf4 --- /dev/null +++ b/dpctl/memory/_memory.pyx @@ -0,0 +1,521 @@ +##===--------------- _memory.pyx - dpctl module --------*- Cython -*-------===## +## +## Data Parallel Control (dpCtl) +## +## Copyright 2020 Intel Corporation +## +## Licensed under the Apache License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. +## You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. +## +##===----------------------------------------------------------------------===## +## +## \file +## This file implements Python buffer protocol using Sycl USM shared and host +## allocators. The USM device allocator is also exposed through this module for +## use in other Python modules. +## +##===----------------------------------------------------------------------===## + +# distutils: language = c++ +# cython: language_level=3 + +import dpctl +from dpctl._backend cimport * +from .._sycl_core cimport SyclContext, SyclQueue, SyclDevice +from .._sycl_core cimport get_current_queue + +from cpython cimport Py_buffer +from cpython.bytes cimport PyBytes_AS_STRING, PyBytes_FromStringAndSize + +import numpy as np + +cdef _throw_sycl_usm_ary_iface(): + raise ValueError("__sycl_usm_array_interface__ is malformed") + + +cdef void copy_via_host(void *dest_ptr, SyclQueue dest_queue, + void *src_ptr, SyclQueue src_queue, size_t nbytes): + """ + Copies `nbytes` bytes from `src_ptr` USM memory to + `dest_ptr` USM memory using host as the intemediary. + + This is useful when `src_ptr` and `dest_ptr` are bound to incompatible + SYCL contexts. + """ + # could also have used bytearray(nbytes) + cdef unsigned char[::1] host_buf = np.empty((nbytes,), dtype="|u1") + + DPPLQueue_Memcpy( + src_queue.get_queue_ref(), + &host_buf[0], + src_ptr, + nbytes + ) + + DPPLQueue_Memcpy( + dest_queue.get_queue_ref(), + dest_ptr, + &host_buf[0], + nbytes + ) + + +cdef class _BufferData: + """ + Internal data struct populated from parsing + `__sycl_usm_array_interface__` dictionary + """ + cdef DPPLSyclUSMRef p + cdef int writeable + cdef object dt + cdef Py_ssize_t itemsize + cdef Py_ssize_t nbytes + cdef SyclQueue queue + + @staticmethod + cdef _BufferData from_sycl_usm_ary_iface(dict ary_iface): + cdef object ary_data_tuple = ary_iface.get('data', None) + cdef object ary_typestr = ary_iface.get('typestr', None) + cdef object ary_shape = ary_iface.get('shape', None) + cdef object ary_strides = ary_iface.get('strides', None) + cdef object ary_syclobj = ary_iface.get('syclobj', None) + cdef Py_ssize_t ary_offset = ary_iface.get('offset', 0) + cdef int ary_version = ary_iface.get('version', 0) + cdef object dt + cdef _BufferData buf + cdef Py_ssize_t arr_data_ptr + cdef SyclDevice dev + cdef SyclContext ctx + + if ary_version != 1: + _throw_sycl_usm_ary_iface() + if not ary_data_tuple or len(ary_data_tuple) != 2: + _throw_sycl_usm_ary_iface() + if not ary_shape or len(ary_shape) != 1 or ary_shape[0] < 1: + raise ValueError + try: + dt = np.dtype(ary_typestr) + except TypeError: + _throw_sycl_usm_ary_iface() + if (ary_strides and len(ary_strides) != 1 + and ary_strides[0] != dt.itemsize): + raise ValueError("Must be contiguous") + + if (not ary_syclobj or + not isinstance(ary_syclobj, + (dpctl.SyclQueue, dpctl.SyclContext))): + _throw_sycl_usm_ary_iface() + + buf = _BufferData.__new__(_BufferData) + arr_data_ptr = ary_data_tuple[0] + buf.p = (arr_data_ptr) + buf.writeable = 1 if ary_data_tuple[1] else 0 + buf.itemsize = (dt.itemsize) + buf.nbytes = (ary_shape[0]) * buf.itemsize + + if isinstance(ary_syclobj, dpctl.SyclQueue): + buf.queue = ary_syclobj + else: + # Obtain device from pointer and context + ctx = ary_syclobj + dev = _Memory.get_pointer_device(buf.p, ctx) + # Use context and device to create a queue to + # be able to copy memory + buf.queue = SyclQueue._create_from_context_and_device(ctx, dev) + + return buf + + +def _to_memory(unsigned char [::1] b, str usm_kind): + """ + Constructs Memory of the same size as the argument + and copies data into it""" + cdef _Memory res + + if (usm_kind == "shared"): + res = MemoryUSMShared(len(b)) + elif (usm_kind == "device"): + res = MemoryUSMDevice(len(b)) + elif (usm_kind == "host"): + res = MemoryUSMHost(len(b)) + else: + raise ValueError( + "Unrecognized usm_kind={} stored in the " + "pickle".format(usm_kind)) + res.copy_from_host(b) + + return res + + +cdef class _Memory: + cdef _cinit_empty(self): + self.memory_ptr = NULL + self.nbytes = 0 + self.queue = None + self.refobj = None + + cdef _cinit_alloc(self, Py_ssize_t alignment, Py_ssize_t nbytes, + bytes ptr_type, SyclQueue queue): + cdef DPPLSyclUSMRef p + + self._cinit_empty() + + if (nbytes > 0): + if queue is None: + queue = get_current_queue() + + if (ptr_type == b"shared"): + if alignment > 0: + p = DPPLaligned_alloc_shared(alignment, nbytes, + queue.get_queue_ref()) + else: + p = DPPLmalloc_shared(nbytes, queue.get_queue_ref()) + elif (ptr_type == b"host"): + if alignment > 0: + p = DPPLaligned_alloc_host(alignment, nbytes, + queue.get_queue_ref()) + else: + p = DPPLmalloc_host(nbytes, queue.get_queue_ref()) + elif (ptr_type == b"device"): + if (alignment > 0): + p = DPPLaligned_alloc_device(alignment, nbytes, + queue.get_queue_ref()) + else: + p = DPPLmalloc_device(nbytes, queue.get_queue_ref()) + else: + raise RuntimeError("Pointer type is unknown: {}" \ + .format(ptr_type.decode("UTF-8"))) + + if (p): + self.memory_ptr = p + self.nbytes = nbytes + self.queue = queue + else: + raise RuntimeError("Null memory pointer returned") + else: + raise ValueError("Non-positive number of bytes found.") + + cdef _cinit_other(self, object other): + cdef _Memory other_mem + if isinstance(other, _Memory): + other_mem = <_Memory> other + self.memory_ptr = other_mem.memory_ptr + self.nbytes = other_mem.nbytes + self.queue = other_mem.queue + if other_mem.refobj is None: + self.refobj = other + else: + self.refobj = other_mem.refobj + elif hasattr(other, '__sycl_usm_array_interface__'): + other_iface = other.__sycl_usm_array_interface__ + if isinstance(other_iface, dict): + other_buf = _BufferData.from_sycl_usm_ary_iface(other_iface) + self.memory_ptr = other_buf.p + self.nbytes = other_buf.nbytes + self.queue = other_buf.queue + # self.writeable = other_buf.writeable + self.refobj = other + else: + raise ValueError( + "Argument {} does not correctly expose" + "`__sycl_usm_array_interface__`.".format(other) + ) + else: + raise ValueError( + "Argument {} does not expose " + "`__sycl_usm_array_interface__`.".format(other) + ) + + def __dealloc__(self): + if (self.refobj is None and self.memory_ptr): + DPPLfree_with_queue(self.memory_ptr, + self.queue.get_queue_ref()) + self._cinit_empty() + + cdef _getbuffer(self, Py_buffer *buffer, int flags): + # memory_ptr is Ref which is pointer to SYCL type. For USM it is void*. + cdef SyclContext ctx = self._context + cdef const char * kind = DPPLUSM_GetPointerType( + self.memory_ptr, + ctx.get_context_ref()) + if kind == b'device': + raise ValueError('USM Device memory is not host accessible') + buffer.buf = self.memory_ptr + buffer.format = 'B' # byte + buffer.internal = NULL # see References + buffer.itemsize = 1 + buffer.len = self.nbytes + buffer.ndim = 1 + buffer.obj = self + buffer.readonly = 0 + buffer.shape = &self.nbytes + buffer.strides = &buffer.itemsize + buffer.suboffsets = NULL # for pointer arrays only + + property nbytes: + def __get__(self): + return self.nbytes + + property size: + def __get__(self): + return self.nbytes + + property _pointer: + def __get__(self): + return (self.memory_ptr) + + property _context: + def __get__(self): + return self.queue.get_sycl_context() + + property _queue: + def __get__(self): + return self.queue + + property reference_obj: + def __get__(self): + return self.refobj + + def __repr__(self): + return "" \ + .format(self.nbytes, hex((self.memory_ptr))) + + def __len__(self): + return self.nbytes + + def __sizeof__(self): + return self.nbytes + + def __bytes__(self): + return self.tobytes() + + def __reduce__(self): + return _to_memory, (self.copy_to_host(), self.get_usm_type()) + + property __sycl_usm_array_interface__: + def __get__(self): + cdef dict iface = { + "data": ((self.memory_ptr), + True), # bool(self.writeable)), + "shape": (self.nbytes,), + "strides": None, + "typestr": "|u1", + "version": 1, + "syclobj": self.queue + } + return iface + + def get_usm_type(self, syclobj=None): + cdef const char* kind + cdef SyclContext ctx + cdef SyclQueue q + if syclobj is None: + ctx = self._context + kind = DPPLUSM_GetPointerType(self.memory_ptr, + ctx.get_context_ref()) + elif isinstance(syclobj, SyclContext): + ctx = (syclobj) + kind = DPPLUSM_GetPointerType(self.memory_ptr, + ctx.get_context_ref()) + elif isinstance(syclobj, SyclQueue): + q = (syclobj) + ctx = q.get_sycl_context() + kind = DPPLUSM_GetPointerType(self.memory_ptr, + ctx.get_context_ref()) + else: + raise ValueError("syclobj keyword can be either None, " + "or an instance of SyclContext or SyclQueue") + return kind.decode('UTF-8') + + cpdef copy_to_host(self, obj=None): + """Copy content of instance's memory into memory of + `obj`, or allocate NumPy array of obj is None""" + # Cython does the right thing here + cdef unsigned char[::1] host_buf = obj + + if (host_buf is None): + # Python object did not have buffer interface + # allocate new memory + obj = np.empty((self.nbytes,), dtype="|u1") + host_buf = obj + elif (len(host_buf) < self.nbytes): + raise ValueError("Destination object is too small to " + "accommodate {} bytes".format(self.nbytes)) + # call kernel to copy from + DPPLQueue_Memcpy( + self.queue.get_queue_ref(), + &host_buf[0], # destination + self.memory_ptr, # source + self.nbytes + ) + + return obj + + cpdef copy_from_host(self, object obj): + """Copy content of Python buffer provided by `obj` to instance memory.""" + cdef const unsigned char[::1] host_buf = obj + cdef Py_ssize_t buf_len = len(host_buf) + + if (buf_len > self.nbytes): + raise ValueError("Source object is too large to be " + "accommodated in {} bytes buffer".format(self.nbytes)) + # call kernel to copy from + DPPLQueue_Memcpy( + self.queue.get_queue_ref(), + self.memory_ptr, # destination + &host_buf[0], # source + buf_len + ) + + cpdef copy_from_device(self, object sycl_usm_ary): + """Copy SYCL memory underlying the argument object into + the memory of the instance""" + cdef _BufferData src_buf + cdef const char* kind + + if not hasattr(sycl_usm_ary, '__sycl_usm_array_interface__'): + raise ValueError("Object does not implement " + "`__sycl_usm_array_interface__` protocol") + sycl_usm_ary_iface = sycl_usm_ary.__sycl_usm_array_interface__ + if isinstance(sycl_usm_ary_iface, dict): + src_buf = _BufferData.from_sycl_usm_ary_iface(sycl_usm_ary_iface) + + if (src_buf.nbytes > self.nbytes): + raise ValueError("Source object is too large to " + "be accommondated in {} bytes buffer".format(self.nbytes)) + kind = DPPLUSM_GetPointerType( + src_buf.p, self.queue.get_sycl_context().get_context_ref()) + if (kind == b'unknown'): + copy_via_host( + self.memory_ptr, self.queue, # dest + src_buf.p, src_buf.queue, # src + src_buf.nbytes + ) + else: + DPPLQueue_Memcpy( + self.queue.get_queue_ref(), + self.memory_ptr, + src_buf.p, + src_buf.nbytes + ) + else: + raise TypeError + + cpdef bytes tobytes(self): + """Constructs bytes object populated with copy of USM memory""" + cdef Py_ssize_t nb = self.nbytes + cdef bytes b = PyBytes_FromStringAndSize(NULL, nb) + # convert bytes to memory view + cdef unsigned char* ptr = PyBytes_AS_STRING(b) + # string is null terminated + cdef unsigned char[::1] mv = (ptr)[:nb] + self.copy_to_host(mv) # output is discarded + return b + + @staticmethod + cdef SyclDevice get_pointer_device(DPPLSyclUSMRef p, SyclContext ctx): + cdef DPPLSyclDeviceRef dref = DPPLUSM_GetPointerDevice(p, ctx.get_context_ref()) + + return SyclDevice._create(dref) + + +cdef class MemoryUSMShared(_Memory): + """ + MemoryUSMShared(nbytes, alignment=0, queue=None, copy=False) allocates nbytes of + USM shared memory. + + Non-positive alignments are not used (malloc_shared is used instead). + The queue=None the current `dpctl.get_current_queue()` is used to allocate memory. + + MemoryUSMShared(usm_obj) constructor create instance from `usm_obj` expected to + implement `__sycl_usm_array_interface__` protocol and exposing a contiguous block of + USM memory of USM shared type. Using copy=True to perform a copy if USM type is other + than 'shared'. + """ + def __cinit__(self, other, *, Py_ssize_t alignment=0, SyclQueue queue=None, int copy=False): + if (isinstance(other, int)): + self._cinit_alloc(alignment, other, b"shared", queue) + else: + self._cinit_other(other) + if (self.get_usm_type() != "shared"): + if copy: + self._cinit_alloc(0, self.nbytes, b"shared", queue) + self.copy_from_device(other) + else: + raise ValueError("USM pointer in the argument {} is not a USM shared pointer. " + "Zero-copy operation is not possible with copy=False. " + "Either use copy=True, or use a constructor appropriate for " + "type '{}'".format(other, self.get_usm_type())) + + def __getbuffer__(self, Py_buffer *buffer, int flags): + self._getbuffer(buffer, flags) + + +cdef class MemoryUSMHost(_Memory): + """ + MemoryUSMHost(nbytes, alignment=0, queue=None, copy=False) allocates nbytes of + USM host memory. + + Non-positive alignments are not used (malloc_host is used instead). + The queue=None the current `dpctl.get_current_queue()` is used to allocate memory. + + MemoryUSMDevice(usm_obj) constructor create instance from `usm_obj` expected to + implement `__sycl_usm_array_interface__` protocol and exposing a contiguous block of + USM memory of USM host type. Using copy=True to perform a copy if USM type is other + than 'host'. + """ + def __cinit__(self, other, *, Py_ssize_t alignment=0, SyclQueue queue=None, int copy=False): + if (isinstance(other, int)): + self._cinit_alloc(alignment, other, b"host", queue) + else: + self._cinit_other(other) + if (self.get_usm_type() != "host"): + if copy: + self._cinit_alloc(0, self.nbytes, b"host", queue) + self.copy_from_device(other) + else: + raise ValueError("USM pointer in the argument {} is not a USM host pointer. " + "Zero-copy operation is not possible with copy=False. " + "Either use copy=True, or use a constructor appropriate for " + "type '{}'".format(other, self.get_usm_type())) + + def __getbuffer__(self, Py_buffer *buffer, int flags): + self._getbuffer(buffer, flags) + + +cdef class MemoryUSMDevice(_Memory): + """ + MemoryUSMDevice(nbytes, alignment=0, queue=None, copy=False) allocates nbytes of + USM device memory. + + Non-positive alignments are not used (malloc_device is used instead). + The queue=None the current `dpctl.get_current_queue()` is used to allocate memory. + + MemoryUSMDevice(usm_obj) constructor create instance from `usm_obj` expected to + implement `__sycl_usm_array_interface__` protocol and exposing a contiguous block of + USM memory of USM device type. Using copy=True to perform a copy if USM type is other + than 'device'. + """ + def __cinit__(self, other, *, Py_ssize_t alignment=0, SyclQueue queue=None, int copy=False): + if (isinstance(other, int)): + self._cinit_alloc(alignment, other, b"device", queue) + else: + self._cinit_other(other) + if (self.get_usm_type() != "device"): + if copy: + self._cinit_alloc(0, self.nbytes, b"device", queue) + self.copy_from_device(other) + else: + raise ValueError("USM pointer in the argument {} is not a USM device pointer. " + "Zero-copy operation is not possible with copy=False. " + "Either use copy=True, or use a constructor appropriate for " + "type '{}'".format(other, self.get_usm_type())) diff --git a/dpctl/tests/test_sycl_kernel_submit.py b/dpctl/tests/test_sycl_kernel_submit.py index 772eb15042..e1ffa96e19 100644 --- a/dpctl/tests/test_sycl_kernel_submit.py +++ b/dpctl/tests/test_sycl_kernel_submit.py @@ -25,7 +25,7 @@ import ctypes import dpctl import unittest -import dpctl._memory as dpctl_mem +import dpctl.memory as dpctl_mem import numpy as np diff --git a/dpctl/tests/test_sycl_queue_memcpy.py b/dpctl/tests/test_sycl_queue_memcpy.py index ad4cdaf92d..6e3bb7dc72 100644 --- a/dpctl/tests/test_sycl_queue_memcpy.py +++ b/dpctl/tests/test_sycl_queue_memcpy.py @@ -23,14 +23,14 @@ ##===----------------------------------------------------------------------===## import dpctl +import dpctl.memory import unittest class TestQueueMemcpy(unittest.TestCase): def _create_memory(self): nbytes = 1024 - queue = dpctl.get_current_queue() - mobj = dpctl._memory.MemoryUSMShared(nbytes, queue) + mobj = dpctl.memory.MemoryUSMShared(nbytes) return mobj @unittest.skipUnless( @@ -61,13 +61,15 @@ def test_memcpy_type_error(self): q.memcpy(None, mobj, 3) self.assertEqual(type(cm.exception), TypeError) - self.assertEqual(str(cm.exception), "Parameter dest should be Memory.") + self.assertEqual( + str(cm.exception), "Parameter `dest` should have type _Memory." + ) with self.assertRaises(TypeError) as cm: q.memcpy(mobj, None, 3) self.assertEqual(type(cm.exception), TypeError) - self.assertEqual(str(cm.exception), "Parameter src should be Memory.") + self.assertEqual(str(cm.exception), "Parameter `src` should have type _Memory.") if __name__ == "__main__": diff --git a/dpctl/tests/test_sycl_usm.py b/dpctl/tests/test_sycl_usm.py index 6f938028ce..0e33f9f067 100644 --- a/dpctl/tests/test_sycl_usm.py +++ b/dpctl/tests/test_sycl_usm.py @@ -24,7 +24,21 @@ import unittest import dpctl -from dpctl._memory import MemoryUSMShared, MemoryUSMHost, MemoryUSMDevice +from dpctl.memory import MemoryUSMShared, MemoryUSMHost, MemoryUSMDevice +import numpy as np + + +class Dummy(MemoryUSMShared): + """ + Class that exposes `__sycl_usm_array_interface__` with + SYCL context for sycl object, instead of Sycl queue. + """ + + @property + def __sycl_usm_array_interface(self): + iface = super().__sycl_usm_array_interface__ + iface["syclob"] = iface["syclobj"].get_sycl_context() + return iface class TestMemory(unittest.TestCase): @@ -34,15 +48,22 @@ class TestMemory(unittest.TestCase): def test_memory_create(self): nbytes = 1024 queue = dpctl.get_current_queue() - mobj = MemoryUSMShared(nbytes, queue) + mobj = MemoryUSMShared(nbytes, alignment=64, queue=queue) self.assertEqual(mobj.nbytes, nbytes) + self.assertTrue(hasattr(mobj, "__sycl_usm_array_interface__")) def _create_memory(self): nbytes = 1024 queue = dpctl.get_current_queue() - mobj = MemoryUSMShared(nbytes, queue) + mobj = MemoryUSMShared(nbytes, alignment=64, queue=queue) return mobj + def _create_host_buf(self, nbytes): + ba = bytearray(nbytes) + for i in range(nbytes): + ba[i] = (i % 32) + ord("a") + return ba + @unittest.skipUnless( dpctl.has_sycl_platforms(), "No SYCL devices except the default host device." ) @@ -50,7 +71,7 @@ def test_memory_without_context(self): mobj = self._create_memory() # Without context - self.assertEqual(mobj._usm_type(), "shared") + self.assertEqual(mobj.get_usm_type(), "shared") @unittest.skipUnless(dpctl.has_cpu_queues(), "No OpenCL CPU queues available") def test_memory_cpu_context(self): @@ -60,12 +81,12 @@ def test_memory_cpu_context(self): with dpctl.device_context("opencl:cpu:0"): # type respective to the context in which # memory was created - usm_type = mobj._usm_type() + usm_type = mobj.get_usm_type() self.assertEqual(usm_type, "shared") current_queue = dpctl.get_current_queue() # type as view from current queue - usm_type = mobj._usm_type(current_queue) + usm_type = mobj.get_usm_type(current_queue) # type can be unknown if current queue is # not in the same SYCL context self.assertTrue(usm_type in ["unknown", "shared"]) @@ -76,10 +97,10 @@ def test_memory_gpu_context(self): # GPU context with dpctl.device_context("opencl:gpu:0"): - usm_type = mobj._usm_type() + usm_type = mobj.get_usm_type() self.assertEqual(usm_type, "shared") current_queue = dpctl.get_current_queue() - usm_type = mobj._usm_type(current_queue) + usm_type = mobj.get_usm_type(current_queue) self.assertTrue(usm_type in ["unknown", "shared"]) @unittest.skipUnless( @@ -91,6 +112,54 @@ def test_buffer_protocol(self): mv2 = memoryview(mobj) self.assertEqual(mv1, mv2) + @unittest.skipUnless( + dpctl.has_sycl_platforms(), "No SYCL devices except the default host device." + ) + def test_copy_host_roundtrip(self): + mobj = self._create_memory() + host_src_obj = self._create_host_buf(mobj.nbytes) + mobj.copy_from_host(host_src_obj) + host_dest_obj = mobj.copy_to_host() + del mobj + self.assertEqual(host_src_obj, host_dest_obj) + + @unittest.skipUnless( + dpctl.has_sycl_platforms(), "No SYCL devices except the default host device." + ) + def test_zero_copy(self): + mobj = self._create_memory() + mobj2 = type(mobj)(mobj) + + self.assertTrue(mobj2.reference_obj is mobj) + mobj_data = mobj.__sycl_usm_array_interface__["data"] + mobj2_data = mobj2.__sycl_usm_array_interface__["data"] + self.assertEqual(mobj_data, mobj2_data) + + @unittest.skipUnless( + dpctl.has_sycl_platforms(), "No SYCL devices except the default host device." + ) + def test_pickling(self): + import pickle + + mobj = self._create_memory() + host_src_obj = self._create_host_buf(mobj.nbytes) + mobj.copy_from_host(host_src_obj) + + mobj_reconstructed = pickle.loads(pickle.dumps(mobj)) + self.assertEqual( + type(mobj), type(mobj_reconstructed), "Pickling should preserve type" + ) + self.assertEqual( + mobj.tobytes(), + mobj_reconstructed.tobytes(), + "Pickling should preserve buffer content", + ) + self.assertNotEqual( + mobj._pointer, + mobj_reconstructed._pointer, + "Pickling/unpickling changes pointer", + ) + class TestMemoryUSMBase: """ Base tests for MemoryUSM* """ @@ -101,19 +170,53 @@ class TestMemoryUSMBase: @unittest.skipUnless( dpctl.has_sycl_platforms(), "No SYCL devices except the default host device." ) - def test_create_with_queue(self): + def test_create_with_size_and_alignment_and_queue(self): + q = dpctl.get_current_queue() + m = self.MemoryUSMClass(1024, alignment=64, queue=q) + self.assertEqual(m.nbytes, 1024) + self.assertEqual(m.get_usm_type(), self.usm_type) + + @unittest.skipUnless( + dpctl.has_sycl_platforms(), "No SYCL devices except the default host device." + ) + def test_create_with_size_and_queue(self): q = dpctl.get_current_queue() - m = self.MemoryUSMClass(1024, q) + m = self.MemoryUSMClass(1024, queue=q) self.assertEqual(m.nbytes, 1024) - self.assertEqual(m._usm_type(), self.usm_type) + self.assertEqual(m.get_usm_type(), self.usm_type) @unittest.skipUnless( dpctl.has_sycl_platforms(), "No SYCL devices except the default host device." ) - def test_create_without_queue(self): + def test_create_with_size_and_alignment(self): + m = self.MemoryUSMClass(1024, alignment=64) + self.assertEqual(m.nbytes, 1024) + self.assertEqual(m.get_usm_type(), self.usm_type) + + @unittest.skipUnless( + dpctl.has_sycl_platforms(), "No SYCL devices except the default host device." + ) + def test_create_with_only_size(self): m = self.MemoryUSMClass(1024) self.assertEqual(m.nbytes, 1024) - self.assertEqual(m._usm_type(), self.usm_type) + self.assertEqual(m.get_usm_type(), self.usm_type) + + @unittest.skipUnless( + dpctl.has_sycl_platforms(), "No SYCL Devices except the default host device." + ) + def test_sycl_usm_array_interface(self): + import sys + + if self.MemoryUSMClass is MemoryUSMHost and sys.platform in ["win32", "cygwin"]: + # MemoryUSMHost.copy_to_host() hangs on Windows. TODO: investigate + raise unittest.SkipTest + m = self.MemoryUSMClass(256) + m2 = Dummy(m.nbytes) + hb = np.random.randint(0, 256, size=256, dtype="|u1") + m2.copy_from_host(hb) + # test that USM array interface works with SyclContext as 'syclobj' + m.copy_from_device(m2) + self.assertTrue(np.array_equal(m.copy_to_host(), hb)) class TestMemoryUSMShared(TestMemoryUSMBase, unittest.TestCase): diff --git a/scripts/build_for_develop.bat b/scripts/build_for_develop.bat new file mode 100644 index 0000000000..2a4a2265d1 --- /dev/null +++ b/scripts/build_for_develop.bat @@ -0,0 +1,61 @@ +call "%ONEAPI_ROOT%\compiler\latest\env\vars.bat" +IF ERRORLEVEL 1 exit /b 1 +REM conda uses %ERRORLEVEL% but FPGA scripts can set it. So it should be reseted. +set ERRORLEVEL= + +rmdir /S /Q build_cmake +mkdir build_cmake + +rmdir /S /Q install +mkdir install +cd install +set "INSTALL_PREFIX=%cd%" + +cd ..\build_cmake + +set "DPCPP_ROOT=%ONEAPI_ROOT%\compiler\latest\windows" +set NUMPY_INC= +for /f "delims=" %%a in ('%CONDA_PREFIX%\python.exe -c "import numpy; print(numpy.get_include())"') do @set NUMPY_INC=%%a +set PYTHON_INC= +for /f "delims=" %%a in ('%CONDA_PREFIX%\python.exe -c "import distutils.sysconfig as sc; print(sc.get_python_inc())"') do @set PYTHON_INC=%%a + +cmake -G Ninja ^ + -DCMAKE_BUILD_TYPE=Release ^ + "-DCMAKE_CXX_FLAGS=-Wno-unused-function /EHa" ^ + "-DCMAKE_INSTALL_PREFIX=%INSTALL_PREFIX%" ^ + "-DCMAKE_PREFIX_PATH=%INSTALL_PREFIX%" ^ + "-DDPCPP_ROOT=%DPCPP_ROOT%" ^ + "-DCMAKE_C_COMPILER:PATH=%DPCPP_ROOT%\bin\clang-cl.exe" ^ + "-DCMAKE_CXX_COMPILER:PATH=%DPCPP_ROOT%\bin\dpcpp.exe" ^ + "-DPYTHON_INCLUDE_DIR=%PYTHON_INC%" ^ + "-DGTEST_INCLUDE_DIR=%CONDA_PREFIX%\Library\include" ^ + "-DGTEST_LIB_DIR=%CONDA_PREFIX%\Library\lib" ^ + "-DNUMPY_INCLUDE_DIR=%NUMPY_INC%" ^ + "%cd%\..\backends" +IF %ERRORLEVEL% NEQ 0 exit /b 1 + +ninja -n +IF %ERRORLEVEL% NEQ 0 exit /b 1 +ninja check +IF %ERRORLEVEL% NEQ 0 exit /b 1 +ninja install +IF %ERRORLEVEL% NEQ 0 exit /b 1 + +cd .. +xcopy install\lib\*.lib dpctl /E /Y +xcopy install\bin\*.dll dpctl /E /Y + +mkdir dpctl\include +xcopy backends\include dpctl\include /E /Y + + +REM required by _sycl_core(dpctl) +set "DPPL_SYCL_INTERFACE_LIBDIR=dpctl" +set "DPPL_SYCL_INTERFACE_INCLDIR=dpctl\include" +set "CC=clang-cl.exe" +set "CXX=dpcpp.exe" + +python setup.py clean --all +python setup.py build_ext --inplace develop +python -m unittest dpctl.tests +IF %ERRORLEVEL% NEQ 0 exit /b 1 diff --git a/setup.py b/setup.py index d4634e700a..52d17cfc9c 100644 --- a/setup.py +++ b/setup.py @@ -136,9 +136,9 @@ def extensions(): **extension_args ), Extension( - "dpctl._memory", + "dpctl.memory._memory", [ - os.path.join("dpctl", "_memory.pyx"), + os.path.join("dpctl", "memory", "_memory.pyx"), ], **extension_args ),