diff --git a/unified-runtime/test/adapters/cuda/context_tests.cpp b/unified-runtime/test/adapters/cuda/context_tests.cpp
index 77d0b42cd0ba1..4f4df4683cb37 100644
--- a/unified-runtime/test/adapters/cuda/context_tests.cpp
+++ b/unified-runtime/test/adapters/cuda/context_tests.cpp
@@ -43,37 +43,6 @@ TEST_P(cudaUrContextCreateTest, CreateWithChildThread) {
   callContextFromOtherThread.join();
 }
 
-TEST_P(cudaUrContextCreateTest, ActiveContext) {
-  uur::raii::Context context = nullptr;
-  ASSERT_SUCCESS(urContextCreate(1, &device, nullptr, context.ptr()));
-  ASSERT_NE(context, nullptr);
-
-  uur::raii::Queue queue = nullptr;
-  ur_queue_properties_t queue_props{UR_STRUCTURE_TYPE_QUEUE_PROPERTIES, nullptr,
-                                    0};
-  ASSERT_SUCCESS(urQueueCreate(context, device, &queue_props, queue.ptr()));
-  ASSERT_NE(queue, nullptr);
-
-  // check that the queue has the correct context
-  ASSERT_EQ(context, queue->getContext());
-
-  // create a buffer
-  uur::raii::Mem buffer = nullptr;
-  ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, 1024,
-                                   nullptr, buffer.ptr()));
-  ASSERT_NE(buffer, nullptr);
-
-  // check that the context is now the active CUDA context
-  CUcontext cudaCtx = nullptr;
-  ASSERT_SUCCESS_CUDA(cuCtxGetCurrent(&cudaCtx));
-  ASSERT_NE(cudaCtx, nullptr);
-
-  ur_native_handle_t native_context = 0;
-  ASSERT_SUCCESS(urContextGetNativeHandle(context, &native_context));
-  ASSERT_NE(reinterpret_cast<CUcontext>(native_context), nullptr);
-  ASSERT_EQ(cudaCtx, reinterpret_cast<CUcontext>(native_context));
-}
-
 TEST_P(cudaUrContextCreateTest, ContextLifetimeExisting) {
   // start by setting up a CUDA context on the thread
   CUcontext original;
diff --git a/unified-runtime/test/adapters/cuda/memory_tests.cpp b/unified-runtime/test/adapters/cuda/memory_tests.cpp
index 6839b0b95fc82..ac4bfefdd7468 100644
--- a/unified-runtime/test/adapters/cuda/memory_tests.cpp
+++ b/unified-runtime/test/adapters/cuda/memory_tests.cpp
@@ -14,11 +14,12 @@ TEST_P(cudaMemoryTest, urMemBufferNoActiveContext) {
   constexpr size_t memSize = 1024u;
 
   CUcontext current = nullptr;
-  do {
+  ASSERT_SUCCESS_CUDA(cuCtxGetCurrent(&current));
+  while (current != nullptr) {
     CUcontext oldContext = nullptr;
     ASSERT_SUCCESS_CUDA(cuCtxPopCurrent(&oldContext));
     ASSERT_SUCCESS_CUDA(cuCtxGetCurrent(&current));
-  } while (current != nullptr);
+  }
 
   uur::raii::Mem mem;
   ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, memSize,
diff --git a/unified-runtime/test/conformance/enqueue/helpers.h b/unified-runtime/test/conformance/enqueue/helpers.h
index a4d127c4e5b0f..fd8c428a52589 100644
--- a/unified-runtime/test/conformance/enqueue/helpers.h
+++ b/unified-runtime/test/conformance/enqueue/helpers.h
@@ -203,7 +203,15 @@ struct urMultiQueueMultiDeviceTestWithParam
         urContextCreate(devices.size(), devices.data(), nullptr, &context));
 
     // Duplicate our devices until we hit the minimum size specified.
-    auto srcDevices = devices;
+    std::vector<ur_device_handle_t> srcDevices;
+    // If the test only wants a single device duplicated multiple times, take
+    // devices[0] and discard any other devices that were discovered.
+    if (trueMultiDevice) {
+      srcDevices = devices;
+    } else {
+      srcDevices.push_back(devices[0]);
+      devices.clear();
+    }
     while (devices.size() < minDevices) {
       devices.insert(devices.end(), srcDevices.begin(), srcDevices.end());
     }
@@ -224,6 +232,7 @@ struct urMultiQueueMultiDeviceTestWithParam
 
   ur_context_handle_t context;
   std::vector<ur_queue_handle_t> queues;
+  bool trueMultiDevice = true;
 };
 
 } // namespace uur
diff --git a/unified-runtime/test/conformance/enqueue/urEnqueueKernelLaunch.cpp b/unified-runtime/test/conformance/enqueue/urEnqueueKernelLaunch.cpp
index ef5c0228ede48..3dd977d556184 100644
--- a/unified-runtime/test/conformance/enqueue/urEnqueueKernelLaunch.cpp
+++ b/unified-runtime/test/conformance/enqueue/urEnqueueKernelLaunch.cpp
@@ -565,7 +565,7 @@ UUR_INSTANTIATE_PLATFORM_TEST_SUITE(urEnqueueKernelLaunchMultiDeviceTest);
 // TODO: rewrite this test, right now it only works for a single queue
 // (the context is only created for one device)
 TEST_P(urEnqueueKernelLaunchMultiDeviceTest, KernelLaunchReadDifferentQueues) {
-  UUR_KNOWN_FAILURE_ON(uur::LevelZero{}, uur::LevelZeroV2{});
+  UUR_KNOWN_FAILURE_ON(uur::CUDA{}, uur::LevelZero{}, uur::LevelZeroV2{});
 
   uur::KernelLaunchHelper helper =
       uur::KernelLaunchHelper{platform, context, kernel, queues[0]};
diff --git a/unified-runtime/test/conformance/enqueue/urEnqueueKernelLaunchAndMemcpyInOrder.cpp b/unified-runtime/test/conformance/enqueue/urEnqueueKernelLaunchAndMemcpyInOrder.cpp
index c97471aee8a34..1102892d15a61 100644
--- a/unified-runtime/test/conformance/enqueue/urEnqueueKernelLaunchAndMemcpyInOrder.cpp
+++ b/unified-runtime/test/conformance/enqueue/urEnqueueKernelLaunchAndMemcpyInOrder.cpp
@@ -155,13 +155,14 @@ struct urEnqueueKernelLaunchIncrementTest
 
   using Param = uur::BoolTestParam;
 
-  using urMultiQueueLaunchMemcpyTest::context;
   using urMultiQueueLaunchMemcpyTest::queues;
-  using urMultiQueueLaunchMemcpyTest::devices;
   using urMultiQueueLaunchMemcpyTest::kernels;
   using urMultiQueueLaunchMemcpyTest::SharedMem;
 
   void SetUp() override {
+    // We need a single device used multiple times for this test, as opposed
+    // to using all available devices on the platform.
+    this->trueMultiDevice = false;
    UUR_RETURN_ON_FATAL_FAILURE(
        urMultiQueueLaunchMemcpyTest::
            SetUp()); // Use single device, duplicated numOps times
@@ -344,9 +345,28 @@ TEST_P(urEnqueueKernelLaunchIncrementMultiDeviceTest, Success) {
   }
 }
 
-using urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest =
-    urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<
-        std::tuple<uur::BoolTestParam, uur::BoolTestParam>>;
+struct urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest
+    : urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<
+          std::tuple<uur::BoolTestParam, uur::BoolTestParam>> {
+  using Param = std::tuple<uur::BoolTestParam, uur::BoolTestParam>;
+
+  using urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<Param>::devices;
+  using urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<Param>::queues;
+  using urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<Param>::kernels;
+  using urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<
+      Param>::SharedMem;
+
+  void SetUp() override {
+    useEvents = std::get<0>(getParam()).value;
+    queuePerThread = std::get<1>(getParam()).value;
+    // With !queuePerThread this becomes a test on a single device
+    this->trueMultiDevice = queuePerThread;
+    urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<Param>::SetUp();
+  }
+
+  bool useEvents;
+  bool queuePerThread;
+};
 
 UUR_PLATFORM_TEST_SUITE_WITH_PARAM(
     urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest,
@@ -356,11 +376,7 @@ UUR_PLATFORM_TEST_SUITE_WITH_PARAM(
     printParams);
 
 // Enqueue kernelLaunch concurrently from multiple threads
-// With !queuePerThread this becomes a test on a single device
 TEST_P(urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest, Success) {
-  auto useEvents = std::get<0>(getParam()).value;
-  auto queuePerThread = std::get<1>(getParam()).value;
-
   if (!queuePerThread) {
     UUR_KNOWN_FAILURE_ON(uur::LevelZero{}, uur::LevelZeroV2{});
   }
@@ -371,11 +387,11 @@ TEST_P(urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest, Success) {
   static constexpr size_t numOpsPerThread = 6;
 
   for (size_t i = 0; i < numThreads; i++) {
-    threads.emplace_back([this, i, queuePerThread, useEvents]() {
+    threads.emplace_back([this, i]() {
      constexpr size_t global_offset = 0;
      constexpr size_t n_dimensions = 1;
 
-      auto queue = queuePerThread ? queues[i] : queues.back();
+      auto queue = this->queuePerThread ? queues[i] : queues.back();
      auto kernel = kernels[i];
      auto sharedPtr = SharedMem[i];
 
@@ -385,7 +401,7 @@ TEST_P(urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest, Success) {
        ur_event_handle_t *lastEvent = nullptr;
        ur_event_handle_t *signalEvent = nullptr;
 
-        if (useEvents) {
+        if (this->useEvents) {
          waitNum = j > 0 ? 1 : 0;
          lastEvent = j > 0 ? Events[j - 1].ptr() : nullptr;
          signalEvent = Events[j].ptr();
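
For context on the helpers.h change above, here is a minimal standalone sketch of the device-duplication behaviour that the new `trueMultiDevice` flag controls in `urMultiQueueMultiDeviceTestWithParam::SetUp()`. It uses plain `int` in place of `ur_device_handle_t`, and the free function `duplicateDevices` is a hypothetical stand-in for the fixture code, not part of the UR test suite:

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

// Sketch of the duplication logic from the helpers.h hunk, with int standing
// in for ur_device_handle_t. duplicateDevices is illustrative only.
std::vector<int> duplicateDevices(std::vector<int> devices, size_t minDevices,
                                  bool trueMultiDevice) {
  std::vector<int> srcDevices;
  if (trueMultiDevice) {
    // Keep every discovered device and cycle through all of them.
    srcDevices = devices;
  } else {
    // Only the first device is wanted; drop the rest before duplicating.
    srcDevices.push_back(devices[0]);
    devices.clear();
  }
  // Duplicate the source set until the minimum count is reached.
  while (devices.size() < minDevices) {
    devices.insert(devices.end(), srcDevices.begin(), srcDevices.end());
  }
  return devices;
}

int main() {
  // Two discovered devices, at least five entries needed.
  auto multi = duplicateDevices({1, 2}, 5, /*trueMultiDevice=*/true);
  assert(multi.size() == 6); // {1, 2, 1, 2, 1, 2}: devices alternate.

  auto single = duplicateDevices({1, 2}, 5, /*trueMultiDevice=*/false);
  assert(single.size() == 5); // {1, 1, 1, 1, 1}: one device, duplicated.
  return 0;
}
```

With `trueMultiDevice == false` the resulting list repeats `devices[0]` only, which appears to be what the single-device variants of these tests rely on: each op still gets its own queue/kernel slot, but every slot maps to the same device.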