Skip to content

Commit a7774f2

Browse files
authored
[UR] Fix some tests that are broken when run with multiple cuda devices available. (#17216)
Also removes a test and adds known failures where appropriate (typically where the test only runs when multiple devices are available so the skip doesn't affect behaviour of single-device runs). The test removed is `cudaUrContextCreateTest.ActiveContext`. This test seems to be testing the assumption that a `urQueueCreate` followed by `urMemBufferCreate` will set the active cuda context to the one associated with the context passed to those calls. Neither of these calls set the active context, this may have changed at some point as the test dates back to a PI unit test. The test currently passes as long as only one device is available because a previous `urDeviceGetInfo` sets the active context to the one associated with that device, which is inevitably the same as the one associated with the UR context used in the test. Since the test is based on a faulty assumption about the adapter I think we can just delete it.
1 parent ee9ba99 commit a7774f2

File tree

5 files changed

+42
-47
lines changed

5 files changed

+42
-47
lines changed

unified-runtime/test/adapters/cuda/context_tests.cpp

Lines changed: 0 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -43,37 +43,6 @@ TEST_P(cudaUrContextCreateTest, CreateWithChildThread) {
4343
callContextFromOtherThread.join();
4444
}
4545

46-
TEST_P(cudaUrContextCreateTest, ActiveContext) {
47-
uur::raii::Context context = nullptr;
48-
ASSERT_SUCCESS(urContextCreate(1, &device, nullptr, context.ptr()));
49-
ASSERT_NE(context, nullptr);
50-
51-
uur::raii::Queue queue = nullptr;
52-
ur_queue_properties_t queue_props{UR_STRUCTURE_TYPE_QUEUE_PROPERTIES, nullptr,
53-
0};
54-
ASSERT_SUCCESS(urQueueCreate(context, device, &queue_props, queue.ptr()));
55-
ASSERT_NE(queue, nullptr);
56-
57-
// check that the queue has the correct context
58-
ASSERT_EQ(context, queue->getContext());
59-
60-
// create a buffer
61-
uur::raii::Mem buffer = nullptr;
62-
ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, 1024,
63-
nullptr, buffer.ptr()));
64-
ASSERT_NE(buffer, nullptr);
65-
66-
// check that the context is now the active CUDA context
67-
CUcontext cudaCtx = nullptr;
68-
ASSERT_SUCCESS_CUDA(cuCtxGetCurrent(&cudaCtx));
69-
ASSERT_NE(cudaCtx, nullptr);
70-
71-
ur_native_handle_t native_context = 0;
72-
ASSERT_SUCCESS(urContextGetNativeHandle(context, &native_context));
73-
ASSERT_NE(reinterpret_cast<CUcontext>(native_context), nullptr);
74-
ASSERT_EQ(cudaCtx, reinterpret_cast<CUcontext>(native_context));
75-
}
76-
7746
TEST_P(cudaUrContextCreateTest, ContextLifetimeExisting) {
7847
// start by setting up a CUDA context on the thread
7948
CUcontext original;

unified-runtime/test/adapters/cuda/memory_tests.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,12 @@ TEST_P(cudaMemoryTest, urMemBufferNoActiveContext) {
1414
constexpr size_t memSize = 1024u;
1515

1616
CUcontext current = nullptr;
17-
do {
17+
ASSERT_SUCCESS_CUDA(cuCtxGetCurrent(&current));
18+
while (current != nullptr) {
1819
CUcontext oldContext = nullptr;
1920
ASSERT_SUCCESS_CUDA(cuCtxPopCurrent(&oldContext));
2021
ASSERT_SUCCESS_CUDA(cuCtxGetCurrent(&current));
21-
} while (current != nullptr);
22+
}
2223

2324
uur::raii::Mem mem;
2425
ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, memSize,

unified-runtime/test/conformance/enqueue/helpers.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,15 @@ struct urMultiQueueMultiDeviceTestWithParam
203203
urContextCreate(devices.size(), devices.data(), nullptr, &context));
204204

205205
// Duplicate our devices until we hit the minimum size specified.
206-
auto srcDevices = devices;
206+
std::vector<ur_device_handle_t> srcDevices;
207+
// If the test actually only wants one device duplicated a bunch of times
208+
// we take devices[0] and discard any other devices that were discovered.
209+
if (trueMultiDevice) {
210+
srcDevices = devices;
211+
} else {
212+
srcDevices.push_back(devices[0]);
213+
devices.clear();
214+
}
207215
while (devices.size() < minDevices) {
208216
devices.insert(devices.end(), srcDevices.begin(), srcDevices.end());
209217
}
@@ -224,6 +232,7 @@ struct urMultiQueueMultiDeviceTestWithParam
224232

225233
ur_context_handle_t context;
226234
std::vector<ur_queue_handle_t> queues;
235+
bool trueMultiDevice = true;
227236
};
228237

229238
} // namespace uur

unified-runtime/test/conformance/enqueue/urEnqueueKernelLaunch.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -565,7 +565,7 @@ UUR_INSTANTIATE_PLATFORM_TEST_SUITE(urEnqueueKernelLaunchMultiDeviceTest);
565565
// TODO: rewrite this test, right now it only works for a single queue
566566
// (the context is only created for one device)
567567
TEST_P(urEnqueueKernelLaunchMultiDeviceTest, KernelLaunchReadDifferentQueues) {
568-
UUR_KNOWN_FAILURE_ON(uur::LevelZero{}, uur::LevelZeroV2{});
568+
UUR_KNOWN_FAILURE_ON(uur::CUDA{}, uur::LevelZero{}, uur::LevelZeroV2{});
569569

570570
uur::KernelLaunchHelper helper =
571571
uur::KernelLaunchHelper{platform, context, kernel, queues[0]};

unified-runtime/test/conformance/enqueue/urEnqueueKernelLaunchAndMemcpyInOrder.cpp

Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -155,13 +155,14 @@ struct urEnqueueKernelLaunchIncrementTest
155155

156156
using Param = uur::BoolTestParam;
157157

158-
using urMultiQueueLaunchMemcpyTest<numOps, Param>::context;
159158
using urMultiQueueLaunchMemcpyTest<numOps, Param>::queues;
160-
using urMultiQueueLaunchMemcpyTest<numOps, Param>::devices;
161159
using urMultiQueueLaunchMemcpyTest<numOps, Param>::kernels;
162160
using urMultiQueueLaunchMemcpyTest<numOps, Param>::SharedMem;
163161

164162
void SetUp() override {
163+
// We actually need a single device used multiple times for this test, as
164+
// opposed to utilizing all available devices for the platform.
165+
this->trueMultiDevice = false;
165166
UUR_RETURN_ON_FATAL_FAILURE(
166167
urMultiQueueLaunchMemcpyTest<numOps, Param>::
167168
SetUp()); // Use single device, duplicated numOps times
@@ -344,9 +345,28 @@ TEST_P(urEnqueueKernelLaunchIncrementMultiDeviceTest, Success) {
344345
}
345346
}
346347

347-
using urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest =
348-
urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<
349-
std::tuple<uur::BoolTestParam, uur::BoolTestParam>>;
348+
struct urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest
349+
: urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<
350+
std::tuple<uur::BoolTestParam, uur::BoolTestParam>> {
351+
using Param = std::tuple<uur::BoolTestParam, uur::BoolTestParam>;
352+
353+
using urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<Param>::devices;
354+
using urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<Param>::queues;
355+
using urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<Param>::kernels;
356+
using urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<
357+
Param>::SharedMem;
358+
359+
void SetUp() override {
360+
useEvents = std::get<0>(getParam()).value;
361+
queuePerThread = std::get<1>(getParam()).value;
362+
// With !queuePerThread this becomes a test on a single device
363+
this->trueMultiDevice = queuePerThread;
364+
urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<Param>::SetUp();
365+
}
366+
367+
bool useEvents;
368+
bool queuePerThread;
369+
};
350370

351371
UUR_PLATFORM_TEST_SUITE_WITH_PARAM(
352372
urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest,
@@ -356,11 +376,7 @@ UUR_PLATFORM_TEST_SUITE_WITH_PARAM(
356376
printParams<urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest>);
357377

358378
// Enqueue kernelLaunch concurrently from multiple threads
359-
// With !queuePerThread this becomes a test on a single device
360379
TEST_P(urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest, Success) {
361-
auto useEvents = std::get<0>(getParam()).value;
362-
auto queuePerThread = std::get<1>(getParam()).value;
363-
364380
if (!queuePerThread) {
365381
UUR_KNOWN_FAILURE_ON(uur::LevelZero{}, uur::LevelZeroV2{});
366382
}
@@ -371,11 +387,11 @@ TEST_P(urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest, Success) {
371387
static constexpr size_t numOpsPerThread = 6;
372388

373389
for (size_t i = 0; i < numThreads; i++) {
374-
threads.emplace_back([this, i, queuePerThread, useEvents]() {
390+
threads.emplace_back([this, i]() {
375391
constexpr size_t global_offset = 0;
376392
constexpr size_t n_dimensions = 1;
377393

378-
auto queue = queuePerThread ? queues[i] : queues.back();
394+
auto queue = this->queuePerThread ? queues[i] : queues.back();
379395
auto kernel = kernels[i];
380396
auto sharedPtr = SharedMem[i];
381397

@@ -385,7 +401,7 @@ TEST_P(urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest, Success) {
385401
ur_event_handle_t *lastEvent = nullptr;
386402
ur_event_handle_t *signalEvent = nullptr;
387403

388-
if (useEvents) {
404+
if (this->useEvents) {
389405
waitNum = j > 0 ? 1 : 0;
390406
lastEvent = j > 0 ? Events[j - 1].ptr() : nullptr;
391407
signalEvent = Events[j].ptr();

0 commit comments

Comments
 (0)