diff --git a/sycl/source/detail/kernel_program_cache.hpp b/sycl/source/detail/kernel_program_cache.hpp index 87a41d9fe1054..8a04e183a3122 100644 --- a/sycl/source/detail/kernel_program_cache.hpp +++ b/sycl/source/detail/kernel_program_cache.hpp @@ -290,7 +290,8 @@ class KernelProgramCache { } catch (const exception &Ex) { BuildResult->Error.Msg = Ex.what(); BuildResult->Error.Code = Ex.get_cl_code(); - if (BuildResult->Error.Code == PI_ERROR_OUT_OF_RESOURCES) { + if (BuildResult->Error.Code == PI_ERROR_OUT_OF_RESOURCES || + BuildResult->Error.Code == PI_ERROR_OUT_OF_HOST_MEMORY) { reset(); BuildResult->updateAndNotify(BuildState::BS_Initial); continue; diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index c0b8ac875e67f..82246af25173d 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1223,7 +1223,8 @@ ProgramManager::ProgramPtr ProgramManager::build( nullptr, &LinkedProg); }; sycl::detail::pi::PiResult Error = doLink(); - if (Error == PI_ERROR_OUT_OF_RESOURCES) { + if (Error == PI_ERROR_OUT_OF_RESOURCES || + Error == PI_ERROR_OUT_OF_HOST_MEMORY) { Context->getKernelProgramCache().reset(); Error = doLink(); } @@ -2118,7 +2119,8 @@ ProgramManager::link(const device_image_plain &DeviceImage, /*user_data=*/nullptr, &LinkedProg); }; sycl::detail::pi::PiResult Error = doLink(); - if (Error == PI_ERROR_OUT_OF_RESOURCES) { + if (Error == PI_ERROR_OUT_OF_RESOURCES || + Error == PI_ERROR_OUT_OF_HOST_MEMORY) { ContextImpl->getKernelProgramCache().reset(); Error = doLink(); } diff --git a/sycl/unittests/kernel-and-program/OutOfResources.cpp b/sycl/unittests/kernel-and-program/OutOfResources.cpp index fe6f18b53e23e..e8c04bf8796c6 100644 --- a/sycl/unittests/kernel-and-program/OutOfResources.cpp +++ b/sycl/unittests/kernel-and-program/OutOfResources.cpp @@ -68,6 +68,7 @@ static sycl::unittest::PiImageArray<2> ImgArray{Img}; static int nProgramCreate = 0; static volatile bool outOfResourcesToggle = false; +static volatile bool outOfHostMemoryToggle = false; static pi_result redefinedProgramCreate(pi_context context, const void *il, size_t length, @@ -80,6 +81,17 @@ static pi_result redefinedProgramCreate(pi_context context, const void *il, return PI_SUCCESS; } +static pi_result +redefinedProgramCreateOutOfHostMemory(pi_context context, const void *il, + size_t length, pi_program *res_program) { + ++nProgramCreate; + if (outOfHostMemoryToggle) { + outOfHostMemoryToggle = false; + return PI_ERROR_OUT_OF_HOST_MEMORY; + } + return PI_SUCCESS; +} + TEST(OutOfResourcesTest, piProgramCreate) { sycl::unittest::PiMock Mock; Mock.redefineBefore( @@ -141,6 +153,70 @@ TEST(OutOfResourcesTest, piProgramCreate) { } } +TEST(OutOfHostMemoryTest, piProgramCreate) { + // Reset to zero. + nProgramCreate = 0; + + sycl::unittest::PiMock Mock; + Mock.redefineBefore( + redefinedProgramCreateOutOfHostMemory); + + sycl::platform Plt{Mock.getPlatform()}; + sycl::context Ctx{Plt}; + auto CtxImpl = detail::getSyclObjImpl(Ctx); + queue q(Ctx, default_selector_v); + + int runningTotal = 0; + // Cache is empty, so one piProgramCreate call. + q.single_task([] {}); + EXPECT_EQ(nProgramCreate, runningTotal += 1); + + // Now, we make the next piProgramCreate call fail with + // PI_ERROR_OUT_OF_HOST_MEMORY. The caching mechanism should catch this, + // clear the cache, and retry the piProgramCreate. + outOfHostMemoryToggle = true; + q.single_task([] {}); + EXPECT_FALSE(outOfHostMemoryToggle); + EXPECT_EQ(nProgramCreate, runningTotal += 2); + { + detail::KernelProgramCache::ProgramCache &Cache = + CtxImpl->getKernelProgramCache().acquireCachedPrograms().get(); + EXPECT_EQ(Cache.size(), 1U) << "Expected 1 program in the cache"; + } + + // The next piProgramCreate call will fail with + // PI_ERROR_OUT_OF_HOST_MEMORY. But OutOfResourcesKernel2 is in + // the cache, so we expect no new piProgramCreate calls. + outOfHostMemoryToggle = true; + q.single_task([] {}); + EXPECT_TRUE(outOfHostMemoryToggle); + EXPECT_EQ(nProgramCreate, runningTotal); + + // OutOfResourcesKernel1 is not in the cache, so we have to + // build it. From what we set before, this call will fail, + // the cache will clear out, and will try again. + q.single_task([] {}); + EXPECT_FALSE(outOfHostMemoryToggle); + EXPECT_EQ(nProgramCreate, runningTotal += 2); + { + detail::KernelProgramCache::ProgramCache &Cache = + CtxImpl->getKernelProgramCache().acquireCachedPrograms().get(); + EXPECT_EQ(Cache.size(), 1U) << "Expected 1 program in the cache"; + } + + // Finally, OutOfResourcesKernel1 will be in the cache, but + // OutOfResourceKenel2 will not, so one more piProgramCreate. + // Toggle is not set, so this should succeed. + q.single_task([] {}); + q.single_task([] {}); + EXPECT_EQ(nProgramCreate, runningTotal += 1); + { + detail::KernelProgramCache::ProgramCache &Cache = + CtxImpl->getKernelProgramCache().acquireCachedPrograms().get(); + EXPECT_EQ(Cache.size(), 2U) << "Expected 2 program in the cache"; + } +} + static int nProgramLink = 0; static pi_result @@ -158,6 +234,20 @@ redefinedProgramLink(pi_context context, pi_uint32 num_devices, return PI_SUCCESS; } +static pi_result redefinedProgramLinkOutOfHostMemory( + pi_context context, pi_uint32 num_devices, const pi_device *device_list, + const char *options, pi_uint32 num_input_programs, + const pi_program *input_programs, + void (*pfn_notify)(pi_program program, void *user_data), void *user_data, + pi_program *ret_program) { + ++nProgramLink; + if (outOfHostMemoryToggle) { + outOfHostMemoryToggle = false; + return PI_ERROR_OUT_OF_HOST_MEMORY; + } + return PI_SUCCESS; +} + TEST(OutOfResourcesTest, piProgramLink) { sycl::unittest::PiMock Mock; Mock.redefineBefore(redefinedProgramLink); @@ -191,4 +281,43 @@ TEST(OutOfResourcesTest, piProgramLink) { CtxImpl->getKernelProgramCache().acquireCachedPrograms().get(); EXPECT_EQ(Cache.size(), 0u) << "Expect no programs in the cache"; } -} \ No newline at end of file +} + +TEST(OutOfHostMemoryTest, piProgramLink) { + // Reset to zero. + nProgramLink = 0; + + sycl::unittest::PiMock Mock; + Mock.redefineBefore( + redefinedProgramLinkOutOfHostMemory); + + sycl::platform Plt{Mock.getPlatform()}; + sycl::context Ctx{Plt}; + auto CtxImpl = detail::getSyclObjImpl(Ctx); + queue q(Ctx, default_selector_v); + // Put some programs in the cache + q.single_task([] {}); + q.single_task([] {}); + { + detail::KernelProgramCache::ProgramCache &Cache = + CtxImpl->getKernelProgramCache().acquireCachedPrograms().get(); + EXPECT_EQ(Cache.size(), 2U) << "Expect 2 programs in the cache"; + } + + auto b1 = sycl::get_kernel_bundle(Ctx); + auto b2 = sycl::get_kernel_bundle(Ctx); + outOfHostMemoryToggle = true; + EXPECT_EQ(nProgramLink, 0); + auto b3 = sycl::link({b1, b2}); + EXPECT_FALSE(outOfHostMemoryToggle); + // one restart due to out of resources, one link per each of b1 and b2. + EXPECT_EQ(nProgramLink, 3); + // no programs should be in the cache due to out of resources. + { + detail::KernelProgramCache::ProgramCache &Cache = + CtxImpl->getKernelProgramCache().acquireCachedPrograms().get(); + EXPECT_EQ(Cache.size(), 0u) << "Expect no programs in the cache"; + } +}