diff --git a/openmp/libomptarget/test/lit.cfg b/openmp/libomptarget/test/lit.cfg
index fc1d436e51b7b..565556e64ff29 100644
--- a/openmp/libomptarget/test/lit.cfg
+++ b/openmp/libomptarget/test/lit.cfg
@@ -34,6 +34,11 @@ if 'OMP_TARGET_OFFLOAD' in os.environ:
 if 'HSA_ENABLE_SDMA' in os.environ:
     config.environment['HSA_ENABLE_SDMA'] = os.environ['HSA_ENABLE_SDMA']
 
+# Architectures like gfx942 may or may not be APUs so an additional environment
+# variable is required as some tests can be APU specific.
+if 'IS_APU' in os.environ:
+    config.environment['IS_APU'] = os.environ['IS_APU']
+
 # set default environment variables for test
 if 'CHECK_OPENMP_ENV' in os.environ:
     test_env = os.environ['CHECK_OPENMP_ENV'].split()
@@ -111,6 +116,7 @@ if config.libomptarget_has_libc:
 # For CUDA, this is the case with compute capability 70 (Volta) or higher.
 # For all other targets, we currently assume it is.
 supports_unified_shared_memory = True
+supports_apu = False
 if config.libomptarget_current_target.startswith('nvptx'):
     try:
         cuda_arch = int(config.cuda_test_arch[:3])
@@ -126,8 +132,18 @@ elif config.libomptarget_current_target.startswith('amdgcn'):
             config.amdgpu_test_arch.startswith("gfx940") or
             config.amdgpu_test_arch.startswith("gfx942")):
         supports_unified_shared_memory = False
+    # Check if the AMD architecture is an APU: gfx940 is always treated as one,
+    # gfx942 only when IS_APU evaluates to true. IS_APU is only forwarded into
+    # config.environment when it is set, so an unset variable means "not an APU".
+    if (config.amdgpu_test_arch.startswith("gfx940") or
+        (config.amdgpu_test_arch.startswith("gfx942") and
+         'IS_APU' in config.environment and
+         evaluate_bool_env(config.environment['IS_APU']))):
+        supports_apu = True
 if supports_unified_shared_memory:
     config.available_features.add('unified_shared_memory')
+if supports_apu:
+    config.available_features.add('apu')
 
 # Setup environment to find dynamic library at runtime
 if config.operating_system == 'Windows':
diff --git a/openmp/libomptarget/test/mapping/auto_zero_copy_apu.cpp b/openmp/libomptarget/test/mapping/auto_zero_copy_apu.cpp
new file mode 100644
index 0000000000000..48360e4fd7f7b
--- /dev/null
+++ b/openmp/libomptarget/test/mapping/auto_zero_copy_apu.cpp
@@ -0,0 +1,62 @@
+// clang-format off
+// RUN: %libomptarget-compilexx-generic
+// RUN: env HSA_XNACK=1 LIBOMPTARGET_INFO=30 %libomptarget-run-generic 2>&1 \
+// RUN: | %fcheck-generic -check-prefix=INFO_ZERO -check-prefix=CHECK
+
+// RUN: %libomptarget-compilexx-generic
+// RUN: env HSA_XNACK=0 LIBOMPTARGET_INFO=30 %libomptarget-run-generic 2>&1 \
+// RUN: | %fcheck-generic -check-prefix=INFO_COPY -check-prefix=CHECK
+
+// UNSUPPORTED: aarch64-unknown-linux-gnu
+// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+// UNSUPPORTED: nvptx64-nvidia-cuda
+// UNSUPPORTED: nvptx64-nvidia-cuda-LTO
+// UNSUPPORTED: x86_64-pc-linux-gnu
+// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+
+// REQUIRES: apu
+
+// clang-format on
+
+// Exercises implicit zero-copy on APUs: with HSA_XNACK=1 the runtime is
+// expected to report the mapped buffers as unified shared memory, while with
+// HSA_XNACK=0 it is expected to create ordinary map entries instead.
+
+#include <cstdio>
+
+int main() {
+  int n = 1024;
+
+  // test various mapping types
+  int *a = new int[n];
+  int k = 3;
+  int b[n];
+
+  for (int i = 0; i < n; i++)
+    b[i] = i;
+
+  // clang-format off
+  // INFO_ZERO: Return HstPtrBegin 0x{{.*}} Size=4096 for unified shared memory
+  // INFO_ZERO: Return HstPtrBegin 0x{{.*}} Size=4096 for unified shared memory
+
+  // INFO_COPY: Creating new map entry with HstPtrBase=0x{{.*}}, HstPtrBegin=0x{{.*}}, TgtAllocBegin=0x{{.*}}, TgtPtrBegin=0x{{.*}}, Size=4096,
+  // INFO_COPY: Creating new map entry with HstPtrBase=0x{{.*}}, HstPtrBegin=0x{{.*}}, TgtAllocBegin=0x{{.*}}, TgtPtrBegin=0x{{.*}}, Size=4096,
+  // INFO_COPY: Mapping exists with HstPtrBegin=0x{{.*}}, TgtPtrBegin=0x{{.*}}, Size=4096, DynRefCount=1 (update suppressed)
+  // INFO_COPY: Mapping exists with HstPtrBegin=0x{{.*}}, TgtPtrBegin=0x{{.*}}, Size=4096, DynRefCount=1 (update suppressed)
+// clang-format on
+#pragma omp target teams distribute parallel for map(tofrom : a[ : n]) \
+    map(to : b[ : n])
+  for (int i = 0; i < n; i++)
+    a[i] = i + b[i] + k;
+
+  int err = 0;
+  for (int i = 0; i < n; i++)
+    if (a[i] != i + b[i] + k)
+      err++;
+
+  // CHECK: PASS
+  if (err == 0)
+    printf("PASS\n");
+  delete[] a;
+  return err;
+}