From 9cb54656888f2cefe4ee6579cd64f5f28c00d01e Mon Sep 17 00:00:00 2001 From: Georgi Mirazchiyski Date: Mon, 23 Sep 2024 14:59:29 +0100 Subject: [PATCH 01/10] [SYCL][Ext][Bindless] Initial implementation of image spirv builtins on HIP This PR implements the required spirv builtins in the libclc for AMD targets to support image fetching (excluding sampled fetch for now) and sampling as well as image array fetching and sampling. Additionally, End-to-end tests are updated to require the aspects corresponding to the feature that is being tested from the Bindless Images extension. This helps avoid having to manually say which backend adapter is supported or unsupported and instead rely on support based on aspect/device queries to drive the execution of the tests. Signed-off-by: Georgi Mirazchiyski --- libclc/libspirv/lib/amdgcn-amdhsa/SOURCES | 1 + .../lib/amdgcn-amdhsa/images/image.cl | 1076 +++++++++++++++++ sycl/cmake/modules/FetchUnifiedRuntime.cmake | 2 +- sycl/cmake/modules/UnifiedRuntimeTag.cmake | 2 +- .../sycl_ext_oneapi_bindless_images.asciidoc | 10 +- .../bindless_images/3_channel_format.cpp | 4 + .../array/fetch_sampled_array.cpp | 2 +- .../array/read_sampled_array.cpp | 14 +- .../array/read_write_1d_subregion.cpp | 6 +- .../array/read_write_2d_subregion.cpp | 2 +- .../array/read_write_unsampled_array.cpp | 3 +- .../cubemap/cubemap_sampled.cpp | 2 - .../cubemap/cubemap_unsampled.cpp | 2 +- .../bindless_images/device_to_device_copy.cpp | 5 +- .../device_to_device_copy_1D_subregion.cpp | 11 +- .../device_to_device_copy_2D_subregion.cpp | 2 +- .../device_to_device_copy_3D_subregion.cpp | 2 +- .../read_write_unsampled_semaphore.cpp | 5 +- .../examples/example_1_1D_read_write.cpp | 5 +- .../examples/example_2_2D_dynamic_read.cpp | 2 +- ...ipmap_anisotropic_filtering_and_levels.cpp | 3 +- .../example_4_1D_array_read_write.cpp | 5 +- .../examples/example_5_sample_cubemap.cpp | 2 +- ...example_6_import_memory_and_semaphores.cpp | 8 +- .../bindless_images/image_get_info.cpp | 6 +- .../bindless_images/mipmap/mipmap_read_1D.cpp | 2 +- .../bindless_images/mipmap/mipmap_read_2D.cpp | 2 +- .../bindless_images/mipmap/mipmap_read_3D.cpp | 2 +- .../bindless_images/read_norm_types.cpp | 3 + .../test-e2e/bindless_images/read_sampled.cpp | 18 +- .../bindless_images/read_write_1D.cpp | 3 + .../read_write_1D_subregion.cpp | 3 + .../read_write_3D_subregion.cpp | 3 + .../bindless_images/read_write_unsampled.cpp | 3 + .../sampled_fetch/fetch_1D_USM.cpp | 1 - .../sampled_fetch/fetch_2D.cpp | 1 - .../sampled_fetch/fetch_2D_USM.cpp | 1 - .../sampled_fetch/fetch_3D.cpp | 1 - sycl/test-e2e/bindless_images/sampling_2D.cpp | 2 +- .../sampling_2D_USM_shared.cpp | 8 +- .../bindless_images/sampling_2D_half.cpp | 7 + sycl/test-e2e/bindless_images/sampling_3D.cpp | 3 + .../sampling_unique_addr_modes.cpp | 2 +- .../user_types/mipmap_read_user_type_2D.cpp | 2 +- .../user_types/read_write_user_type.cpp | 6 +- .../vulkan_interop/mipmaps.cpp | 3 +- .../vulkan_interop/sampled_images.cpp | 2 +- .../vulkan_interop/sampled_images_USM.cpp | 3 +- .../vulkan_interop/unsampled_images.cpp | 2 +- .../unsampled_images_semaphore.cpp | 3 +- 50 files changed, 1214 insertions(+), 54 deletions(-) create mode 100644 libclc/libspirv/lib/amdgcn-amdhsa/images/image.cl diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/SOURCES b/libclc/libspirv/lib/amdgcn-amdhsa/SOURCES index 3665db09f6bd1..6e3a1a4107b36 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/SOURCES +++ b/libclc/libspirv/lib/amdgcn-amdhsa/SOURCES @@ -16,6 +16,7 @@ atomic/atomic_sub.cl atomic/atomic_store.cl conversion/GenericCastToPtrExplicit.cl synchronization/barrier.cl +images/image.cl math/acos.cl math/acosh.cl math/asin.cl diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/images/image.cl b/libclc/libspirv/lib/amdgcn-amdhsa/images/image.cl new file mode 100644 index 0000000000000..c8a0bbfae83f9 --- /dev/null +++ b/libclc/libspirv/lib/amdgcn-amdhsa/images/image.cl @@ -0,0 +1,1076 @@ +#include +#include + +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +#endif + +#ifdef cl_khr_3d_image_writes +#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable +#endif + +#ifdef _WIN32 +#define _CLC_MANGLE_FUNC_IMG_HANDLE(namelength, name, prefix, postfix) \ + _Z##namelength##name##prefix##y##postfix +#else +#define _CLC_MANGLE_FUNC_IMG_HANDLE(namelength, name, prefix, postfix) \ + _Z##namelength##name##prefix##m##postfix +#endif + +// Helpers for casting between two builitin vector types and/or scalar types. + +// Using the builtin as_type() and as_typen() functions to reinterpret types. +// The restriction being is that element "type"s need to be of the same size. +#define _CLC_DEFINE_BUILTIN_CAST_VEC4_TO_VEC3(vec4_elem_t, to_t) \ + _CLC_INLINE to_t##3 __clc_cast_from_##vec4_elem_t##4_to_##to_t##3(vec4_elem_t##4 from) { \ + vec4_elem_t##3 casted = as_##vec4_elem_t##3(from); \ + return as_##to_t##3(casted); \ + } +#define _CLC_DEFINE_BUILTIN_CAST_VEC4_TO_VEC2(vec4_elem_t, to_t) \ + _CLC_INLINE to_t##2 __clc_cast_from_##vec4_elem_t##4_to_##to_t##2(vec4_elem_t##4 from) { \ + vec4_elem_t##4 casted = as_##vec4_elem_t##4(from); \ + return as_##to_t##2((vec4_elem_t##2)(casted.x, casted.y)); \ + } +#define _CLC_DEFINE_BUILTIN_CAST_VEC4_TO_SCALAR(vec4_elem_t, to_t) \ + _CLC_INLINE to_t __clc_cast_from_##vec4_elem_t##4_to_##to_t(vec4_elem_t##4 from) { \ + vec4_elem_t##4 casted = as_##vec4_elem_t##4(from); \ + return as_##to_t(casted.x); \ + } +#define _CLC_DEFINE_BUILTIN_CAST_VEC3_TO_VEC4(from_t, vec4_elem_t) \ + _CLC_INLINE vec4_elem_t##4 __clc_cast_from_##from_t##3_to_##vec4_elem_t##4(from_t##3 from) { \ + vec4_elem_t##3 casted = as_##vec4_elem_t##3(from); \ + return as_##vec4_elem_t##4(casted); \ + } +#define _CLC_DEFINE_BUILTIN_CAST_VEC2_TO_VEC4(from_t, vec4_elem_t) \ + _CLC_INLINE vec4_elem_t##4 __clc_cast_from_##from_t##2_to_##vec4_elem_t##4(from_t##2 from) { \ + vec4_elem_t##2 casted = as_##vec4_elem_t##2(from); \ + return (vec4_elem_t##4)(casted.x, casted.y, 0, 0); \ + } +#define _CLC_DEFINE_BUILTIN_CAST_SCALAR_TO_VEC4(from_t, vec4_elem_t) \ + _CLC_INLINE vec4_elem_t##4 __clc_cast_from_##from_t##_to_##vec4_elem_t##4(from_t from) { \ + vec4_elem_t casted = as_##vec4_elem_t(from); \ + return (vec4_elem_t##4)(casted, 0, 0, 0); \ + } + +// Generic casts between builtin types. +#define _CLC_DEFINE_CAST_VEC4(vec4_elem_t, to_t) \ + _CLC_INLINE to_t##4 __clc_cast_from_##vec4_elem_t##4_to_##to_t##4(vec4_elem_t##4 from) { \ + return (to_t##4)(from.x, from.y, from.z, from.w); \ + } +#define _CLC_DEFINE_CAST_VEC4_TO_VEC3(vec4_elem_t, to_t) \ + _CLC_INLINE to_t##3 __clc_cast_from_##vec4_elem_t##4_to_##to_t##3(vec4_elem_t##4 from) { \ + return (to_t##3)(from.x, from.y, from.z); \ + } +#define _CLC_DEFINE_CAST_VEC4_TO_VEC2(vec4_elem_t, to_t) \ + _CLC_INLINE to_t##2 __clc_cast_from_##vec4_elem_t##4_to_##to_t##2(vec4_elem_t##4 from) { \ + return (to_t##2)(from.x, from.y); \ + } +#define _CLC_DEFINE_CAST_VEC4_TO_SCALAR(vec4_elem_t, to_t) \ + _CLC_INLINE to_t __clc_cast_from_##vec4_elem_t##4_to_##to_t(vec4_elem_t##4 from) { \ + return (to_t)from.x; \ + } +#define _CLC_DEFINE_CAST_VEC3_TO_VEC4(from_t, vec4_elem_t) \ + _CLC_INLINE vec4_elem_t##4 __clc_cast_from_##from_t##3_to_##vec4_elem_t##4(from_t##3 from) { \ + return (vec4_elem_t##4)(from.x, from.y, from.z, 0); \ + } +#define _CLC_DEFINE_CAST_VEC2_TO_VEC4(from_t, vec4_elem_t) \ + _CLC_INLINE vec4_elem_t##4 __clc_cast_from_##from_t##2_to_##vec4_elem_t##4(from_t##2 from) { \ + return (vec4_elem_t##4)(from.x, from.y, 0, 0); \ + } +#define _CLC_DEFINE_CAST_SCALAR_TO_VEC4(from_t, vec4_elem_t) \ + _CLC_INLINE vec4_elem_t##4 __clc_cast_from_##from_t##_to_##vec4_elem_t##4(from_t from) { \ + return (vec4_elem_t##4)(from, 0, 0, 0); \ + } + +// Helpers to extract N channel(s) from a four-channel (RGBA/XYZW) color type. + +#define _CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(from_t, to_t) \ + _CLC_DEFINE_CAST_VEC4(from_t, to_t) \ + _CLC_DEFINE_BUILTIN_CAST_VEC4_TO_VEC3(from_t, to_t) \ + _CLC_DEFINE_BUILTIN_CAST_VEC4_TO_VEC2(from_t, to_t) \ + _CLC_DEFINE_BUILTIN_CAST_VEC4_TO_SCALAR(from_t, to_t) \ + _CLC_DEFINE_BUILTIN_CAST_VEC2_TO_VEC4(from_t, to_t) \ + _CLC_DEFINE_BUILTIN_CAST_VEC3_TO_VEC4(from_t, to_t) \ + _CLC_DEFINE_BUILTIN_CAST_SCALAR_TO_VEC4(from_t, to_t) + +#define _CLC_DEFINE_EXTRACT_COLOR_HELPERS(from_t, to_t) \ + _CLC_DEFINE_CAST_VEC4(from_t, to_t) \ + _CLC_DEFINE_CAST_VEC4_TO_VEC3(from_t, to_t) \ + _CLC_DEFINE_CAST_VEC4_TO_VEC2(from_t, to_t) \ + _CLC_DEFINE_CAST_VEC4_TO_SCALAR(from_t, to_t) \ + _CLC_DEFINE_CAST_VEC2_TO_VEC4(from_t, to_t) \ + _CLC_DEFINE_CAST_VEC3_TO_VEC4(from_t, to_t) \ + _CLC_DEFINE_CAST_SCALAR_TO_VEC4(from_t, to_t) + +// Define casts between supported builtin types for image color + +_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(float, float) +_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(float, int) +_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(int, float) +_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(float, uint) +_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(uint, float) +_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(half, half) +_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(half, short) +_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(short, half) +_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(half, ushort) +_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(ushort, half) + +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(float, half) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(half, float) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(float, short) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(short, float) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(float, ushort) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(ushort, float) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(float, char) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(char, float) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(float, uchar) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(uchar, float) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(half, int) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(int, half) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(half, uint) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(uint, half) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(half, char) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(char, half) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(half, uchar) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(uchar, half) + +#undef _CLC_DEFINE_EXTRACT_COLOR_HELPERS +#undef _CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS + +#undef _CLC_DEFINE_CAST_SCALAR_TO_VEC4 +#undef _CLC_DEFINE_CAST_VEC2_TO_VEC4 +#undef _CLC_DEFINE_CAST_VEC3_TO_VEC4 +#undef _CLC_DEFINE_CAST_VEC4_TO_SCALAR +#undef _CLC_DEFINE_CAST_VEC4_TO_VEC3 +#undef _CLC_DEFINE_CAST_VEC4_TO_VEC2 +#undef _CLC_DEFINE_CAST_VEC4 +#undef _CLC_DEFINE_BUILTIN_CAST_SCALAR_TO_VEC4 +#undef _CLC_DEFINE_BUILTIN_CAST_VEC2_TO_VEC4 +#undef _CLC_DEFINE_BUILTIN_VEC3_TO_VEC4 +#undef _CLC_DEFINE_BUILTIN_VEC4_TO_SCALAR +#undef _CLC_DEFINE_BUILTIN_VEC4_TO_VEC3 +#undef _CLC_DEFINE_BUILTIN_VEC4_TO_VEC2 +#undef _CLC_DEFINE_BUILTIN_VEC4 + +// The ockl functions/builtins expect the resource in constant addrspace. +#if __clang_major__ >= 8 +#define _CLC_CONST_AS __constant +#elif __clang_major__ >= 7 +#define _CLC_CONST_AS __attribute__((address_space(4))) +#else +#define _CLC_CONST_AS __attribute__((address_space(2))) +#endif + +// Declare ockl functions/builtins that we link from the ROCm device libs. +float4 __ockl_image_load_1D(_CLC_CONST_AS unsigned int *tex, int coord); +float4 __ockl_image_load_2D(_CLC_CONST_AS unsigned int *tex, int2 coord); +float4 __ockl_image_load_3D(_CLC_CONST_AS unsigned int *tex, int3 coord); +half4 __ockl_image_loadh_1D(_CLC_CONST_AS unsigned int *tex, int coord); +half4 __ockl_image_loadh_2D(_CLC_CONST_AS unsigned int *tex, int2 coord); +half4 __ockl_image_loadh_3D(_CLC_CONST_AS unsigned int *tex, int3 coord); + +float4 __ockl_image_load_1Da(_CLC_CONST_AS unsigned int *tex, int2 coord); +float4 __ockl_image_load_2Da(_CLC_CONST_AS unsigned int *tex, int4 coord); +half4 __ockl_image_loadh_1Da(_CLC_CONST_AS unsigned int *tex, int2 coord); +half4 __ockl_image_loadh_2Da(_CLC_CONST_AS unsigned int *tex, int4 coord); + +void __ockl_image_store_1D(_CLC_CONST_AS unsigned int *tex, int coord, float4 color); +void __ockl_image_store_2D(_CLC_CONST_AS unsigned int *tex, int2 coord, float4 color); +void __ockl_image_store_3D(_CLC_CONST_AS unsigned int *tex, int3 coord, float4 color); +void __ockl_image_storeh_1D(_CLC_CONST_AS unsigned int *tex, int coord, half4 color); +void __ockl_image_storeh_2D(_CLC_CONST_AS unsigned int *tex, int2 coord, half4 color); +void __ockl_image_storeh_3D(_CLC_CONST_AS unsigned int *tex, int3 coord, half4 color); + +void __ockl_image_store_1Da(_CLC_CONST_AS unsigned int *tex, int2 coord, float4 color); +void __ockl_image_store_2Da(_CLC_CONST_AS unsigned int *tex, int4 coord, float4 color); +void __ockl_image_storeh_1Da(_CLC_CONST_AS unsigned int *tex, int2 coord, half4 color); +void __ockl_image_storeh_2Da(_CLC_CONST_AS unsigned int *tex, int4 coord, half4 color); + +float4 __ockl_image_sample_1D(_CLC_CONST_AS unsigned int *tex, _CLC_CONST_AS unsigned int *samp, float coord); +float4 __ockl_image_sample_2D(_CLC_CONST_AS unsigned int *tex, _CLC_CONST_AS unsigned int *samp, float2 coord); +float4 __ockl_image_sample_3D(_CLC_CONST_AS unsigned int *tex, _CLC_CONST_AS unsigned int *samp, float3 coord); +half4 __ockl_image_sampleh_1D(_CLC_CONST_AS unsigned int *tex, _CLC_CONST_AS unsigned int *samp, float coord); +half4 __ockl_image_sampleh_2D(_CLC_CONST_AS unsigned int *tex, _CLC_CONST_AS unsigned int *samp, float2 coord); +half4 __ockl_image_sampleh_3D(_CLC_CONST_AS unsigned int *tex, _CLC_CONST_AS unsigned int *samp, float3 coord); + +float4 __ockl_image_sample_1Da(_CLC_CONST_AS unsigned int *tex, _CLC_CONST_AS unsigned int *samp, float2 coord); +float4 __ockl_image_sample_2Da(_CLC_CONST_AS unsigned int *tex, _CLC_CONST_AS unsigned int *samp, float4 coord); +half4 __ockl_image_sampleh_1Da(_CLC_CONST_AS unsigned int *tex, _CLC_CONST_AS unsigned int *samp, float2 coord); +half4 __ockl_image_sampleh_2Da(_CLC_CONST_AS unsigned int *tex, _CLC_CONST_AS unsigned int *samp, float4 coord); + +// +// IMAGES +// + +#define _CLC_ARRAY_COORD_PARAMS_1D(coord, layer) coord, layer +#define _CLC_ARRAY_COORD_PARAMS_2D(coord, layer) coord.x, coord.y, layer, 0 + +// Fetch Ops + +#define _CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, \ + builtin_ret_t, builtin_ret_postfix) \ + _CLC_DEF elem_t _CLC_MANGLE_FUNC_IMG_HANDLE( \ + 18, __spirv_ImageFetch, I##elem_t_mangled, \ + coord_mangled##ET_T0_T1_)(ulong imageHandle, coord_t coord) { \ + _CLC_CONST_AS unsigned int *tex = (_CLC_CONST_AS unsigned int *)imageHandle; \ + builtin_ret_t##4 color = \ + __ockl_image_load##builtin_ret_postfix##_##dimension##D(tex, coord); \ + return __clc_cast_from_##builtin_ret_t##4_to_##elem_t(color); \ + } + +#define _CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) \ + _CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(dimension, elem_t, elem_t_mangled, \ + coord_t, coord_mangled, float,) + +#define _CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) \ + _CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(dimension, elem_t, elem_t_mangled, \ + coord_t, coord_mangled, half, h) + +#define _CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) \ + _CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(dimension, elem_t, elem_t_mangled, \ + coord_t, coord_mangled) + +// Float +// return 1-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(1, float, f, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(2, float, f, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(3, float, f, int3, Dv3_i) +// return 2-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(1, float2, Dv2_f, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(2, float2, Dv2_f, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(3, float2, Dv2_f, int3, Dv3_i) +// return 4-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(1, float4, Dv4_f, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(2, float4, Dv4_f, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(3, float4, Dv4_f, int3, Dv3_i) + +// Half +#ifdef cl_khr_fp16 +// return 1-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(1, half, DF16_, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(2, half, DF16_, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(3, half, DF16_, int3, Dv3_i) +// return 2-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(1, half2, Dv2_DF16_, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(2, half2, Dv2_DF16_, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(3, half2, Dv2_DF16_, int3, Dv3_i) +// return 4-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(1, half4, Dv4_DF16_, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(2, half4, Dv4_DF16_, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(3, half4, Dv4_DF16_, int3, Dv3_i) +#endif + +// Int +// return 1-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(1, int, i, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(2, int, i, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(3, int, i, int3, Dv3_i) +// return 2-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(1, int2, Dv2_i, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(2, int2, Dv2_i, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(3, int2, Dv2_i, int3, Dv3_i) +// return 4-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(1, int4, Dv4_i, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(2, int4, Dv4_i, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(3, int4, Dv4_i, int3, Dv3_i) + +// Unsigned Int +// return 1-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(1, uint, j, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(2, uint, j, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(3, uint, j, int3, Dv3_i) +// return 2-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(1, uint2, Dv2_j, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(2, uint2, Dv2_j, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(3, uint2, Dv2_j, int3, Dv3_i) +// return 4-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(1, uint4, Dv4_j, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(2, uint4, Dv4_j, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN(3, uint4, Dv4_j, int3, Dv3_i) + +// Short +// return 1-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(1, short, s, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(2, short, s, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(3, short, s, int3, Dv3_i) +// return 2-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(1, short2, Dv2_s, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(2, short2, Dv2_s, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(3, short2, Dv2_s, int3, Dv3_i) +// return 4-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(1, short4, Dv4_s, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(2, short4, Dv4_s, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(3, short4, Dv4_s, int3, Dv3_i) + +// Unsigned Short +// return 1-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(1, ushort, t, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(2, ushort, t, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(3, ushort, t, int3, Dv3_i) +// return 2-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(1, ushort2, Dv2_t, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(2, ushort2, Dv2_t, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(3, ushort2, Dv2_t, int3, Dv3_i) +// return 4-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(1, ushort4, Dv4_t, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(2, ushort4, Dv4_t, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(3, ushort4, Dv4_t, int3, Dv3_i) + +// Char +// return 1-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN(1, char, a, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN(2, char, a, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN(3, char, a, int3, Dv3_i) +// return 2-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN(1, char2, Dv2_a, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN(2, char2, Dv2_a, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN(3, char2, Dv2_a, int3, Dv3_i) +// return 4-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN(1, char4, Dv4_a, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN(2, char4, Dv4_a, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN(3, char4, Dv4_a, int3, Dv3_i) + +// Unsigned Char +// return 1-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN(1, uchar, h, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN(2, uchar, h, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN(3, uchar, h, int3, Dv3_i) +// return 2-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN(1, uchar2, Dv2_h, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN(2, uchar2, Dv2_h, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN(3, uchar2, Dv2_h, int3, Dv3_i) +// return 4-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN(1, uchar4, Dv4_h, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN(2, uchar4, Dv4_h, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN(3, uchar4, Dv4_h, int3, Dv3_i) + +#undef _CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN +#undef _CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN +#undef _CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN + +// Write Ops + +#define _CLC_DEFINE_IMAGE_BINDLESS_WRITE_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, \ + builtin_ret_t, builtin_ret_postfix) \ + _CLC_DEF void _CLC_MANGLE_FUNC_IMG_HANDLE( \ + 18, __spirv_ImageWrite, I, coord_mangled##elem_t_mangled##EvT_T0_T1_)( \ + ulong imageHandle, coord_t coord, elem_t color) { \ + _CLC_CONST_AS unsigned int *tex = (_CLC_CONST_AS unsigned int *)imageHandle; \ + builtin_ret_t##4 outColor = \ + __clc_cast_from_##elem_t##_to_##builtin_ret_t##4(color); \ + __ockl_image_store##builtin_ret_postfix##_##dimension##D(tex, coord, outColor); \ + } + +#define _CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) \ + _CLC_DEFINE_IMAGE_BINDLESS_WRITE_BUILTIN(dimension, elem_t, elem_t_mangled, \ + coord_t, coord_mangled, float,) + +#define _CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) \ + _CLC_DEFINE_IMAGE_BINDLESS_WRITE_BUILTIN(dimension, elem_t, elem_t_mangled, \ + coord_t, coord_mangled, half, h) + +#define _CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) \ + _CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(dimension, elem_t, elem_t_mangled, \ + coord_t, coord_mangled) + +// Float +// write 1-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(1, float, f, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(2, float, f, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(3, float, f, int3, Dv3_i) +// write 2-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(1, float2, Dv2_f, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(2, float2, Dv2_f, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(3, float2, Dv2_f, int3, Dv3_i) +// write 4-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(1, float4, Dv4_f, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(2, float4, Dv4_f, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(3, float4, Dv4_f, int3, Dv3_i) + +// Half +#ifdef cl_khr_fp16 +// write 1-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(1, half, DF16_, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(2, half, DF16_, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(3, half, DF16_, int3, Dv3_i) +// write 2-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(1, half2, Dv2_DF16_, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(2, half2, Dv2_DF16_, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(3, half2, Dv2_DF16_, int3, Dv3_i) +// write 4-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(1, half4, Dv4_DF16_, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(2, half4, Dv4_DF16_, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(3, half4, Dv4_DF16_, int3, Dv3_i) +#endif + +// Int +// write 1-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(1, int, i, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(2, int, i, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(3, int, i, int3, Dv3_i) +// write 2-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(1, int2, Dv2_i, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(2, int2, Dv2_i, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(3, int2, Dv2_i, int3, Dv3_i) +// write 4-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(1, int4, Dv4_i, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(2, int4, Dv4_i, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(3, int4, Dv4_i, int3, Dv3_i) + +// Unsigned Int +// write 1-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(1, uint, j, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(2, uint, j, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(3, uint, j, int3, Dv3_i) +// write 2-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(1, uint2, Dv2_j, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(2, uint2, Dv2_j, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(3, uint2, Dv2_j, int3, Dv3_i) +// write 4-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(1, uint4, Dv4_j, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(2, uint4, Dv4_j, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN(3, uint4, Dv4_j, int3, Dv3_i) + +// Short +// write 1-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(1, short, s, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(2, short, s, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(3, short, s, int3, Dv3_i) +// write 2-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(1, short2, Dv2_s, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(2, short2, Dv2_s, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(3, short2, Dv2_s, int3, Dv3_i) +// write 4-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(1, short4, Dv4_s, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(2, short4, Dv4_s, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(3, short4, Dv4_s, int3, Dv3_i) + +// Unsigned Short +// write 1-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(1, ushort, t, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(2, ushort, t, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(3, ushort, t, int3, Dv3_i) +// write 2-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(1, ushort2, Dv2_t, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(2, ushort2, Dv2_t, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(3, ushort2, Dv2_t, int3, Dv3_i) +// write 4-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(1, ushort4, Dv4_t, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(2, ushort4, Dv4_t, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(3, ushort4, Dv4_t, int3, Dv3_i) + +// Char +// write 1-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN(1, char, a, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN(2, char, a, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN(3, char, a, int3, Dv3_i) +// write 2-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN(1, char2, Dv2_a, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN(2, char2, Dv2_a, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN(3, char2, Dv2_a, int3, Dv3_i) +// write 4-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN(1, char4, Dv4_a, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN(2, char4, Dv4_a, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN(3, char4, Dv4_a, int3, Dv3_i) + +// Unsigned Char +// write 1-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN(1, uchar, h, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN(2, uchar, h, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN(3, uchar, h, int3, Dv3_i) +// write 2-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN(1, uchar2, Dv2_h, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN(2, uchar2, Dv2_h, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN(3, uchar2, Dv2_h, int3, Dv3_i) +// write 4-channel color data +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN(1, uchar4, Dv4_h, int, i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN(2, uchar4, Dv4_h, int2, Dv2_i) +_CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN(3, uchar4, Dv4_h, int3, Dv3_i) + +#undef _CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN +#undef _CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN +#undef _CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN + +// +// IMAGE ARRAYS +// + +// Read Ops + +#define _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size, \ + builtin_ret_t, builtin_ret_postfix) \ + _CLC_DEF elem_t _CLC_MANGLE_FUNC_IMG_HANDLE( \ + 23, __spirv_ImageArrayFetch, I##elem_t_mangled, \ + coord_mangled##ET_T0_T1_i)(ulong imageHandle, coord_t coord, int layer) { \ + _CLC_CONST_AS unsigned int *tex = (_CLC_CONST_AS unsigned int *)imageHandle; \ + int##vec_size arrayCoord = \ + (int##vec_size)(_CLC_ARRAY_COORD_PARAMS_##dimension##D(coord, layer)); \ + builtin_ret_t##4 color = \ + __ockl_image_load##builtin_ret_postfix##_##dimension##Da(tex, arrayCoord); \ + return __clc_cast_from_##builtin_ret_t##4_to_##elem_t(color); \ + } + +#define _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) \ + _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_BUILTIN(dimension, elem_t, \ + elem_t_mangled, coord_t, \ + coord_mangled, vec_size, \ + float,) + +#define _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) \ + _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_BUILTIN(dimension, elem_t, \ + elem_t_mangled, coord_t, \ + coord_mangled, vec_size, \ + half, h) + +#define _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) \ + _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(dimension, elem_t, \ + elem_t_mangled, coord_t, \ + coord_mangled, vec_size) + +// Float +// return 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(1, float, f, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(2, float, f, int2, Dv2_i, 4) +// return 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(1, float2, Dv2_f, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(2, float2, Dv2_f, int2, Dv2_i, 4) +// return 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(1, float4, Dv4_f, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(2, float4, Dv4_f, int2, Dv2_i, 4) + +// Half +#ifdef cl_khr_fp16 +// return 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, half, DF16_, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, half, DF16_, int2, Dv2_i, 4) +// return 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, half2, Dv2_DF16_, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, half2, Dv2_DF16_, int2, Dv2_i, 4) +// return 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, half4, Dv4_DF16_, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, half4, Dv4_DF16_, int2, Dv2_i, 4) +#endif + +// Int +// return 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(1, int, i, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(2, int, i, int2, Dv2_i, 4) +// return 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(1, int2, Dv2_i, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(2, int2, Dv2_i, int2, Dv2_i, 4) +// return 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(1, int4, Dv4_i, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(2, int4, Dv4_i, int2, Dv2_i, 4) + +// Unsigned Int +// return 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(1, uint, j, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(2, uint, j, int2, Dv2_i, 4) +// return 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(1, uint2, Dv2_j, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(2, uint2, Dv2_j, int2, Dv2_i, 4) +// return 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(1, uint4, Dv4_j, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(2, uint4, Dv4_j, int2, Dv2_i, 4) + +// Short +// return 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, short, s, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, short, s, int2, Dv2_i, 4) +// return 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, short2, Dv2_s, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, short2, Dv2_s, int2, Dv2_i, 4) +// return 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, short4, Dv4_s, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, short4, Dv4_s, int2, Dv2_i, 4) + +// Unsigned Short +// return 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, ushort, t, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, ushort, t, int2, Dv2_i, 4) +// return 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, ushort2, Dv2_t, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, ushort2, Dv2_t, int2, Dv2_i, 4) +// return 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, ushort4, Dv4_t, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, ushort4, Dv4_t, int2, Dv2_i, 4) + +// Char +// return 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(1, char, a, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(2, char, a, int2, Dv2_i, 4) +// return 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(1, char2, Dv2_a, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(2, char2, Dv2_a, int2, Dv2_i, 4) +// return 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(1, char4, Dv4_a, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(2, char4, Dv4_a, int2, Dv2_i, 4) + +// Unsigned Char +// return 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(1, uchar, h, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(2, uchar, h, int2, Dv2_i, 4) +// return 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(1, uchar2, Dv2_h, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(2, uchar2, Dv2_h, int2, Dv2_i, 4) +// return 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(1, uchar4, Dv4_h, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(2, uchar4, Dv4_h, int2, Dv2_i, 4) + +#undef _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN +#undef _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN +#undef _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN + +// Write Ops + +#define _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size, \ + builtin_ret_t, builtin_ret_postfix) \ + _CLC_DEF void _CLC_MANGLE_FUNC_IMG_HANDLE( \ + 23, __spirv_ImageArrayWrite, I, coord_mangled##elem_t_mangled##EvT_T0_iT1_)( \ + ulong imageHandle, coord_t coord, int layer, elem_t color) { \ + _CLC_CONST_AS unsigned int *tex = (_CLC_CONST_AS unsigned int *)imageHandle; \ + int##vec_size arrayCoord = \ + (int##vec_size)(_CLC_ARRAY_COORD_PARAMS_##dimension##D(coord, layer)); \ + builtin_ret_t##4 outColor = \ + __clc_cast_from_##elem_t##_to_##builtin_ret_t##4(color); \ + __ockl_image_store##builtin_ret_postfix##_##dimension##Da(tex, arrayCoord, \ + outColor); \ + } + + +#define _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) \ + _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_BUILTIN(dimension, elem_t, \ + elem_t_mangled, coord_t, \ + coord_mangled, vec_size, \ + float,) + +#define _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) \ + _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_BUILTIN(dimension, elem_t, \ + elem_t_mangled, coord_t, \ + coord_mangled, vec_size, \ + half, h) + +#define _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) \ + _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(dimension, elem_t, \ + elem_t_mangled, coord_t, \ + coord_mangled, vec_size) + +// Float +// write 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(1, float, f, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(2, float, f, int2, Dv2_i, 4) +// write 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(1, float2, Dv2_f, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(2, float2, Dv2_f, int2, Dv2_i, 4) +// write 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(1, float4, Dv4_f, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(2, float4, Dv4_f, int2, Dv2_i, 4) + +// Half +#ifdef cl_khr_fp16 +// write 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, half, DF16_, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, half, DF16_, int2, Dv2_i, 4) +// write 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, half2, Dv2_DF16_, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, half2, Dv2_DF16_, int2, Dv2_i, 4) +// write 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, half4, Dv4_DF16_, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, half4, Dv4_DF16_, int2, Dv2_i, 4) +#endif + +// Int +// write 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(1, int, i, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(2, int, i, int2, Dv2_i, 4) +// write 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(1, int2, Dv2_i, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(2, int2, Dv2_i, int2, Dv2_i, 4) +// write 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(1, int4, Dv4_i, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(2, int4, Dv4_i, int2, Dv2_i, 4) + +// Unsigned Int +// write 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(1, uint, j, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(2, uint, j, int2, Dv2_i, 4) +// write 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(1, uint2, Dv2_j, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(2, uint2, Dv2_j, int2, Dv2_i, 4) +// write 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(1, uint4, Dv4_j, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(2, uint4, Dv4_j, int2, Dv2_i, 4) + +// Short +// write 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, short, s, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, short, s, int2, Dv2_i, 4) +// write 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, short2, Dv2_s, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, short2, Dv2_s, int2, Dv2_i, 4) +// write 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, short4, Dv4_s, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, short4, Dv4_s, int2, Dv2_i, 4) + +// Unsigned Short +// write 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, ushort, t, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, ushort, t, int2, Dv2_i, 4) +// write 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, ushort2, Dv2_t, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, ushort2, Dv2_t, int2, Dv2_i, 4) +// write 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, ushort4, Dv4_t, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, ushort4, Dv4_t, int2, Dv2_i, 4) + +// Char +// write 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(1, char, a, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(2, char, a, int2, Dv2_i, 4) +// write 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(1, char2, Dv2_a, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(2, char2, Dv2_a, int2, Dv2_i, 4) +// write 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(1, char4, Dv4_a, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(2, char4, Dv4_a, int2, Dv2_i, 4) + +// Unsigned Char +// write 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(1, uchar, h, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(2, uchar, h, int2, Dv2_i, 4) +// write 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(1, uchar2, Dv2_h, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(2, uchar2, Dv2_h, int2, Dv2_i, 4) +// write 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(1, uchar4, Dv4_h, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(2, uchar4, Dv4_h, int2, Dv2_i, 4) + +#undef _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN +#undef _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN +#undef _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN + +// +// SAMPLED IMAGES +// Note: consider splitting it off in sampled_image.cl when it grows larger. +// + +// From +// https://github.com/ROCm/clr/tree/amd-staging/hipamd/include/hip/amd_detail/texture_fetch_functions.h +static _CLC_CONST_AS const unsigned int SAMPLER_OBJECT_OFFSET_DWORD = 12; + +// Read Ops + +#define _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, \ + builtin_ret_t, builtin_ret_postfix) \ + _CLC_DEF elem_t _CLC_MANGLE_FUNC_IMG_HANDLE( \ + 17, __spirv_ImageRead, I##elem_t_mangled, \ + coord_mangled##ET_T0_T1_)(ulong imageHandle, coord_t coord) { \ + _CLC_CONST_AS unsigned int *tex = (_CLC_CONST_AS unsigned int *)imageHandle; \ + _CLC_CONST_AS unsigned int *samp = tex + SAMPLER_OBJECT_OFFSET_DWORD; \ + builtin_ret_t##4 color = \ + __ockl_image_sample##builtin_ret_postfix##_##dimension##D(tex, samp, coord); \ + return __clc_cast_from_##builtin_ret_t##4_to_##elem_t(color); \ + } + +#define _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) \ + _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_BUILTIN(dimension, elem_t, elem_t_mangled, \ + coord_t, coord_mangled, float,) + +#define _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) \ + _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_BUILTIN(dimension, elem_t, elem_t_mangled, \ + coord_t, coord_mangled, half, h) + +#define _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) \ + _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(dimension, elem_t, \ + elem_t_mangled, coord_t, \ + coord_mangled) + +// Float +// return 1 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(1, float, f, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(2, float, f, float2, Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(3, float, f, float3, Dv3_f) +// return 2-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(1, float2, Dv2_f, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(2, float2, Dv2_f, float2, Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(3, float2, Dv2_f, float3, Dv3_f) +// return 4-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(1, float4, Dv4_f, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(2, float4, Dv4_f, float2, Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(3, float4, Dv4_f, float3, Dv3_f) + +// Half +#ifdef cl_khr_fp16 +// return 1 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(1, half, DF16_, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(2, half, DF16_, float2, Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(3, half, DF16_, float3, Dv3_f) +// return 2-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(1, half2, Dv2_DF16_, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(2, half2, Dv2_DF16_, float2, Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(3, half2, Dv2_DF16_, float3, Dv3_f) +// return 4-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(1, half4, Dv4_DF16_, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(2, half4, Dv4_DF16_, float2, Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(3, half4, Dv4_DF16_, float3, Dv3_f) +#endif + +// Int +// return 1-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(1, int, i, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(2, int, i, float2, Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(3, int, i, float3, Dv3_f) +// return 2-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(1, int2, Dv2_i, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(2, int2, Dv2_i, float2, Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(3, int2, Dv2_i, float3, Dv3_f) +// return 4-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(1, int4, Dv4_i, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(2, int4, Dv4_i, float2, Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(3, int4, Dv4_i, float3, Dv3_f) + +// Unsigned Int +// return 1-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(1, uint, j, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(2, uint, j, float2, Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(3, uint, j, float3, Dv3_f) +// return 2-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(1, uint2, Dv2_j, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(2, uint2, Dv2_j, float2, Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(3, uint2, Dv2_j, float3, Dv3_f) +// return 4-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(1, uint4, Dv4_j, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(2, uint4, Dv4_j, float2, Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(3, uint4, Dv4_j, float3, Dv3_f) + +// Short +// return 1-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(1, short, s, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(2, short, s, float2, Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(3, short, s, float3, Dv3_f) +// return 2-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(1, short2, Dv2_s, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(2, short2, Dv2_s, float2, Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(3, short2, Dv2_s, float3, Dv3_f) +// return 4-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(1, short4, Dv4_s, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(2, short4, Dv4_s, float2, Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(3, short4, Dv4_s, float3, Dv3_f) + +// Unsigned Short +// return 1-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(1, ushort, t, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(2, ushort, t, float2, Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(3, ushort, t, float3, Dv3_f) +// return 2-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(1, ushort2, Dv2_t, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(2, ushort2, Dv2_t, float2, Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(3, ushort2, Dv2_t, float3, Dv3_f) +// return 4-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(1, ushort4, Dv4_t, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(2, ushort4, Dv4_t, float2, Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(3, ushort4, Dv4_t, float3, Dv3_f) + +// Char +// return 1-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(1, char, a, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(2, char, a, float2, Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(3, char, a, float3, Dv3_f) +// return 2-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(1, char2, Dv2_a, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(2, char2, Dv2_a, float2, Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(3, char2, Dv2_a, float3, Dv3_f) +// return 4-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(1, char4, Dv4_a, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(2, char4, Dv4_a, float2, Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(3, char4, Dv4_a, float3, Dv3_f) + +// Unsigned Char +// return 1-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(1, uchar, h, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(2, uchar, h, float2, Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(3, uchar, h, float3, Dv3_f) +// return 2-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(1, uchar2, Dv2_h, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(2, uchar2, Dv2_h, float2, Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(3, uchar2, Dv2_h, float3, Dv3_f) +// return 4-channel color data +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(1, uchar4, Dv4_h, float, f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(2, uchar4, Dv4_h, float2, Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(3, uchar4, Dv4_h, float3, Dv3_f) + +#undef _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN +#undef _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN +#undef _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN + +// +// SAMPLED IMAGE ARRAYS +// + +// Read Ops + +#define _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size, \ + builtin_ret_t, builtin_ret_postfix) \ + _CLC_DEF elem_t _CLC_MANGLE_FUNC_IMG_HANDLE( \ + 22, __spirv_ImageArrayRead, I##elem_t_mangled, \ + coord_mangled##ET_T0_T1_i)(ulong imageHandle, coord_t coord, int layer) { \ + _CLC_CONST_AS unsigned int *tex = (_CLC_CONST_AS unsigned int *)imageHandle; \ + _CLC_CONST_AS unsigned int *samp = tex + SAMPLER_OBJECT_OFFSET_DWORD; \ + float##vec_size arrayCoord = \ + (float##vec_size)(_CLC_ARRAY_COORD_PARAMS_##dimension##D(coord, (float)layer)); \ + builtin_ret_t##4 color = \ + __ockl_image_sample##builtin_ret_postfix##_##dimension##Da(tex, samp, \ + arrayCoord); \ + return __clc_cast_from_##builtin_ret_t##4_to_##elem_t(color); \ + } + +#define _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) \ + _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_BUILTIN(dimension, elem_t, \ + elem_t_mangled, coord_t, \ + coord_mangled, vec_size, \ + float,) + +#define _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) \ + _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_BUILTIN(dimension, elem_t, \ + elem_t_mangled, coord_t, \ + coord_mangled, vec_size, \ + half, h) + +#define _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) \ + _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(dimension, elem_t, \ + elem_t_mangled, coord_t, \ + coord_mangled, vec_size) + +// Float +// return 1 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(1, float, f, float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(2, float, f, float2, Dv2_f, 4) +// return 2 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(1, float2, Dv2_f, float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(2, float2, Dv2_f, float2, Dv2_f, 4) +// return 4 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(1, float4, Dv4_f, float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(2, float4, Dv4_f, float2, Dv2_f, 4) + +// Half +#ifdef cl_khr_fp16 +// return 1 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, half, DF16_, float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, half, DF16_, float2, Dv2_f, 4) +// return 2 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, half2, Dv2_DF16_, float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, half2, Dv2_DF16_, float2, Dv2_f, 4) +// return 4 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, half4, Dv4_DF16_, float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, half4, Dv4_DF16_, float2, Dv2_f, 4) +#endif + +// Int +// return 1 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(1, int, i, float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(2, int, i, float2, Dv2_f, 4) +// return 2 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(1, int2, Dv2_i, float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(2, int2, Dv2_i, float2, Dv2_f, 4) +// return 4 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(1, int4, Dv4_i, float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(2, int4, Dv4_i, float2, Dv2_f, 4) + +// Unsigned Int +// return 1 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(1, uint, j, float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(2, uint, j, float2, Dv2_f, 4) +// return 2 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(1, uint2, Dv2_j, float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(2, uint2, Dv2_j, float2, Dv2_f, 4) +// return 4 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(1, uint4, Dv4_j, float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(2, uint4, Dv4_j, float2, Dv2_f, 4) + +// Short +// return 1 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, short, s, float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, short, s, float2, Dv2_f, 4) +// return 2 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, short2, Dv2_s, float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, short2, Dv2_s, float2, Dv2_f, 4) +// return 4 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, short4, Dv4_s, float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, short4, Dv4_s, float2, Dv2_f, 4) + +// Unsigned Short +// return 1 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, ushort, t, float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, ushort, t, float2, Dv2_f, 4) +// return 2 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, ushort2, Dv2_t, float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, ushort2, Dv2_t, float2, Dv2_f, 4) +// return 4 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, ushort4, Dv4_t, float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, ushort4, Dv4_t, float2, Dv2_f, 4) + +// Char +// return 1 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(1, char, a, float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(2, char, a, float2, Dv2_f, 4) +// return 2 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(1, char2, Dv2_a, float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(2, char2, Dv2_a, float2, Dv2_f, 4) +// return 4 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(1, char4, Dv4_a, float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(2, char4, Dv4_a, float2, Dv2_f, 4) + +// Unsigned Char +// return 1 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(1, uchar, h, float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(2, uchar, h, float2, Dv2_f, 4) +// return 2 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(1, uchar2, Dv2_h, float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(2, uchar2, Dv2_h, float2, Dv2_f, 4) +// return 4 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(1, uchar4, Dv4_h, float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(2, uchar4, Dv4_h, float2, Dv2_f, 4) + +#undef _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN +#undef _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN +#undef _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN + +#undef _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_BUILTIN +#undef _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_BUILTIN + +#undef _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_BUILTIN +#undef _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_BUILTIN + +#undef _CLC_DEFINE_IMAGE_BINDLESS_WRITE_BUILTIN +#undef _CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN + +#undef _CLC_ARRAY_COORD_PARAMS_1D +#undef _CLC_ARRAY_COORD_PARAMS_2D + +#undef _CLC_CONST_AS + +#undef _CLC_MANGLE_FUNC_IMG_HANDLE diff --git a/sycl/cmake/modules/FetchUnifiedRuntime.cmake b/sycl/cmake/modules/FetchUnifiedRuntime.cmake index 72841724fa01d..83fb3ad6652b0 100644 --- a/sycl/cmake/modules/FetchUnifiedRuntime.cmake +++ b/sycl/cmake/modules/FetchUnifiedRuntime.cmake @@ -116,7 +116,7 @@ if(SYCL_UR_USE_FETCH_CONTENT) CACHE PATH "Path to external '${name}' adapter source dir" FORCE) endfunction() - set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") + set(UNIFIED_RUNTIME_REPO "https://github.com/GeorgeWeb/unified-runtime.git") include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/UnifiedRuntimeTag.cmake) set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES") diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake index 4c07083d355c7..4af077e532b47 100644 --- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake +++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake @@ -6,4 +6,4 @@ # Merge pull request #2578 from Bensuo/ewan/remove_command_ref_counting # # Remove command-buffer command handle ref counting -set(UNIFIED_RUNTIME_TAG 14f4a3ba70b91b3adc411ec6bfc8ae86e948a990) +set(UNIFIED_RUNTIME_TAG georgi/bindless-hip) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc index d99f988d49cbb..82ce9b1a98d16 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc @@ -2048,7 +2048,7 @@ void release_external_semaphore(external_semaphore semaphoreHandle, ```cpp #include -include::../../../test-e2e/bindless_images/examples/example_1_1D_read_write.cpp[lines=9..-1] +include::../../../test-e2e/bindless_images/examples/example_1_1D_read_write.cpp[lines=12..-1] ``` === Reading from a dynamically sized array of 2D images @@ -2064,14 +2064,14 @@ include::../../../test-e2e/bindless_images/examples/example_2_2D_dynamic_read.cp ```cpp #include -include::../../../test-e2e/bindless_images/examples/example_3_1D_mipmap_anisotropic_filtering_and_levels.cpp[lines=9..-1] +include::../../../test-e2e/bindless_images/examples/example_3_1D_mipmap_anisotropic_filtering_and_levels.cpp[lines=10..-1] ``` === 1D image array read/write ```cpp #include -include::../../../test-e2e/bindless_images/examples/example_4_1D_array_read_write.cpp[lines=9..-1] +include::../../../test-e2e/bindless_images/examples/example_4_1D_array_read_write.cpp[lines=14..-1] ``` === Sampling a cubemap @@ -2079,7 +2079,7 @@ include::../../../test-e2e/bindless_images/examples/example_4_1D_array_read_writ ```c++ #include -include::../../../test-e2e/bindless_images/examples/example_5_sample_cubemap.cpp[lines=9..-1] +include::../../../test-e2e/bindless_images/examples/example_5_sample_cubemap.cpp[lines=10..-1] ``` === Using imported memory and semaphore objects @@ -2087,7 +2087,7 @@ include::../../../test-e2e/bindless_images/examples/example_5_sample_cubemap.cpp ```c++ #include -include::../../../test-e2e/bindless_images/examples/example_6_import_memory_and_semaphores.cpp[lines=8..-1] +include::../../../test-e2e/bindless_images/examples/example_6_import_memory_and_semaphores.cpp[lines=14..-1] ``` == Implementation notes diff --git a/sycl/test-e2e/bindless_images/3_channel_format.cpp b/sycl/test-e2e/bindless_images/3_channel_format.cpp index 3eb46c7458155..1f8a14ed2919c 100644 --- a/sycl/test-e2e/bindless_images/3_channel_format.cpp +++ b/sycl/test-e2e/bindless_images/3_channel_format.cpp @@ -3,6 +3,10 @@ // Test requires at least this version of the Intel GPU driver on Arc. // REQUIRES-INTEL-DRIVER: lin: 32370 +// UNSUPPORTED: hip || level_zero +// UNSUPPORTED-INTENDED: Unimplemented in the HIP adapter yet. +// Also, the feature is not fully implemented in the Level Zero stack. + // RUN: %{build} -o %t.out // RUN: %{run-unfiltered-devices} env NEOReadDebugKeys=1 UseBindlessMode=1 UseExternalAllocatorForSshAndDsh=1 %t.out diff --git a/sycl/test-e2e/bindless_images/array/fetch_sampled_array.cpp b/sycl/test-e2e/bindless_images/array/fetch_sampled_array.cpp index 723c7233ea9a8..c6a1fad34d187 100644 --- a/sycl/test-e2e/bindless_images/array/fetch_sampled_array.cpp +++ b/sycl/test-e2e/bindless_images/array/fetch_sampled_array.cpp @@ -1,4 +1,4 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_image_array // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/bindless_images/array/read_sampled_array.cpp b/sycl/test-e2e/bindless_images/array/read_sampled_array.cpp index 65aabee94c242..f093910e3bfda 100644 --- a/sycl/test-e2e/bindless_images/array/read_sampled_array.cpp +++ b/sycl/test-e2e/bindless_images/array/read_sampled_array.cpp @@ -1,4 +1,4 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_image_array // RUN: %{build} -o %t.out // RUN: %{run} %t.out @@ -137,7 +137,17 @@ static bool runTest(sycl::range dims, sycl::range localSize, unsigned int seed = 0) { using VecType = sycl::vec; - sycl::device dev; + sycl::device dev{}; + // skip half tests if the device does not support the aspect. + if constexpr (std::is_same_v) { + if (!dev.has(sycl::aspect::fp16)) { +#ifdef VERBOSE_PRINT + std::cout << "Test skipped due to lack of device support for fp16\n"; +#endif + return false; + } + } + sycl::queue q(dev); auto ctxt = q.get_context(); diff --git a/sycl/test-e2e/bindless_images/array/read_write_1d_subregion.cpp b/sycl/test-e2e/bindless_images/array/read_write_1d_subregion.cpp index 4815661efc2d2..47d84af028293 100644 --- a/sycl/test-e2e/bindless_images/array/read_write_1d_subregion.cpp +++ b/sycl/test-e2e/bindless_images/array/read_write_1d_subregion.cpp @@ -1,7 +1,7 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_image_array -// RUN: %{build} -o %t.out -// RUN: %{run} %t.out +// RUN: %if !any-device-is-hip %{ %{build} -o %t.out %} +// RUN: %if !any-device-is-hip %{ %{run} %t.out %} #include #include diff --git a/sycl/test-e2e/bindless_images/array/read_write_2d_subregion.cpp b/sycl/test-e2e/bindless_images/array/read_write_2d_subregion.cpp index 1b72a57bed47c..1bae687b08dd1 100644 --- a/sycl/test-e2e/bindless_images/array/read_write_2d_subregion.cpp +++ b/sycl/test-e2e/bindless_images/array/read_write_2d_subregion.cpp @@ -1,4 +1,4 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_image_array // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/bindless_images/array/read_write_unsampled_array.cpp b/sycl/test-e2e/bindless_images/array/read_write_unsampled_array.cpp index ffc6a34db202a..7f0db5936adce 100644 --- a/sycl/test-e2e/bindless_images/array/read_write_unsampled_array.cpp +++ b/sycl/test-e2e/bindless_images/array/read_write_unsampled_array.cpp @@ -1,5 +1,4 @@ -// REQUIRES: linux -// REQUIRES: aspect-ext_oneapi_bindless_images +// REQUIRES: aspect-ext_oneapi_image_array // RUN: %{build} -o %t.out // RUN: %{run} env NEOReadDebugKeys=1 UseBindlessMode=1 UseExternalAllocatorForSshAndDsh=1 %t.out diff --git a/sycl/test-e2e/bindless_images/cubemap/cubemap_sampled.cpp b/sycl/test-e2e/bindless_images/cubemap/cubemap_sampled.cpp index d228b308ab72e..2b63336c907f5 100644 --- a/sycl/test-e2e/bindless_images/cubemap/cubemap_sampled.cpp +++ b/sycl/test-e2e/bindless_images/cubemap/cubemap_sampled.cpp @@ -1,6 +1,4 @@ -// REQUIRES: cuda,aspect-ext_oneapi_cubemap // REQUIRES: aspect-ext_oneapi_cubemap_seamless_filtering -// REQUIRES: build-and-run-mode // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/bindless_images/cubemap/cubemap_unsampled.cpp b/sycl/test-e2e/bindless_images/cubemap/cubemap_unsampled.cpp index d74b728593ce2..5f2ef75faafd1 100644 --- a/sycl/test-e2e/bindless_images/cubemap/cubemap_unsampled.cpp +++ b/sycl/test-e2e/bindless_images/cubemap/cubemap_unsampled.cpp @@ -1,4 +1,4 @@ -// REQUIRES: cuda,aspect-ext_oneapi_cubemap +// REQUIRES: aspect-ext_oneapi_cubemap // RUN: %{build} -o %t.out // RUN: %{run-unfiltered-devices} %t.out diff --git a/sycl/test-e2e/bindless_images/device_to_device_copy.cpp b/sycl/test-e2e/bindless_images/device_to_device_copy.cpp index 4a9263e44a13e..f6d091feb1a43 100644 --- a/sycl/test-e2e/bindless_images/device_to_device_copy.cpp +++ b/sycl/test-e2e/bindless_images/device_to_device_copy.cpp @@ -1,4 +1,7 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_bindless_images + +// UNSUPPORTED: hip +// UNSUPPORTED-INTENDED: Undetermined issue in this test. // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/bindless_images/device_to_device_copy_1D_subregion.cpp b/sycl/test-e2e/bindless_images/device_to_device_copy_1D_subregion.cpp index 250195358011a..43884b58b2cc4 100644 --- a/sycl/test-e2e/bindless_images/device_to_device_copy_1D_subregion.cpp +++ b/sycl/test-e2e/bindless_images/device_to_device_copy_1D_subregion.cpp @@ -1,4 +1,7 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_bindless_images + +// UNSUPPORTED: hip +// UNSUPPORTED-INTENDED: Undetermined issue in this test. // RUN: %{build} -o %t.out // RUN: %{run} %t.out @@ -15,9 +18,9 @@ namespace syclexp = sycl::ext::oneapi::experimental; void copy_image_mem_handle_to_image_mem_handle( const syclexp::image_descriptor &dataInDesc, - const syclexp::image_descriptor &outDesc, - const std::vector &dataIn1, const std::vector &dataIn2, - sycl::device dev, sycl::queue q, std::vector &out) { + const syclexp::image_descriptor &outDesc, const std::vector &dataIn1, + const std::vector &dataIn2, sycl::device dev, sycl::queue q, + std::vector &out) { // Check that output image is double size of input images assert(outDesc.width == dataInDesc.width * 2); diff --git a/sycl/test-e2e/bindless_images/device_to_device_copy_2D_subregion.cpp b/sycl/test-e2e/bindless_images/device_to_device_copy_2D_subregion.cpp index 0dea97a3f745e..1c5be23383f80 100644 --- a/sycl/test-e2e/bindless_images/device_to_device_copy_2D_subregion.cpp +++ b/sycl/test-e2e/bindless_images/device_to_device_copy_2D_subregion.cpp @@ -1,4 +1,4 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_bindless_images // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/bindless_images/device_to_device_copy_3D_subregion.cpp b/sycl/test-e2e/bindless_images/device_to_device_copy_3D_subregion.cpp index 680814bf6be77..3121affc105fd 100644 --- a/sycl/test-e2e/bindless_images/device_to_device_copy_3D_subregion.cpp +++ b/sycl/test-e2e/bindless_images/device_to_device_copy_3D_subregion.cpp @@ -1,4 +1,4 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_bindless_images // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/bindless_images/dx12_interop/read_write_unsampled_semaphore.cpp b/sycl/test-e2e/bindless_images/dx12_interop/read_write_unsampled_semaphore.cpp index 85bde0e9a0ee5..e9e9be06bbf71 100644 --- a/sycl/test-e2e/bindless_images/dx12_interop/read_write_unsampled_semaphore.cpp +++ b/sycl/test-e2e/bindless_images/dx12_interop/read_write_unsampled_semaphore.cpp @@ -1,6 +1,7 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_bindless_images // REQUIRES: windows -// XFAIL: * + +// XFAIL: run-mode // XFAIL-TRACKER: https://github.com/intel/llvm/issues/15851 // RUN: %{build} -l d3d12 -l dxgi -l dxguid -o %t.out diff --git a/sycl/test-e2e/bindless_images/examples/example_1_1D_read_write.cpp b/sycl/test-e2e/bindless_images/examples/example_1_1D_read_write.cpp index cc7425a1e585c..beef3c7fcf09c 100644 --- a/sycl/test-e2e/bindless_images/examples/example_1_1D_read_write.cpp +++ b/sycl/test-e2e/bindless_images/examples/example_1_1D_read_write.cpp @@ -1,4 +1,7 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_bindless_images + +// UNSUPPORTED: hip +// UNSUPPORTED-INTENDED: Undetermined issue in 'create_image' in this test. // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/bindless_images/examples/example_2_2D_dynamic_read.cpp b/sycl/test-e2e/bindless_images/examples/example_2_2D_dynamic_read.cpp index f98c4c4fb073a..75f0b4c4a1061 100644 --- a/sycl/test-e2e/bindless_images/examples/example_2_2D_dynamic_read.cpp +++ b/sycl/test-e2e/bindless_images/examples/example_2_2D_dynamic_read.cpp @@ -1,4 +1,4 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_bindless_images // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/bindless_images/examples/example_3_1D_mipmap_anisotropic_filtering_and_levels.cpp b/sycl/test-e2e/bindless_images/examples/example_3_1D_mipmap_anisotropic_filtering_and_levels.cpp index d46eb88fa084c..c4fad4c74ee9c 100644 --- a/sycl/test-e2e/bindless_images/examples/example_3_1D_mipmap_anisotropic_filtering_and_levels.cpp +++ b/sycl/test-e2e/bindless_images/examples/example_3_1D_mipmap_anisotropic_filtering_and_levels.cpp @@ -1,4 +1,5 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_mipmap +// REQUIRES: aspect-ext_oneapi_mipmap_anisotropy // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/bindless_images/examples/example_4_1D_array_read_write.cpp b/sycl/test-e2e/bindless_images/examples/example_4_1D_array_read_write.cpp index 69fb804f977c5..cba3d324158e4 100644 --- a/sycl/test-e2e/bindless_images/examples/example_4_1D_array_read_write.cpp +++ b/sycl/test-e2e/bindless_images/examples/example_4_1D_array_read_write.cpp @@ -1,4 +1,7 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_bindless_images + +// UNSUPPORTED: hip +// UNSUPPORTED-INTENDED: Undetermined issue in 'create_image' in this test. // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/bindless_images/examples/example_5_sample_cubemap.cpp b/sycl/test-e2e/bindless_images/examples/example_5_sample_cubemap.cpp index c6ba9b48fad52..73c011f87e93d 100644 --- a/sycl/test-e2e/bindless_images/examples/example_5_sample_cubemap.cpp +++ b/sycl/test-e2e/bindless_images/examples/example_5_sample_cubemap.cpp @@ -1,4 +1,4 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_cubemap // REQUIRES: build-and-run-mode // RUN: %{build} -o %t.out diff --git a/sycl/test-e2e/bindless_images/examples/example_6_import_memory_and_semaphores.cpp b/sycl/test-e2e/bindless_images/examples/example_6_import_memory_and_semaphores.cpp index d555b73d49aac..7c7443f9e3ed5 100644 --- a/sycl/test-e2e/bindless_images/examples/example_6_import_memory_and_semaphores.cpp +++ b/sycl/test-e2e/bindless_images/examples/example_6_import_memory_and_semaphores.cpp @@ -1,6 +1,12 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_external_memory_import +// REQUIRES: aspect-ext_oneapi_external_semaphore_import // RUN: %{build} -o %t.out +// This test is not being executed via the {run} command due to using invalid +// external input and output file descriptors for the external resource that is +// being imported. The purpose of this test is to showcase the interop APIs and +// in order to properly obtain those descriptors we would need a lot of Vulkan +// context and texture setup as a prerequisite to the example and complicate it. #include #include diff --git a/sycl/test-e2e/bindless_images/image_get_info.cpp b/sycl/test-e2e/bindless_images/image_get_info.cpp index 8c36f431ba942..a7ea4825d8494 100644 --- a/sycl/test-e2e/bindless_images/image_get_info.cpp +++ b/sycl/test-e2e/bindless_images/image_get_info.cpp @@ -1,4 +1,8 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_bindless_images + +// UNSUPPORTED: hip || level_zero +// UNSUPPORTED-INTENDED: Image channels queries not working correctly on HIP. +// Also, the feature is not fully implemented in the Level Zero stack. // RUN: %{build} -o %t.out // RUN: %{run-unfiltered-devices} %t.out diff --git a/sycl/test-e2e/bindless_images/mipmap/mipmap_read_1D.cpp b/sycl/test-e2e/bindless_images/mipmap/mipmap_read_1D.cpp index 3f95d87f39b1b..39a81d68190c3 100644 --- a/sycl/test-e2e/bindless_images/mipmap/mipmap_read_1D.cpp +++ b/sycl/test-e2e/bindless_images/mipmap/mipmap_read_1D.cpp @@ -1,4 +1,4 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_mipmap // RUN: %{build} -o %t.out // RUN: %{run-unfiltered-devices} %t.out diff --git a/sycl/test-e2e/bindless_images/mipmap/mipmap_read_2D.cpp b/sycl/test-e2e/bindless_images/mipmap/mipmap_read_2D.cpp index a6e185a2b112c..49011973ef089 100644 --- a/sycl/test-e2e/bindless_images/mipmap/mipmap_read_2D.cpp +++ b/sycl/test-e2e/bindless_images/mipmap/mipmap_read_2D.cpp @@ -1,4 +1,4 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_mipmap // RUN: %{build} -o %t.out // RUN: %{run-unfiltered-devices} %t.out diff --git a/sycl/test-e2e/bindless_images/mipmap/mipmap_read_3D.cpp b/sycl/test-e2e/bindless_images/mipmap/mipmap_read_3D.cpp index 7f3ee74cdc949..778dd187aa2d9 100644 --- a/sycl/test-e2e/bindless_images/mipmap/mipmap_read_3D.cpp +++ b/sycl/test-e2e/bindless_images/mipmap/mipmap_read_3D.cpp @@ -1,4 +1,4 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_mipmap // RUN: %{build} -o %t.out // RUN: %{run-unfiltered-devices} %t.out diff --git a/sycl/test-e2e/bindless_images/read_norm_types.cpp b/sycl/test-e2e/bindless_images/read_norm_types.cpp index 80dace1ba0a53..9eec60c61178c 100644 --- a/sycl/test-e2e/bindless_images/read_norm_types.cpp +++ b/sycl/test-e2e/bindless_images/read_norm_types.cpp @@ -1,5 +1,8 @@ // REQUIRES: aspect-ext_oneapi_bindless_images +// UNSUPPORTED: hip +// UNSUPPORTED-INTENDED: Returning non fp[32/16] values from sampling fails. + // RUN: %{build} -o %t.out // RUN: %{run-unfiltered-devices} env NEOReadDebugKeys=1 UseBindlessMode=1 UseExternalAllocatorForSshAndDsh=1 %t.out diff --git a/sycl/test-e2e/bindless_images/read_sampled.cpp b/sycl/test-e2e/bindless_images/read_sampled.cpp index 3e786855381e0..e0b3c3a109a25 100644 --- a/sycl/test-e2e/bindless_images/read_sampled.cpp +++ b/sycl/test-e2e/bindless_images/read_sampled.cpp @@ -1,4 +1,8 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_bindless_images + +// UNSUPPORTED: hip || level_zero +// UNSUPPORTED-INTENDED: Returning non-FP values from fetching fails on HIP. +// Also, the feature is not fully implemented in the Level Zero stack. // RUN: %{build} -o %t.out // RUN: %{run-unfiltered-devices} %t.out @@ -127,7 +131,17 @@ static bool runTest(sycl::range dims, sycl::range localSize, unsigned int seed = 0) { using VecType = sycl::vec; - sycl::device dev; + sycl::device dev{}; + // skip half tests if not supported + if constexpr (std::is_same_v) { + if (!dev.has(sycl::aspect::fp16)) { +#ifdef VERBOSE_PRINT + std::cout << "Test skipped due to lack of device support for fp16\n"; +#endif + return false; + } + } + sycl::queue q(dev); auto ctxt = q.get_context(); diff --git a/sycl/test-e2e/bindless_images/read_write_1D.cpp b/sycl/test-e2e/bindless_images/read_write_1D.cpp index e42f234a07642..ab148eee7f788 100644 --- a/sycl/test-e2e/bindless_images/read_write_1D.cpp +++ b/sycl/test-e2e/bindless_images/read_write_1D.cpp @@ -1,5 +1,8 @@ // REQUIRES: aspect-ext_oneapi_bindless_images +// UNSUPPORTED: hip +// UNSUPPORTED-INTENDED: Undetermined issue in 'create_image' in this test. + // RUN: %{build} -o %t.out // RUN: %{run-unfiltered-devices} env NEOReadDebugKeys=1 UseBindlessMode=1 UseExternalAllocatorForSshAndDsh=1 %t.out diff --git a/sycl/test-e2e/bindless_images/read_write_1D_subregion.cpp b/sycl/test-e2e/bindless_images/read_write_1D_subregion.cpp index b768e9eb668eb..6b48b056c66cb 100644 --- a/sycl/test-e2e/bindless_images/read_write_1D_subregion.cpp +++ b/sycl/test-e2e/bindless_images/read_write_1D_subregion.cpp @@ -1,5 +1,8 @@ // REQUIRES: aspect-ext_oneapi_bindless_images +// UNSUPPORTED: hip +// UNSUPPORTED-INTENDED: Undetermined issue in 'create_image' in this test. + // RUN: %{build} -o %t.out // RUN: %{run-unfiltered-devices} env NEOReadDebugKeys=1 UseBindlessMode=1 UseExternalAllocatorForSshAndDsh=1 %t.out diff --git a/sycl/test-e2e/bindless_images/read_write_3D_subregion.cpp b/sycl/test-e2e/bindless_images/read_write_3D_subregion.cpp index 1c8157d9760bd..860f8ae9377e4 100644 --- a/sycl/test-e2e/bindless_images/read_write_3D_subregion.cpp +++ b/sycl/test-e2e/bindless_images/read_write_3D_subregion.cpp @@ -1,5 +1,8 @@ // REQUIRES: aspect-ext_oneapi_bindless_images +// UNSUPPORTED: hip +// UNSUPPORTED-INTENDED: Undetermined issue in 'create_image' in this test. + // RUN: %{build} -o %t.out // RUN: %{run-unfiltered-devices} env NEOReadDebugKeys=1 UseBindlessMode=1 UseExternalAllocatorForSshAndDsh=1 %t.out diff --git a/sycl/test-e2e/bindless_images/read_write_unsampled.cpp b/sycl/test-e2e/bindless_images/read_write_unsampled.cpp index 77f3ec299b7c1..f684075f1dcca 100644 --- a/sycl/test-e2e/bindless_images/read_write_unsampled.cpp +++ b/sycl/test-e2e/bindless_images/read_write_unsampled.cpp @@ -1,5 +1,8 @@ // REQUIRES: aspect-ext_oneapi_bindless_images +// UNSUPPORTED: hip +// UNSUPPORTED-INTENDED: Returning non fp[32/16] values from sampling fails. + // RUN: %{build} -o %t.out // RUN: %{run-unfiltered-devices} env NEOReadDebugKeys=1 UseBindlessMode=1 UseExternalAllocatorForSshAndDsh=1 %t.out diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp index 965dc9f00c1c4..5f537ea8855cb 100644 --- a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp +++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_1D_USM.cpp @@ -1,4 +1,3 @@ -// REQUIRES: cuda // REQUIRES: aspect-ext_oneapi_bindless_sampled_image_fetch_1d_usm // RUN: %{build} -o %t.out diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp index 0a6da2d97f136..4518832215e9e 100644 --- a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp +++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D.cpp @@ -1,4 +1,3 @@ -// REQUIRES: cuda // REQUIRES: aspect-ext_oneapi_bindless_sampled_image_fetch_2d // RUN: %{build} -o %t.out diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp index 834ec5b6e8c79..2dccb63645abf 100644 --- a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp +++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_2D_USM.cpp @@ -1,4 +1,3 @@ -// REQUIRES: cuda // REQUIRES: aspect-ext_oneapi_bindless_sampled_image_fetch_2d_usm // RUN: %{build} -o %t.out diff --git a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp index ccb096dbfbdc5..0cffd4ef864a6 100644 --- a/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp +++ b/sycl/test-e2e/bindless_images/sampled_fetch/fetch_3D.cpp @@ -1,4 +1,3 @@ -// REQUIRES: cuda // REQUIRES: aspect-ext_oneapi_bindless_sampled_image_fetch_3d // RUN: %{build} -o %t.out diff --git a/sycl/test-e2e/bindless_images/sampling_2D.cpp b/sycl/test-e2e/bindless_images/sampling_2D.cpp index 316eebc0ace2c..2e253201f7713 100644 --- a/sycl/test-e2e/bindless_images/sampling_2D.cpp +++ b/sycl/test-e2e/bindless_images/sampling_2D.cpp @@ -1,4 +1,4 @@ -// REQUIRES: aspect-ext_oneapi_bindless_images +// REQUIRES: aspect-ext_oneapi_bindless_images_2d_usm // RUN: %{build} -o %t.out // RUN: %{run-unfiltered-devices} env NEOReadDebugKeys=1 UseBindlessMode=1 UseExternalAllocatorForSshAndDsh=1 %t.out diff --git a/sycl/test-e2e/bindless_images/sampling_2D_USM_shared.cpp b/sycl/test-e2e/bindless_images/sampling_2D_USM_shared.cpp index 96007abe1b511..aa98be888d7be 100644 --- a/sycl/test-e2e/bindless_images/sampling_2D_USM_shared.cpp +++ b/sycl/test-e2e/bindless_images/sampling_2D_USM_shared.cpp @@ -1,6 +1,12 @@ -// REQUIRES: cuda // REQUIRES: aspect-ext_oneapi_bindless_images_shared_usm +// This test is unstable (sometimes passes) on HIP-AMD platforms. +// UNSUPPORTED: hip +// UNSUPPORTED-INTENDED: While rarely, urBindlessImagesSampledImageCreateExp for +// USM image memory type (with linear sampler) sometimes returns an unsupported +// feature result code (1:1 mapping from the native errc from the HIP runtime). +// We think this is likely an issue in the ROCm drivers(could be arch-specific). + // RUN: %{build} -o %t.out // RUN: %{run-unfiltered-devices} %t.out diff --git a/sycl/test-e2e/bindless_images/sampling_2D_half.cpp b/sycl/test-e2e/bindless_images/sampling_2D_half.cpp index f23fc4c470889..9e0c3a4e8f820 100644 --- a/sycl/test-e2e/bindless_images/sampling_2D_half.cpp +++ b/sycl/test-e2e/bindless_images/sampling_2D_half.cpp @@ -1,6 +1,13 @@ // REQUIRES: aspect-ext_oneapi_bindless_images // REQUIRES: aspect-fp16 +// This test is unstable (sometimes passes) on HIP-AMD platforms. +// UNSUPPORTED: hip +// UNSUPPORTED-INTENDED: While rarely, urBindlessImagesSampledImageCreateExp for +// USM image memory type (with linear sampler) sometimes returns an unsupported +// feature result code (1:1 mapping from the native errc from the HIP runtime). +// We think this is likely an issue in the ROCm drivers(could be arch-specific). + // RUN: %{build} -o %t.out // RUN: %{run-unfiltered-devices} env NEOReadDebugKeys=1 UseBindlessMode=1 UseExternalAllocatorForSshAndDsh=1 %t.out diff --git a/sycl/test-e2e/bindless_images/sampling_3D.cpp b/sycl/test-e2e/bindless_images/sampling_3D.cpp index bf11c21191013..da6346e54a5d4 100644 --- a/sycl/test-e2e/bindless_images/sampling_3D.cpp +++ b/sycl/test-e2e/bindless_images/sampling_3D.cpp @@ -1,5 +1,8 @@ // REQUIRES: aspect-ext_oneapi_bindless_images +// UNSUPPORTED: arch-amd_gpu_gfx90a +// UNSUPPORTED-INTENDED: AMD gfx90a devices don't support 3D linear filter mode + // RUN: %{build} -o %t.out // RUN: %{run-unfiltered-devices} env NEOReadDebugKeys=1 UseBindlessMode=1 UseExternalAllocatorForSshAndDsh=1 %t.out diff --git a/sycl/test-e2e/bindless_images/sampling_unique_addr_modes.cpp b/sycl/test-e2e/bindless_images/sampling_unique_addr_modes.cpp index 895f7082adce6..17c8cd71000da 100644 --- a/sycl/test-e2e/bindless_images/sampling_unique_addr_modes.cpp +++ b/sycl/test-e2e/bindless_images/sampling_unique_addr_modes.cpp @@ -1,4 +1,4 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_unique_addressing_per_dim // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/bindless_images/user_types/mipmap_read_user_type_2D.cpp b/sycl/test-e2e/bindless_images/user_types/mipmap_read_user_type_2D.cpp index 77913a2836565..087b341bb74f8 100644 --- a/sycl/test-e2e/bindless_images/user_types/mipmap_read_user_type_2D.cpp +++ b/sycl/test-e2e/bindless_images/user_types/mipmap_read_user_type_2D.cpp @@ -1,4 +1,4 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_mipmap // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/bindless_images/user_types/read_write_user_type.cpp b/sycl/test-e2e/bindless_images/user_types/read_write_user_type.cpp index db9347f9895e6..d855ff43113bc 100644 --- a/sycl/test-e2e/bindless_images/user_types/read_write_user_type.cpp +++ b/sycl/test-e2e/bindless_images/user_types/read_write_user_type.cpp @@ -1,4 +1,8 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_bindless_images + +// UNSUPPORTED: hip || level_zero +// UNSUPPORTED-INTENDED: Returning non-FP values from fetching fails on HIP. +// Also, the feature is not fully implemented in the Level Zero stack. // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/mipmaps.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/mipmaps.cpp index 57623c1b2da03..412d6e6f2c3cb 100644 --- a/sycl/test-e2e/bindless_images/vulkan_interop/mipmaps.cpp +++ b/sycl/test-e2e/bindless_images/vulkan_interop/mipmaps.cpp @@ -1,4 +1,5 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_external_memory_import +// REQUIRES: aspect-ext_oneapi_mipmap // REQUIRES: vulkan // REQUIRES: build-and-run-mode diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images.cpp index 12e5cfe5cad87..efa5aaa4733c8 100644 --- a/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images.cpp +++ b/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images.cpp @@ -1,4 +1,4 @@ -// REQUIRES: cuda || (windows && level_zero && aspect-ext_oneapi_bindless_images) +// REQUIRES: aspect-ext_oneapi_external_memory_import || (windows && level_zero && aspect-ext_oneapi_bindless_images) // REQUIRES: vulkan // REQUIRES: build-and-run-mode diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images_USM.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images_USM.cpp index 159b1c9ec1b44..47b1316e96360 100644 --- a/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images_USM.cpp +++ b/sycl/test-e2e/bindless_images/vulkan_interop/sampled_images_USM.cpp @@ -1,4 +1,5 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_bindless_images +// REQUIRES: aspect-ext_oneapi_external_memory_import // REQUIRES: vulkan // REQUIRES: build-and-run-mode diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp index b6d046904eea9..57ae0823793d4 100644 --- a/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp +++ b/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images.cpp @@ -1,4 +1,4 @@ -// REQUIRES: cuda || (windows && level_zero && aspect-ext_oneapi_bindless_images) +// REQUIRES: aspect-ext_oneapi_external_memory_import || (windows && level_zero && aspect-ext_oneapi_bindless_images) // REQUIRES: vulkan // REQUIRES: build-and-run-mode diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images_semaphore.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images_semaphore.cpp index af9163311727c..b1778d4da32cc 100644 --- a/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images_semaphore.cpp +++ b/sycl/test-e2e/bindless_images/vulkan_interop/unsampled_images_semaphore.cpp @@ -1,4 +1,5 @@ -// REQUIRES: cuda +// REQUIRES: aspect-ext_oneapi_external_semaphore_import +// REQUIRES: aspect-ext_oneapi_external_memory_import // REQUIRES: vulkan // REQUIRES: build-and-run-mode From e83ea79f61360466140934f0ac828bcf656cf381 Mon Sep 17 00:00:00 2001 From: Georgi Mirazchiyski Date: Mon, 23 Dec 2024 15:27:40 +0000 Subject: [PATCH 02/10] Split image.cl into image, image_array and image_common Signed-off-by: Georgi Mirazchiyski --- libclc/libspirv/lib/amdgcn-amdhsa/SOURCES | 2 + .../lib/amdgcn-amdhsa/images/image.cl | 833 +++--------------- .../lib/amdgcn-amdhsa/images/image_array.cl | 525 +++++++++++ .../lib/amdgcn-amdhsa/images/image_common.cl | 159 ++++ .../lib/amdgcn-amdhsa/images/image_common.h | 164 ++++ 5 files changed, 994 insertions(+), 689 deletions(-) create mode 100644 libclc/libspirv/lib/amdgcn-amdhsa/images/image_array.cl create mode 100644 libclc/libspirv/lib/amdgcn-amdhsa/images/image_common.cl create mode 100644 libclc/libspirv/lib/amdgcn-amdhsa/images/image_common.h diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/SOURCES b/libclc/libspirv/lib/amdgcn-amdhsa/SOURCES index 6e3a1a4107b36..25ad07b6b8c18 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/SOURCES +++ b/libclc/libspirv/lib/amdgcn-amdhsa/SOURCES @@ -16,7 +16,9 @@ atomic/atomic_sub.cl atomic/atomic_store.cl conversion/GenericCastToPtrExplicit.cl synchronization/barrier.cl +images/image_common.cl images/image.cl +images/image_array.cl math/acos.cl math/acosh.cl math/asin.cl diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/images/image.cl b/libclc/libspirv/lib/amdgcn-amdhsa/images/image.cl index c8a0bbfae83f9..d20ee55f2cb04 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/images/image.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/images/image.cl @@ -1,4 +1,4 @@ -#include +#include "image_common.h" #include #ifdef cl_khr_fp16 @@ -9,158 +9,6 @@ #pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable #endif -#ifdef _WIN32 -#define _CLC_MANGLE_FUNC_IMG_HANDLE(namelength, name, prefix, postfix) \ - _Z##namelength##name##prefix##y##postfix -#else -#define _CLC_MANGLE_FUNC_IMG_HANDLE(namelength, name, prefix, postfix) \ - _Z##namelength##name##prefix##m##postfix -#endif - -// Helpers for casting between two builitin vector types and/or scalar types. - -// Using the builtin as_type() and as_typen() functions to reinterpret types. -// The restriction being is that element "type"s need to be of the same size. -#define _CLC_DEFINE_BUILTIN_CAST_VEC4_TO_VEC3(vec4_elem_t, to_t) \ - _CLC_INLINE to_t##3 __clc_cast_from_##vec4_elem_t##4_to_##to_t##3(vec4_elem_t##4 from) { \ - vec4_elem_t##3 casted = as_##vec4_elem_t##3(from); \ - return as_##to_t##3(casted); \ - } -#define _CLC_DEFINE_BUILTIN_CAST_VEC4_TO_VEC2(vec4_elem_t, to_t) \ - _CLC_INLINE to_t##2 __clc_cast_from_##vec4_elem_t##4_to_##to_t##2(vec4_elem_t##4 from) { \ - vec4_elem_t##4 casted = as_##vec4_elem_t##4(from); \ - return as_##to_t##2((vec4_elem_t##2)(casted.x, casted.y)); \ - } -#define _CLC_DEFINE_BUILTIN_CAST_VEC4_TO_SCALAR(vec4_elem_t, to_t) \ - _CLC_INLINE to_t __clc_cast_from_##vec4_elem_t##4_to_##to_t(vec4_elem_t##4 from) { \ - vec4_elem_t##4 casted = as_##vec4_elem_t##4(from); \ - return as_##to_t(casted.x); \ - } -#define _CLC_DEFINE_BUILTIN_CAST_VEC3_TO_VEC4(from_t, vec4_elem_t) \ - _CLC_INLINE vec4_elem_t##4 __clc_cast_from_##from_t##3_to_##vec4_elem_t##4(from_t##3 from) { \ - vec4_elem_t##3 casted = as_##vec4_elem_t##3(from); \ - return as_##vec4_elem_t##4(casted); \ - } -#define _CLC_DEFINE_BUILTIN_CAST_VEC2_TO_VEC4(from_t, vec4_elem_t) \ - _CLC_INLINE vec4_elem_t##4 __clc_cast_from_##from_t##2_to_##vec4_elem_t##4(from_t##2 from) { \ - vec4_elem_t##2 casted = as_##vec4_elem_t##2(from); \ - return (vec4_elem_t##4)(casted.x, casted.y, 0, 0); \ - } -#define _CLC_DEFINE_BUILTIN_CAST_SCALAR_TO_VEC4(from_t, vec4_elem_t) \ - _CLC_INLINE vec4_elem_t##4 __clc_cast_from_##from_t##_to_##vec4_elem_t##4(from_t from) { \ - vec4_elem_t casted = as_##vec4_elem_t(from); \ - return (vec4_elem_t##4)(casted, 0, 0, 0); \ - } - -// Generic casts between builtin types. -#define _CLC_DEFINE_CAST_VEC4(vec4_elem_t, to_t) \ - _CLC_INLINE to_t##4 __clc_cast_from_##vec4_elem_t##4_to_##to_t##4(vec4_elem_t##4 from) { \ - return (to_t##4)(from.x, from.y, from.z, from.w); \ - } -#define _CLC_DEFINE_CAST_VEC4_TO_VEC3(vec4_elem_t, to_t) \ - _CLC_INLINE to_t##3 __clc_cast_from_##vec4_elem_t##4_to_##to_t##3(vec4_elem_t##4 from) { \ - return (to_t##3)(from.x, from.y, from.z); \ - } -#define _CLC_DEFINE_CAST_VEC4_TO_VEC2(vec4_elem_t, to_t) \ - _CLC_INLINE to_t##2 __clc_cast_from_##vec4_elem_t##4_to_##to_t##2(vec4_elem_t##4 from) { \ - return (to_t##2)(from.x, from.y); \ - } -#define _CLC_DEFINE_CAST_VEC4_TO_SCALAR(vec4_elem_t, to_t) \ - _CLC_INLINE to_t __clc_cast_from_##vec4_elem_t##4_to_##to_t(vec4_elem_t##4 from) { \ - return (to_t)from.x; \ - } -#define _CLC_DEFINE_CAST_VEC3_TO_VEC4(from_t, vec4_elem_t) \ - _CLC_INLINE vec4_elem_t##4 __clc_cast_from_##from_t##3_to_##vec4_elem_t##4(from_t##3 from) { \ - return (vec4_elem_t##4)(from.x, from.y, from.z, 0); \ - } -#define _CLC_DEFINE_CAST_VEC2_TO_VEC4(from_t, vec4_elem_t) \ - _CLC_INLINE vec4_elem_t##4 __clc_cast_from_##from_t##2_to_##vec4_elem_t##4(from_t##2 from) { \ - return (vec4_elem_t##4)(from.x, from.y, 0, 0); \ - } -#define _CLC_DEFINE_CAST_SCALAR_TO_VEC4(from_t, vec4_elem_t) \ - _CLC_INLINE vec4_elem_t##4 __clc_cast_from_##from_t##_to_##vec4_elem_t##4(from_t from) { \ - return (vec4_elem_t##4)(from, 0, 0, 0); \ - } - -// Helpers to extract N channel(s) from a four-channel (RGBA/XYZW) color type. - -#define _CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(from_t, to_t) \ - _CLC_DEFINE_CAST_VEC4(from_t, to_t) \ - _CLC_DEFINE_BUILTIN_CAST_VEC4_TO_VEC3(from_t, to_t) \ - _CLC_DEFINE_BUILTIN_CAST_VEC4_TO_VEC2(from_t, to_t) \ - _CLC_DEFINE_BUILTIN_CAST_VEC4_TO_SCALAR(from_t, to_t) \ - _CLC_DEFINE_BUILTIN_CAST_VEC2_TO_VEC4(from_t, to_t) \ - _CLC_DEFINE_BUILTIN_CAST_VEC3_TO_VEC4(from_t, to_t) \ - _CLC_DEFINE_BUILTIN_CAST_SCALAR_TO_VEC4(from_t, to_t) - -#define _CLC_DEFINE_EXTRACT_COLOR_HELPERS(from_t, to_t) \ - _CLC_DEFINE_CAST_VEC4(from_t, to_t) \ - _CLC_DEFINE_CAST_VEC4_TO_VEC3(from_t, to_t) \ - _CLC_DEFINE_CAST_VEC4_TO_VEC2(from_t, to_t) \ - _CLC_DEFINE_CAST_VEC4_TO_SCALAR(from_t, to_t) \ - _CLC_DEFINE_CAST_VEC2_TO_VEC4(from_t, to_t) \ - _CLC_DEFINE_CAST_VEC3_TO_VEC4(from_t, to_t) \ - _CLC_DEFINE_CAST_SCALAR_TO_VEC4(from_t, to_t) - -// Define casts between supported builtin types for image color - -_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(float, float) -_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(float, int) -_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(int, float) -_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(float, uint) -_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(uint, float) -_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(half, half) -_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(half, short) -_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(short, half) -_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(half, ushort) -_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(ushort, half) - -_CLC_DEFINE_EXTRACT_COLOR_HELPERS(float, half) -_CLC_DEFINE_EXTRACT_COLOR_HELPERS(half, float) -_CLC_DEFINE_EXTRACT_COLOR_HELPERS(float, short) -_CLC_DEFINE_EXTRACT_COLOR_HELPERS(short, float) -_CLC_DEFINE_EXTRACT_COLOR_HELPERS(float, ushort) -_CLC_DEFINE_EXTRACT_COLOR_HELPERS(ushort, float) -_CLC_DEFINE_EXTRACT_COLOR_HELPERS(float, char) -_CLC_DEFINE_EXTRACT_COLOR_HELPERS(char, float) -_CLC_DEFINE_EXTRACT_COLOR_HELPERS(float, uchar) -_CLC_DEFINE_EXTRACT_COLOR_HELPERS(uchar, float) -_CLC_DEFINE_EXTRACT_COLOR_HELPERS(half, int) -_CLC_DEFINE_EXTRACT_COLOR_HELPERS(int, half) -_CLC_DEFINE_EXTRACT_COLOR_HELPERS(half, uint) -_CLC_DEFINE_EXTRACT_COLOR_HELPERS(uint, half) -_CLC_DEFINE_EXTRACT_COLOR_HELPERS(half, char) -_CLC_DEFINE_EXTRACT_COLOR_HELPERS(char, half) -_CLC_DEFINE_EXTRACT_COLOR_HELPERS(half, uchar) -_CLC_DEFINE_EXTRACT_COLOR_HELPERS(uchar, half) - -#undef _CLC_DEFINE_EXTRACT_COLOR_HELPERS -#undef _CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS - -#undef _CLC_DEFINE_CAST_SCALAR_TO_VEC4 -#undef _CLC_DEFINE_CAST_VEC2_TO_VEC4 -#undef _CLC_DEFINE_CAST_VEC3_TO_VEC4 -#undef _CLC_DEFINE_CAST_VEC4_TO_SCALAR -#undef _CLC_DEFINE_CAST_VEC4_TO_VEC3 -#undef _CLC_DEFINE_CAST_VEC4_TO_VEC2 -#undef _CLC_DEFINE_CAST_VEC4 -#undef _CLC_DEFINE_BUILTIN_CAST_SCALAR_TO_VEC4 -#undef _CLC_DEFINE_BUILTIN_CAST_VEC2_TO_VEC4 -#undef _CLC_DEFINE_BUILTIN_VEC3_TO_VEC4 -#undef _CLC_DEFINE_BUILTIN_VEC4_TO_SCALAR -#undef _CLC_DEFINE_BUILTIN_VEC4_TO_VEC3 -#undef _CLC_DEFINE_BUILTIN_VEC4_TO_VEC2 -#undef _CLC_DEFINE_BUILTIN_VEC4 - -// The ockl functions/builtins expect the resource in constant addrspace. -#if __clang_major__ >= 8 -#define _CLC_CONST_AS __constant -#elif __clang_major__ >= 7 -#define _CLC_CONST_AS __attribute__((address_space(4))) -#else -#define _CLC_CONST_AS __attribute__((address_space(2))) -#endif - // Declare ockl functions/builtins that we link from the ROCm device libs. float4 __ockl_image_load_1D(_CLC_CONST_AS unsigned int *tex, int coord); float4 __ockl_image_load_2D(_CLC_CONST_AS unsigned int *tex, int2 coord); @@ -169,70 +17,65 @@ half4 __ockl_image_loadh_1D(_CLC_CONST_AS unsigned int *tex, int coord); half4 __ockl_image_loadh_2D(_CLC_CONST_AS unsigned int *tex, int2 coord); half4 __ockl_image_loadh_3D(_CLC_CONST_AS unsigned int *tex, int3 coord); -float4 __ockl_image_load_1Da(_CLC_CONST_AS unsigned int *tex, int2 coord); -float4 __ockl_image_load_2Da(_CLC_CONST_AS unsigned int *tex, int4 coord); -half4 __ockl_image_loadh_1Da(_CLC_CONST_AS unsigned int *tex, int2 coord); -half4 __ockl_image_loadh_2Da(_CLC_CONST_AS unsigned int *tex, int4 coord); - -void __ockl_image_store_1D(_CLC_CONST_AS unsigned int *tex, int coord, float4 color); -void __ockl_image_store_2D(_CLC_CONST_AS unsigned int *tex, int2 coord, float4 color); -void __ockl_image_store_3D(_CLC_CONST_AS unsigned int *tex, int3 coord, float4 color); -void __ockl_image_storeh_1D(_CLC_CONST_AS unsigned int *tex, int coord, half4 color); -void __ockl_image_storeh_2D(_CLC_CONST_AS unsigned int *tex, int2 coord, half4 color); -void __ockl_image_storeh_3D(_CLC_CONST_AS unsigned int *tex, int3 coord, half4 color); - -void __ockl_image_store_1Da(_CLC_CONST_AS unsigned int *tex, int2 coord, float4 color); -void __ockl_image_store_2Da(_CLC_CONST_AS unsigned int *tex, int4 coord, float4 color); -void __ockl_image_storeh_1Da(_CLC_CONST_AS unsigned int *tex, int2 coord, half4 color); -void __ockl_image_storeh_2Da(_CLC_CONST_AS unsigned int *tex, int4 coord, half4 color); - -float4 __ockl_image_sample_1D(_CLC_CONST_AS unsigned int *tex, _CLC_CONST_AS unsigned int *samp, float coord); -float4 __ockl_image_sample_2D(_CLC_CONST_AS unsigned int *tex, _CLC_CONST_AS unsigned int *samp, float2 coord); -float4 __ockl_image_sample_3D(_CLC_CONST_AS unsigned int *tex, _CLC_CONST_AS unsigned int *samp, float3 coord); -half4 __ockl_image_sampleh_1D(_CLC_CONST_AS unsigned int *tex, _CLC_CONST_AS unsigned int *samp, float coord); -half4 __ockl_image_sampleh_2D(_CLC_CONST_AS unsigned int *tex, _CLC_CONST_AS unsigned int *samp, float2 coord); -half4 __ockl_image_sampleh_3D(_CLC_CONST_AS unsigned int *tex, _CLC_CONST_AS unsigned int *samp, float3 coord); - -float4 __ockl_image_sample_1Da(_CLC_CONST_AS unsigned int *tex, _CLC_CONST_AS unsigned int *samp, float2 coord); -float4 __ockl_image_sample_2Da(_CLC_CONST_AS unsigned int *tex, _CLC_CONST_AS unsigned int *samp, float4 coord); -half4 __ockl_image_sampleh_1Da(_CLC_CONST_AS unsigned int *tex, _CLC_CONST_AS unsigned int *samp, float2 coord); -half4 __ockl_image_sampleh_2Da(_CLC_CONST_AS unsigned int *tex, _CLC_CONST_AS unsigned int *samp, float4 coord); +void __ockl_image_store_1D(_CLC_CONST_AS unsigned int *tex, int coord, + float4 color); +void __ockl_image_store_2D(_CLC_CONST_AS unsigned int *tex, int2 coord, + float4 color); +void __ockl_image_store_3D(_CLC_CONST_AS unsigned int *tex, int3 coord, + float4 color); +void __ockl_image_storeh_1D(_CLC_CONST_AS unsigned int *tex, int coord, + half4 color); +void __ockl_image_storeh_2D(_CLC_CONST_AS unsigned int *tex, int2 coord, + half4 color); +void __ockl_image_storeh_3D(_CLC_CONST_AS unsigned int *tex, int3 coord, + half4 color); + +float4 __ockl_image_sample_1D(_CLC_CONST_AS unsigned int *tex, + _CLC_CONST_AS unsigned int *samp, float coord); +float4 __ockl_image_sample_2D(_CLC_CONST_AS unsigned int *tex, + _CLC_CONST_AS unsigned int *samp, float2 coord); +float4 __ockl_image_sample_3D(_CLC_CONST_AS unsigned int *tex, + _CLC_CONST_AS unsigned int *samp, float3 coord); +half4 __ockl_image_sampleh_1D(_CLC_CONST_AS unsigned int *tex, + _CLC_CONST_AS unsigned int *samp, float coord); +half4 __ockl_image_sampleh_2D(_CLC_CONST_AS unsigned int *tex, + _CLC_CONST_AS unsigned int *samp, float2 coord); +half4 __ockl_image_sampleh_3D(_CLC_CONST_AS unsigned int *tex, + _CLC_CONST_AS unsigned int *samp, float3 coord); // // IMAGES // -#define _CLC_ARRAY_COORD_PARAMS_1D(coord, layer) coord, layer -#define _CLC_ARRAY_COORD_PARAMS_2D(coord, layer) coord.x, coord.y, layer, 0 - // Fetch Ops -#define _CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN( \ - dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, \ - builtin_ret_t, builtin_ret_postfix) \ - _CLC_DEF elem_t _CLC_MANGLE_FUNC_IMG_HANDLE( \ - 18, __spirv_ImageFetch, I##elem_t_mangled, \ - coord_mangled##ET_T0_T1_)(ulong imageHandle, coord_t coord) { \ - _CLC_CONST_AS unsigned int *tex = (_CLC_CONST_AS unsigned int *)imageHandle; \ - builtin_ret_t##4 color = \ - __ockl_image_load##builtin_ret_postfix##_##dimension##D(tex, coord); \ - return __clc_cast_from_##builtin_ret_t##4_to_##elem_t(color); \ +#define _CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, builtin_ret_t, \ + builtin_ret_postfix) \ + _CLC_DEF elem_t _CLC_MANGLE_FUNC_IMG_HANDLE( \ + 18, __spirv_ImageFetch, I##elem_t_mangled, \ + coord_mangled##ET_T0_T1_)(ulong imageHandle, coord_t coord) { \ + _CLC_CONST_AS unsigned int *tex = \ + (_CLC_CONST_AS unsigned int *)imageHandle; \ + builtin_ret_t##4 color = \ + __ockl_image_load##builtin_ret_postfix##_##dimension##D(tex, coord); \ + return __clc_cast_from_##builtin_ret_t##4_to_##elem_t(color); \ } -#define _CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN( \ - dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) \ - _CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(dimension, elem_t, elem_t_mangled, \ - coord_t, coord_mangled, float,) +#define _CLC_DEFINE_IMAGE_BINDLESS_FETCH_32_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) \ + _CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(dimension, elem_t, elem_t_mangled, \ + coord_t, coord_mangled, float, ) -#define _CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN( \ - dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) \ - _CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(dimension, elem_t, elem_t_mangled, \ +#define _CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) \ + _CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN(dimension, elem_t, elem_t_mangled, \ coord_t, coord_mangled, half, h) -#define _CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN( \ - dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) \ - _CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN(dimension, elem_t, elem_t_mangled, \ - coord_t, coord_mangled) +#define _CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) \ + _CLC_DEFINE_IMAGE_BINDLESS_FETCH_16_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) // Float // return 1-channel color data @@ -354,32 +197,34 @@ _CLC_DEFINE_IMAGE_BINDLESS_FETCH_8_BUILTIN(3, uchar4, Dv4_h, int3, Dv3_i) // Write Ops -#define _CLC_DEFINE_IMAGE_BINDLESS_WRITE_BUILTIN( \ - dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, \ - builtin_ret_t, builtin_ret_postfix) \ - _CLC_DEF void _CLC_MANGLE_FUNC_IMG_HANDLE( \ - 18, __spirv_ImageWrite, I, coord_mangled##elem_t_mangled##EvT_T0_T1_)( \ - ulong imageHandle, coord_t coord, elem_t color) { \ - _CLC_CONST_AS unsigned int *tex = (_CLC_CONST_AS unsigned int *)imageHandle; \ - builtin_ret_t##4 outColor = \ - __clc_cast_from_##elem_t##_to_##builtin_ret_t##4(color); \ - __ockl_image_store##builtin_ret_postfix##_##dimension##D(tex, coord, outColor); \ +#define _CLC_DEFINE_IMAGE_BINDLESS_WRITE_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, builtin_ret_t, \ + builtin_ret_postfix) \ + _CLC_DEF void _CLC_MANGLE_FUNC_IMG_HANDLE( \ + 18, __spirv_ImageWrite, I, coord_mangled##elem_t_mangled##EvT_T0_T1_)( \ + ulong imageHandle, coord_t coord, elem_t color) { \ + _CLC_CONST_AS unsigned int *tex = \ + (_CLC_CONST_AS unsigned int *)imageHandle; \ + builtin_ret_t##4 outColor = \ + __clc_cast_from_##elem_t##_to_##builtin_ret_t##4(color); \ + __ockl_image_store##builtin_ret_postfix##_##dimension##D(tex, coord, \ + outColor); \ } -#define _CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN( \ - dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) \ - _CLC_DEFINE_IMAGE_BINDLESS_WRITE_BUILTIN(dimension, elem_t, elem_t_mangled, \ - coord_t, coord_mangled, float,) +#define _CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) \ + _CLC_DEFINE_IMAGE_BINDLESS_WRITE_BUILTIN(dimension, elem_t, elem_t_mangled, \ + coord_t, coord_mangled, float, ) -#define _CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN( \ - dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) \ - _CLC_DEFINE_IMAGE_BINDLESS_WRITE_BUILTIN(dimension, elem_t, elem_t_mangled, \ +#define _CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) \ + _CLC_DEFINE_IMAGE_BINDLESS_WRITE_BUILTIN(dimension, elem_t, elem_t_mangled, \ coord_t, coord_mangled, half, h) -#define _CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN( \ - dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) \ - _CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN(dimension, elem_t, elem_t_mangled, \ - coord_t, coord_mangled) +#define _CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) \ + _CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) // Float // write 1-channel color data @@ -499,311 +344,41 @@ _CLC_DEFINE_IMAGE_BINDLESS_WRITE_8_BUILTIN(3, uchar4, Dv4_h, int3, Dv3_i) #undef _CLC_DEFINE_IMAGE_BINDLESS_WRITE_16_BUILTIN #undef _CLC_DEFINE_IMAGE_BINDLESS_WRITE_32_BUILTIN -// -// IMAGE ARRAYS -// - -// Read Ops - -#define _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_BUILTIN( \ - dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size, \ - builtin_ret_t, builtin_ret_postfix) \ - _CLC_DEF elem_t _CLC_MANGLE_FUNC_IMG_HANDLE( \ - 23, __spirv_ImageArrayFetch, I##elem_t_mangled, \ - coord_mangled##ET_T0_T1_i)(ulong imageHandle, coord_t coord, int layer) { \ - _CLC_CONST_AS unsigned int *tex = (_CLC_CONST_AS unsigned int *)imageHandle; \ - int##vec_size arrayCoord = \ - (int##vec_size)(_CLC_ARRAY_COORD_PARAMS_##dimension##D(coord, layer)); \ - builtin_ret_t##4 color = \ - __ockl_image_load##builtin_ret_postfix##_##dimension##Da(tex, arrayCoord); \ - return __clc_cast_from_##builtin_ret_t##4_to_##elem_t(color); \ - } - -#define _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN( \ - dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) \ - _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_BUILTIN(dimension, elem_t, \ - elem_t_mangled, coord_t, \ - coord_mangled, vec_size, \ - float,) - -#define _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN( \ - dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) \ - _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_BUILTIN(dimension, elem_t, \ - elem_t_mangled, coord_t, \ - coord_mangled, vec_size, \ - half, h) - -#define _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN( \ - dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) \ - _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(dimension, elem_t, \ - elem_t_mangled, coord_t, \ - coord_mangled, vec_size) - -// Float -// return 1-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(1, float, f, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(2, float, f, int2, Dv2_i, 4) -// return 2-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(1, float2, Dv2_f, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(2, float2, Dv2_f, int2, Dv2_i, 4) -// return 4-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(1, float4, Dv4_f, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(2, float4, Dv4_f, int2, Dv2_i, 4) - -// Half -#ifdef cl_khr_fp16 -// return 1-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, half, DF16_, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, half, DF16_, int2, Dv2_i, 4) -// return 2-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, half2, Dv2_DF16_, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, half2, Dv2_DF16_, int2, Dv2_i, 4) -// return 4-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, half4, Dv4_DF16_, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, half4, Dv4_DF16_, int2, Dv2_i, 4) -#endif - -// Int -// return 1-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(1, int, i, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(2, int, i, int2, Dv2_i, 4) -// return 2-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(1, int2, Dv2_i, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(2, int2, Dv2_i, int2, Dv2_i, 4) -// return 4-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(1, int4, Dv4_i, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(2, int4, Dv4_i, int2, Dv2_i, 4) - -// Unsigned Int -// return 1-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(1, uint, j, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(2, uint, j, int2, Dv2_i, 4) -// return 2-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(1, uint2, Dv2_j, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(2, uint2, Dv2_j, int2, Dv2_i, 4) -// return 4-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(1, uint4, Dv4_j, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(2, uint4, Dv4_j, int2, Dv2_i, 4) - -// Short -// return 1-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, short, s, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, short, s, int2, Dv2_i, 4) -// return 2-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, short2, Dv2_s, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, short2, Dv2_s, int2, Dv2_i, 4) -// return 4-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, short4, Dv4_s, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, short4, Dv4_s, int2, Dv2_i, 4) - -// Unsigned Short -// return 1-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, ushort, t, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, ushort, t, int2, Dv2_i, 4) -// return 2-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, ushort2, Dv2_t, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, ushort2, Dv2_t, int2, Dv2_i, 4) -// return 4-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, ushort4, Dv4_t, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, ushort4, Dv4_t, int2, Dv2_i, 4) - -// Char -// return 1-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(1, char, a, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(2, char, a, int2, Dv2_i, 4) -// return 2-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(1, char2, Dv2_a, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(2, char2, Dv2_a, int2, Dv2_i, 4) -// return 4-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(1, char4, Dv4_a, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(2, char4, Dv4_a, int2, Dv2_i, 4) - -// Unsigned Char -// return 1-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(1, uchar, h, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(2, uchar, h, int2, Dv2_i, 4) -// return 2-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(1, uchar2, Dv2_h, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(2, uchar2, Dv2_h, int2, Dv2_i, 4) -// return 4-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(1, uchar4, Dv4_h, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(2, uchar4, Dv4_h, int2, Dv2_i, 4) - -#undef _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN -#undef _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN -#undef _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN - -// Write Ops - -#define _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_BUILTIN( \ - dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size, \ - builtin_ret_t, builtin_ret_postfix) \ - _CLC_DEF void _CLC_MANGLE_FUNC_IMG_HANDLE( \ - 23, __spirv_ImageArrayWrite, I, coord_mangled##elem_t_mangled##EvT_T0_iT1_)( \ - ulong imageHandle, coord_t coord, int layer, elem_t color) { \ - _CLC_CONST_AS unsigned int *tex = (_CLC_CONST_AS unsigned int *)imageHandle; \ - int##vec_size arrayCoord = \ - (int##vec_size)(_CLC_ARRAY_COORD_PARAMS_##dimension##D(coord, layer)); \ - builtin_ret_t##4 outColor = \ - __clc_cast_from_##elem_t##_to_##builtin_ret_t##4(color); \ - __ockl_image_store##builtin_ret_postfix##_##dimension##Da(tex, arrayCoord, \ - outColor); \ - } - - -#define _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN( \ - dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) \ - _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_BUILTIN(dimension, elem_t, \ - elem_t_mangled, coord_t, \ - coord_mangled, vec_size, \ - float,) - -#define _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN( \ - dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) \ - _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_BUILTIN(dimension, elem_t, \ - elem_t_mangled, coord_t, \ - coord_mangled, vec_size, \ - half, h) - -#define _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN( \ - dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) \ - _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(dimension, elem_t, \ - elem_t_mangled, coord_t, \ - coord_mangled, vec_size) - -// Float -// write 1-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(1, float, f, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(2, float, f, int2, Dv2_i, 4) -// write 2-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(1, float2, Dv2_f, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(2, float2, Dv2_f, int2, Dv2_i, 4) -// write 4-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(1, float4, Dv4_f, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(2, float4, Dv4_f, int2, Dv2_i, 4) - -// Half -#ifdef cl_khr_fp16 -// write 1-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, half, DF16_, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, half, DF16_, int2, Dv2_i, 4) -// write 2-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, half2, Dv2_DF16_, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, half2, Dv2_DF16_, int2, Dv2_i, 4) -// write 4-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, half4, Dv4_DF16_, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, half4, Dv4_DF16_, int2, Dv2_i, 4) -#endif - -// Int -// write 1-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(1, int, i, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(2, int, i, int2, Dv2_i, 4) -// write 2-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(1, int2, Dv2_i, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(2, int2, Dv2_i, int2, Dv2_i, 4) -// write 4-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(1, int4, Dv4_i, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(2, int4, Dv4_i, int2, Dv2_i, 4) - -// Unsigned Int -// write 1-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(1, uint, j, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(2, uint, j, int2, Dv2_i, 4) -// write 2-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(1, uint2, Dv2_j, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(2, uint2, Dv2_j, int2, Dv2_i, 4) -// write 4-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(1, uint4, Dv4_j, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(2, uint4, Dv4_j, int2, Dv2_i, 4) - -// Short -// write 1-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, short, s, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, short, s, int2, Dv2_i, 4) -// write 2-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, short2, Dv2_s, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, short2, Dv2_s, int2, Dv2_i, 4) -// write 4-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, short4, Dv4_s, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, short4, Dv4_s, int2, Dv2_i, 4) - -// Unsigned Short -// write 1-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, ushort, t, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, ushort, t, int2, Dv2_i, 4) -// write 2-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, ushort2, Dv2_t, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, ushort2, Dv2_t, int2, Dv2_i, 4) -// write 4-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, ushort4, Dv4_t, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, ushort4, Dv4_t, int2, Dv2_i, 4) - -// Char -// write 1-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(1, char, a, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(2, char, a, int2, Dv2_i, 4) -// write 2-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(1, char2, Dv2_a, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(2, char2, Dv2_a, int2, Dv2_i, 4) -// write 4-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(1, char4, Dv4_a, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(2, char4, Dv4_a, int2, Dv2_i, 4) - -// Unsigned Char -// write 1-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(1, uchar, h, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(2, uchar, h, int2, Dv2_i, 4) -// write 2-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(1, uchar2, Dv2_h, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(2, uchar2, Dv2_h, int2, Dv2_i, 4) -// write 4-channel color data -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(1, uchar4, Dv4_h, int, i, 2) -_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(2, uchar4, Dv4_h, int2, Dv2_i, 4) - -#undef _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN -#undef _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN -#undef _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN - // // SAMPLED IMAGES -// Note: consider splitting it off in sampled_image.cl when it grows larger. // -// From -// https://github.com/ROCm/clr/tree/amd-staging/hipamd/include/hip/amd_detail/texture_fetch_functions.h -static _CLC_CONST_AS const unsigned int SAMPLER_OBJECT_OFFSET_DWORD = 12; - // Read Ops -#define _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_BUILTIN( \ - dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, \ - builtin_ret_t, builtin_ret_postfix) \ - _CLC_DEF elem_t _CLC_MANGLE_FUNC_IMG_HANDLE( \ - 17, __spirv_ImageRead, I##elem_t_mangled, \ - coord_mangled##ET_T0_T1_)(ulong imageHandle, coord_t coord) { \ - _CLC_CONST_AS unsigned int *tex = (_CLC_CONST_AS unsigned int *)imageHandle; \ - _CLC_CONST_AS unsigned int *samp = tex + SAMPLER_OBJECT_OFFSET_DWORD; \ - builtin_ret_t##4 color = \ - __ockl_image_sample##builtin_ret_postfix##_##dimension##D(tex, samp, coord); \ - return __clc_cast_from_##builtin_ret_t##4_to_##elem_t(color); \ +#define _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, builtin_ret_t, \ + builtin_ret_postfix) \ + _CLC_DEF elem_t _CLC_MANGLE_FUNC_IMG_HANDLE( \ + 17, __spirv_ImageRead, I##elem_t_mangled, \ + coord_mangled##ET_T0_T1_)(ulong imageHandle, coord_t coord) { \ + _CLC_CONST_AS unsigned int *tex = \ + (_CLC_CONST_AS unsigned int *)imageHandle; \ + _CLC_CONST_AS unsigned int *samp = tex + SAMPLER_OBJECT_OFFSET_DWORD; \ + builtin_ret_t##4 color = \ + __ockl_image_sample##builtin_ret_postfix##_##dimension##D(tex, samp, \ + coord); \ + return __clc_cast_from_##builtin_ret_t##4_to_##elem_t(color); \ } -#define _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN( \ - dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) \ - _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_BUILTIN(dimension, elem_t, elem_t_mangled, \ - coord_t, coord_mangled, float,) +#define _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) \ + _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, float, ) -#define _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN( \ - dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) \ - _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_BUILTIN(dimension, elem_t, elem_t_mangled, \ - coord_t, coord_mangled, half, h) +#define _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) \ + _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, half, h) -#define _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN( \ - dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) \ - _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(dimension, elem_t, \ - elem_t_mangled, coord_t, \ - coord_mangled) +#define _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) \ + _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled) // Float // return 1 channel color data @@ -812,12 +387,16 @@ _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(2, float, f, float2, Dv2_f) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(3, float, f, float3, Dv3_f) // return 2-channel color data _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(1, float2, Dv2_f, float, f) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(2, float2, Dv2_f, float2, Dv2_f) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(3, float2, Dv2_f, float3, Dv3_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(2, float2, Dv2_f, float2, + Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(3, float2, Dv2_f, float3, + Dv3_f) // return 4-channel color data _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(1, float4, Dv4_f, float, f) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(2, float4, Dv4_f, float2, Dv2_f) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(3, float4, Dv4_f, float3, Dv3_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(2, float4, Dv4_f, float2, + Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(3, float4, Dv4_f, float3, + Dv3_f) // Half #ifdef cl_khr_fp16 @@ -827,12 +406,16 @@ _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(2, half, DF16_, float2, Dv2_f) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(3, half, DF16_, float3, Dv3_f) // return 2-channel color data _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(1, half2, Dv2_DF16_, float, f) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(2, half2, Dv2_DF16_, float2, Dv2_f) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(3, half2, Dv2_DF16_, float3, Dv3_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(2, half2, Dv2_DF16_, float2, + Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(3, half2, Dv2_DF16_, float3, + Dv3_f) // return 4-channel color data _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(1, half4, Dv4_DF16_, float, f) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(2, half4, Dv4_DF16_, float2, Dv2_f) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(3, half4, Dv4_DF16_, float3, Dv3_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(2, half4, Dv4_DF16_, float2, + Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(3, half4, Dv4_DF16_, float3, + Dv3_f) #endif // Int @@ -856,12 +439,16 @@ _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(2, uint, j, float2, Dv2_f) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(3, uint, j, float3, Dv3_f) // return 2-channel color data _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(1, uint2, Dv2_j, float, f) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(2, uint2, Dv2_j, float2, Dv2_f) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(3, uint2, Dv2_j, float3, Dv3_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(2, uint2, Dv2_j, float2, + Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(3, uint2, Dv2_j, float3, + Dv3_f) // return 4-channel color data _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(1, uint4, Dv4_j, float, f) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(2, uint4, Dv4_j, float2, Dv2_f) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(3, uint4, Dv4_j, float3, Dv3_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(2, uint4, Dv4_j, float2, + Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN(3, uint4, Dv4_j, float3, + Dv3_f) // Short // return 1-channel color data @@ -870,12 +457,16 @@ _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(2, short, s, float2, Dv2_f) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(3, short, s, float3, Dv3_f) // return 2-channel color data _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(1, short2, Dv2_s, float, f) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(2, short2, Dv2_s, float2, Dv2_f) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(3, short2, Dv2_s, float3, Dv3_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(2, short2, Dv2_s, float2, + Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(3, short2, Dv2_s, float3, + Dv3_f) // return 4-channel color data _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(1, short4, Dv4_s, float, f) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(2, short4, Dv4_s, float2, Dv2_f) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(3, short4, Dv4_s, float3, Dv3_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(2, short4, Dv4_s, float2, + Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(3, short4, Dv4_s, float3, + Dv3_f) // Unsigned Short // return 1-channel color data @@ -884,12 +475,16 @@ _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(2, ushort, t, float2, Dv2_f) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(3, ushort, t, float3, Dv3_f) // return 2-channel color data _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(1, ushort2, Dv2_t, float, f) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(2, ushort2, Dv2_t, float2, Dv2_f) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(3, ushort2, Dv2_t, float3, Dv3_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(2, ushort2, Dv2_t, float2, + Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(3, ushort2, Dv2_t, float3, + Dv3_f) // return 4-channel color data _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(1, ushort4, Dv4_t, float, f) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(2, ushort4, Dv4_t, float2, Dv2_f) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(3, ushort4, Dv4_t, float3, Dv3_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(2, ushort4, Dv4_t, float2, + Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN(3, ushort4, Dv4_t, float3, + Dv3_f) // Char // return 1-channel color data @@ -912,165 +507,25 @@ _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(2, uchar, h, float2, Dv2_f) _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(3, uchar, h, float3, Dv3_f) // return 2-channel color data _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(1, uchar2, Dv2_h, float, f) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(2, uchar2, Dv2_h, float2, Dv2_f) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(3, uchar2, Dv2_h, float3, Dv3_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(2, uchar2, Dv2_h, float2, + Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(3, uchar2, Dv2_h, float3, + Dv3_f) // return 4-channel color data _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(1, uchar4, Dv4_h, float, f) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(2, uchar4, Dv4_h, float2, Dv2_f) -_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(3, uchar4, Dv4_h, float3, Dv3_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(2, uchar4, Dv4_h, float2, + Dv2_f) +_CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN(3, uchar4, Dv4_h, float3, + Dv3_f) #undef _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_8_BUILTIN #undef _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_16_BUILTIN #undef _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_32_BUILTIN -// -// SAMPLED IMAGE ARRAYS -// - -// Read Ops - -#define _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_BUILTIN( \ - dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size, \ - builtin_ret_t, builtin_ret_postfix) \ - _CLC_DEF elem_t _CLC_MANGLE_FUNC_IMG_HANDLE( \ - 22, __spirv_ImageArrayRead, I##elem_t_mangled, \ - coord_mangled##ET_T0_T1_i)(ulong imageHandle, coord_t coord, int layer) { \ - _CLC_CONST_AS unsigned int *tex = (_CLC_CONST_AS unsigned int *)imageHandle; \ - _CLC_CONST_AS unsigned int *samp = tex + SAMPLER_OBJECT_OFFSET_DWORD; \ - float##vec_size arrayCoord = \ - (float##vec_size)(_CLC_ARRAY_COORD_PARAMS_##dimension##D(coord, (float)layer)); \ - builtin_ret_t##4 color = \ - __ockl_image_sample##builtin_ret_postfix##_##dimension##Da(tex, samp, \ - arrayCoord); \ - return __clc_cast_from_##builtin_ret_t##4_to_##elem_t(color); \ - } - -#define _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN( \ - dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) \ - _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_BUILTIN(dimension, elem_t, \ - elem_t_mangled, coord_t, \ - coord_mangled, vec_size, \ - float,) - -#define _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN( \ - dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) \ - _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_BUILTIN(dimension, elem_t, \ - elem_t_mangled, coord_t, \ - coord_mangled, vec_size, \ - half, h) - -#define _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN( \ - dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) \ - _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(dimension, elem_t, \ - elem_t_mangled, coord_t, \ - coord_mangled, vec_size) - -// Float -// return 1 channel color data -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(1, float, f, float, f, 2) -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(2, float, f, float2, Dv2_f, 4) -// return 2 channel color data -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(1, float2, Dv2_f, float, f, 2) -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(2, float2, Dv2_f, float2, Dv2_f, 4) -// return 4 channel color data -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(1, float4, Dv4_f, float, f, 2) -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(2, float4, Dv4_f, float2, Dv2_f, 4) - -// Half -#ifdef cl_khr_fp16 -// return 1 channel color data -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, half, DF16_, float, f, 2) -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, half, DF16_, float2, Dv2_f, 4) -// return 2 channel color data -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, half2, Dv2_DF16_, float, f, 2) -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, half2, Dv2_DF16_, float2, Dv2_f, 4) -// return 4 channel color data -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, half4, Dv4_DF16_, float, f, 2) -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, half4, Dv4_DF16_, float2, Dv2_f, 4) -#endif - -// Int -// return 1 channel color data -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(1, int, i, float, f, 2) -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(2, int, i, float2, Dv2_f, 4) -// return 2 channel color data -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(1, int2, Dv2_i, float, f, 2) -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(2, int2, Dv2_i, float2, Dv2_f, 4) -// return 4 channel color data -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(1, int4, Dv4_i, float, f, 2) -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(2, int4, Dv4_i, float2, Dv2_f, 4) - -// Unsigned Int -// return 1 channel color data -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(1, uint, j, float, f, 2) -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(2, uint, j, float2, Dv2_f, 4) -// return 2 channel color data -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(1, uint2, Dv2_j, float, f, 2) -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(2, uint2, Dv2_j, float2, Dv2_f, 4) -// return 4 channel color data -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(1, uint4, Dv4_j, float, f, 2) -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(2, uint4, Dv4_j, float2, Dv2_f, 4) - -// Short -// return 1 channel color data -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, short, s, float, f, 2) -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, short, s, float2, Dv2_f, 4) -// return 2 channel color data -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, short2, Dv2_s, float, f, 2) -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, short2, Dv2_s, float2, Dv2_f, 4) -// return 4 channel color data -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, short4, Dv4_s, float, f, 2) -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, short4, Dv4_s, float2, Dv2_f, 4) - -// Unsigned Short -// return 1 channel color data -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, ushort, t, float, f, 2) -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, ushort, t, float2, Dv2_f, 4) -// return 2 channel color data -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, ushort2, Dv2_t, float, f, 2) -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, ushort2, Dv2_t, float2, Dv2_f, 4) -// return 4 channel color data -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, ushort4, Dv4_t, float, f, 2) -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, ushort4, Dv4_t, float2, Dv2_f, 4) - -// Char -// return 1 channel color data -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(1, char, a, float, f, 2) -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(2, char, a, float2, Dv2_f, 4) -// return 2 channel color data -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(1, char2, Dv2_a, float, f, 2) -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(2, char2, Dv2_a, float2, Dv2_f, 4) -// return 4 channel color data -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(1, char4, Dv4_a, float, f, 2) -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(2, char4, Dv4_a, float2, Dv2_f, 4) - -// Unsigned Char -// return 1 channel color data -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(1, uchar, h, float, f, 2) -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(2, uchar, h, float2, Dv2_f, 4) -// return 2 channel color data -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(1, uchar2, Dv2_h, float, f, 2) -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(2, uchar2, Dv2_h, float2, Dv2_f, 4) -// return 4 channel color data -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(1, uchar4, Dv4_h, float, f, 2) -_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(2, uchar4, Dv4_h, float2, Dv2_f, 4) - -#undef _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN -#undef _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN -#undef _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN - -#undef _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_BUILTIN #undef _CLC_DEFINE_SAMPLEDIMAGE_BINDLESS_READ_BUILTIN -#undef _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_BUILTIN -#undef _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_BUILTIN - #undef _CLC_DEFINE_IMAGE_BINDLESS_WRITE_BUILTIN #undef _CLC_DEFINE_IMAGE_BINDLESS_FETCH_BUILTIN -#undef _CLC_ARRAY_COORD_PARAMS_1D -#undef _CLC_ARRAY_COORD_PARAMS_2D - #undef _CLC_CONST_AS - #undef _CLC_MANGLE_FUNC_IMG_HANDLE diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/images/image_array.cl b/libclc/libspirv/lib/amdgcn-amdhsa/images/image_array.cl new file mode 100644 index 0000000000000..62efb4257a95e --- /dev/null +++ b/libclc/libspirv/lib/amdgcn-amdhsa/images/image_array.cl @@ -0,0 +1,525 @@ +#include "image_common.h" +#include + +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +#endif + +#define _CLC_ARRAY_COORD_PARAMS_1D(coord, layer) coord, layer +#define _CLC_ARRAY_COORD_PARAMS_2D(coord, layer) coord.x, coord.y, layer, 0 + +// Declare ockl functions/builtins that we link from the ROCm device libs. +float4 __ockl_image_load_1Da(_CLC_CONST_AS unsigned int *tex, int2 coord); +float4 __ockl_image_load_2Da(_CLC_CONST_AS unsigned int *tex, int4 coord); +half4 __ockl_image_loadh_1Da(_CLC_CONST_AS unsigned int *tex, int2 coord); +half4 __ockl_image_loadh_2Da(_CLC_CONST_AS unsigned int *tex, int4 coord); + +void __ockl_image_store_1Da(_CLC_CONST_AS unsigned int *tex, int2 coord, + float4 color); +void __ockl_image_store_2Da(_CLC_CONST_AS unsigned int *tex, int4 coord, + float4 color); +void __ockl_image_storeh_1Da(_CLC_CONST_AS unsigned int *tex, int2 coord, + half4 color); +void __ockl_image_storeh_2Da(_CLC_CONST_AS unsigned int *tex, int4 coord, + half4 color); + +float4 __ockl_image_sample_1Da(_CLC_CONST_AS unsigned int *tex, + _CLC_CONST_AS unsigned int *samp, float2 coord); +float4 __ockl_image_sample_2Da(_CLC_CONST_AS unsigned int *tex, + _CLC_CONST_AS unsigned int *samp, float4 coord); +half4 __ockl_image_sampleh_1Da(_CLC_CONST_AS unsigned int *tex, + _CLC_CONST_AS unsigned int *samp, float2 coord); +half4 __ockl_image_sampleh_2Da(_CLC_CONST_AS unsigned int *tex, + _CLC_CONST_AS unsigned int *samp, float4 coord); + +// +// IMAGE ARRAYS +// + +// Read Ops + +#define _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size, \ + builtin_ret_t, builtin_ret_postfix) \ + _CLC_DEF elem_t _CLC_MANGLE_FUNC_IMG_HANDLE(23, __spirv_ImageArrayFetch, \ + I##elem_t_mangled, \ + coord_mangled##ET_T0_T1_i)( \ + ulong imageHandle, coord_t coord, int layer) { \ + _CLC_CONST_AS unsigned int *tex = \ + (_CLC_CONST_AS unsigned int *)imageHandle; \ + int##vec_size arrayCoord = \ + (int##vec_size)(_CLC_ARRAY_COORD_PARAMS_##dimension##D(coord, layer)); \ + builtin_ret_t##4 color = \ + __ockl_image_load##builtin_ret_postfix##_##dimension##Da(tex, \ + arrayCoord); \ + return __clc_cast_from_##builtin_ret_t##4_to_##elem_t(color); \ + } + +#define _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) \ + _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size, \ + float, ) + +#define _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) \ + _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size, \ + half, h) + +#define _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) \ + _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) + +// Float +// return 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(1, float, f, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(2, float, f, int2, Dv2_i, 4) +// return 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(1, float2, Dv2_f, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(2, float2, Dv2_f, int2, Dv2_i, + 4) +// return 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(1, float4, Dv4_f, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(2, float4, Dv4_f, int2, Dv2_i, + 4) + +// Half +#ifdef cl_khr_fp16 +// return 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, half, DF16_, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, half, DF16_, int2, Dv2_i, + 4) +// return 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, half2, Dv2_DF16_, int, i, + 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, half2, Dv2_DF16_, int2, + Dv2_i, 4) +// return 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, half4, Dv4_DF16_, int, i, + 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, half4, Dv4_DF16_, int2, + Dv2_i, 4) +#endif + +// Int +// return 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(1, int, i, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(2, int, i, int2, Dv2_i, 4) +// return 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(1, int2, Dv2_i, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(2, int2, Dv2_i, int2, Dv2_i, + 4) +// return 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(1, int4, Dv4_i, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(2, int4, Dv4_i, int2, Dv2_i, + 4) + +// Unsigned Int +// return 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(1, uint, j, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(2, uint, j, int2, Dv2_i, 4) +// return 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(1, uint2, Dv2_j, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(2, uint2, Dv2_j, int2, Dv2_i, + 4) +// return 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(1, uint4, Dv4_j, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN(2, uint4, Dv4_j, int2, Dv2_i, + 4) + +// Short +// return 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, short, s, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, short, s, int2, Dv2_i, 4) +// return 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, short2, Dv2_s, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, short2, Dv2_s, int2, Dv2_i, + 4) +// return 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, short4, Dv4_s, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, short4, Dv4_s, int2, Dv2_i, + 4) + +// Unsigned Short +// return 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, ushort, t, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, ushort, t, int2, Dv2_i, 4) +// return 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, ushort2, Dv2_t, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, ushort2, Dv2_t, int2, + Dv2_i, 4) +// return 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(1, ushort4, Dv4_t, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN(2, ushort4, Dv4_t, int2, + Dv2_i, 4) + +// Char +// return 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(1, char, a, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(2, char, a, int2, Dv2_i, 4) +// return 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(1, char2, Dv2_a, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(2, char2, Dv2_a, int2, Dv2_i, + 4) +// return 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(1, char4, Dv4_a, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(2, char4, Dv4_a, int2, Dv2_i, + 4) + +// Unsigned Char +// return 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(1, uchar, h, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(2, uchar, h, int2, Dv2_i, 4) +// return 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(1, uchar2, Dv2_h, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(2, uchar2, Dv2_h, int2, Dv2_i, + 4) +// return 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(1, uchar4, Dv4_h, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN(2, uchar4, Dv4_h, int2, Dv2_i, + 4) + +#undef _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_8_BUILTIN +#undef _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_16_BUILTIN +#undef _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_32_BUILTIN + +// Write Ops + +#define _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size, \ + builtin_ret_t, builtin_ret_postfix) \ + _CLC_DEF void _CLC_MANGLE_FUNC_IMG_HANDLE( \ + 23, __spirv_ImageArrayWrite, I, \ + coord_mangled##elem_t_mangled##EvT_T0_iT1_)( \ + ulong imageHandle, coord_t coord, int layer, elem_t color) { \ + _CLC_CONST_AS unsigned int *tex = \ + (_CLC_CONST_AS unsigned int *)imageHandle; \ + int##vec_size arrayCoord = \ + (int##vec_size)(_CLC_ARRAY_COORD_PARAMS_##dimension##D(coord, layer)); \ + builtin_ret_t##4 outColor = \ + __clc_cast_from_##elem_t##_to_##builtin_ret_t##4(color); \ + __ockl_image_store##builtin_ret_postfix##_##dimension##Da(tex, arrayCoord, \ + outColor); \ + } + +#define _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) \ + _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size, \ + float, ) + +#define _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) \ + _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size, \ + half, h) + +#define _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) \ + _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) + +// Float +// write 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(1, float, f, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(2, float, f, int2, Dv2_i, 4) +// write 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(1, float2, Dv2_f, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(2, float2, Dv2_f, int2, Dv2_i, + 4) +// write 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(1, float4, Dv4_f, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(2, float4, Dv4_f, int2, Dv2_i, + 4) + +// Half +#ifdef cl_khr_fp16 +// write 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, half, DF16_, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, half, DF16_, int2, Dv2_i, + 4) +// write 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, half2, Dv2_DF16_, int, i, + 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, half2, Dv2_DF16_, int2, + Dv2_i, 4) +// write 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, half4, Dv4_DF16_, int, i, + 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, half4, Dv4_DF16_, int2, + Dv2_i, 4) +#endif + +// Int +// write 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(1, int, i, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(2, int, i, int2, Dv2_i, 4) +// write 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(1, int2, Dv2_i, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(2, int2, Dv2_i, int2, Dv2_i, + 4) +// write 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(1, int4, Dv4_i, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(2, int4, Dv4_i, int2, Dv2_i, + 4) + +// Unsigned Int +// write 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(1, uint, j, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(2, uint, j, int2, Dv2_i, 4) +// write 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(1, uint2, Dv2_j, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(2, uint2, Dv2_j, int2, Dv2_i, + 4) +// write 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(1, uint4, Dv4_j, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN(2, uint4, Dv4_j, int2, Dv2_i, + 4) + +// Short +// write 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, short, s, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, short, s, int2, Dv2_i, 4) +// write 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, short2, Dv2_s, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, short2, Dv2_s, int2, Dv2_i, + 4) +// write 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, short4, Dv4_s, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, short4, Dv4_s, int2, Dv2_i, + 4) + +// Unsigned Short +// write 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, ushort, t, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, ushort, t, int2, Dv2_i, 4) +// write 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, ushort2, Dv2_t, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, ushort2, Dv2_t, int2, + Dv2_i, 4) +// write 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(1, ushort4, Dv4_t, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN(2, ushort4, Dv4_t, int2, + Dv2_i, 4) + +// Char +// write 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(1, char, a, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(2, char, a, int2, Dv2_i, 4) +// write 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(1, char2, Dv2_a, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(2, char2, Dv2_a, int2, Dv2_i, + 4) +// write 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(1, char4, Dv4_a, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(2, char4, Dv4_a, int2, Dv2_i, + 4) + +// Unsigned Char +// write 1-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(1, uchar, h, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(2, uchar, h, int2, Dv2_i, 4) +// write 2-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(1, uchar2, Dv2_h, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(2, uchar2, Dv2_h, int2, Dv2_i, + 4) +// write 4-channel color data +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(1, uchar4, Dv4_h, int, i, 2) +_CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN(2, uchar4, Dv4_h, int2, Dv2_i, + 4) + +#undef _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_8_BUILTIN +#undef _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_16_BUILTIN +#undef _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_32_BUILTIN + +// +// SAMPLED IMAGE ARRAYS +// + +// Read Ops + +#define _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size, \ + builtin_ret_t, builtin_ret_postfix) \ + _CLC_DEF elem_t _CLC_MANGLE_FUNC_IMG_HANDLE(22, __spirv_ImageArrayRead, \ + I##elem_t_mangled, \ + coord_mangled##ET_T0_T1_i)( \ + ulong imageHandle, coord_t coord, int layer) { \ + _CLC_CONST_AS unsigned int *tex = \ + (_CLC_CONST_AS unsigned int *)imageHandle; \ + _CLC_CONST_AS unsigned int *samp = tex + SAMPLER_OBJECT_OFFSET_DWORD; \ + float##vec_size arrayCoord = (float##vec_size)( \ + _CLC_ARRAY_COORD_PARAMS_##dimension##D(coord, (float)layer)); \ + builtin_ret_t##4 color = \ + __ockl_image_sample##builtin_ret_postfix##_##dimension##Da( \ + tex, samp, arrayCoord); \ + return __clc_cast_from_##builtin_ret_t##4_to_##elem_t(color); \ + } + +#define _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) \ + _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size, \ + float, ) + +#define _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) \ + _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size, \ + half, h) + +#define _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) \ + _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN( \ + dimension, elem_t, elem_t_mangled, coord_t, coord_mangled, vec_size) + +// Float +// return 1 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(1, float, f, float, f, + 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(2, float, f, float2, + Dv2_f, 4) +// return 2 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(1, float2, Dv2_f, float, + f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(2, float2, Dv2_f, + float2, Dv2_f, 4) +// return 4 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(1, float4, Dv4_f, float, + f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(2, float4, Dv4_f, + float2, Dv2_f, 4) + +// Half +#ifdef cl_khr_fp16 +// return 1 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, half, DF16_, float, + f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, half, DF16_, float2, + Dv2_f, 4) +// return 2 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, half2, Dv2_DF16_, + float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, half2, Dv2_DF16_, + float2, Dv2_f, 4) +// return 4 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, half4, Dv4_DF16_, + float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, half4, Dv4_DF16_, + float2, Dv2_f, 4) +#endif + +// Int +// return 1 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(1, int, i, float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(2, int, i, float2, + Dv2_f, 4) +// return 2 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(1, int2, Dv2_i, float, + f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(2, int2, Dv2_i, float2, + Dv2_f, 4) +// return 4 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(1, int4, Dv4_i, float, + f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(2, int4, Dv4_i, float2, + Dv2_f, 4) + +// Unsigned Int +// return 1 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(1, uint, j, float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(2, uint, j, float2, + Dv2_f, 4) +// return 2 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(1, uint2, Dv2_j, float, + f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(2, uint2, Dv2_j, float2, + Dv2_f, 4) +// return 4 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(1, uint4, Dv4_j, float, + f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN(2, uint4, Dv4_j, float2, + Dv2_f, 4) + +// Short +// return 1 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, short, s, float, f, + 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, short, s, float2, + Dv2_f, 4) +// return 2 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, short2, Dv2_s, float, + f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, short2, Dv2_s, + float2, Dv2_f, 4) +// return 4 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, short4, Dv4_s, float, + f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, short4, Dv4_s, + float2, Dv2_f, 4) + +// Unsigned Short +// return 1 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, ushort, t, float, f, + 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, ushort, t, float2, + Dv2_f, 4) +// return 2 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, ushort2, Dv2_t, + float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, ushort2, Dv2_t, + float2, Dv2_f, 4) +// return 4 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(1, ushort4, Dv4_t, + float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN(2, ushort4, Dv4_t, + float2, Dv2_f, 4) + +// Char +// return 1 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(1, char, a, float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(2, char, a, float2, + Dv2_f, 4) +// return 2 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(1, char2, Dv2_a, float, + f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(2, char2, Dv2_a, float2, + Dv2_f, 4) +// return 4 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(1, char4, Dv4_a, float, + f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(2, char4, Dv4_a, float2, + Dv2_f, 4) + +// Unsigned Char +// return 1 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(1, uchar, h, float, f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(2, uchar, h, float2, + Dv2_f, 4) +// return 2 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(1, uchar2, Dv2_h, float, + f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(2, uchar2, Dv2_h, float2, + Dv2_f, 4) +// return 4 channel color data +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(1, uchar4, Dv4_h, float, + f, 2) +_CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN(2, uchar4, Dv4_h, float2, + Dv2_f, 4) + +#undef _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_8_BUILTIN +#undef _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_16_BUILTIN +#undef _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_32_BUILTIN + +#undef _CLC_DEFINE_SAMPLEDIMAGE_ARRAY_BINDLESS_READ_BUILTIN + +#undef _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_WRITE_BUILTIN +#undef _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_FETCH_BUILTIN + +#undef _CLC_ARRAY_COORD_PARAMS_1D +#undef _CLC_ARRAY_COORD_PARAMS_2D + +#undef _CLC_CONST_AS +#undef _CLC_MANGLE_FUNC_IMG_HANDLE diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/images/image_common.cl b/libclc/libspirv/lib/amdgcn-amdhsa/images/image_common.cl new file mode 100644 index 0000000000000..af2734bb04ce4 --- /dev/null +++ b/libclc/libspirv/lib/amdgcn-amdhsa/images/image_common.cl @@ -0,0 +1,159 @@ +#include "image_common.h" + +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +#endif + +// From +// https://github.com/ROCm/clr/tree/amd-staging/hipamd/include/hip/amd_detail/texture_fetch_functions.h +_CLC_CONST_AS const unsigned int SAMPLER_OBJECT_OFFSET_DWORD = 12; + +// Using the builtin as_type() and as_typen() functions to reinterpret types. +// The restriction being is that element "type"s need to be of the same size. +#define _CLC_DEFINE_BUILTIN_CAST_VEC4_TO_VEC3(vec4_elem_t, to_t) \ + _CLC_DEF to_t##3 __clc_cast_from_##vec4_elem_t##4_to_##to_t##3( \ + vec4_elem_t##4 from) { \ + vec4_elem_t##3 casted = as_##vec4_elem_t##3(from); \ + return as_##to_t##3(casted); \ + } +#define _CLC_DEFINE_BUILTIN_CAST_VEC4_TO_VEC2(vec4_elem_t, to_t) \ + _CLC_DEF to_t##2 __clc_cast_from_##vec4_elem_t##4_to_##to_t##2( \ + vec4_elem_t##4 from) { \ + vec4_elem_t##4 casted = as_##vec4_elem_t##4(from); \ + return as_##to_t##2((vec4_elem_t##2)(casted.x, casted.y)); \ + } +#define _CLC_DEFINE_BUILTIN_CAST_VEC4_TO_SCALAR(vec4_elem_t, to_t) \ + _CLC_DEF to_t __clc_cast_from_##vec4_elem_t##4_to_##to_t( \ + vec4_elem_t##4 from) { \ + vec4_elem_t##4 casted = as_##vec4_elem_t##4(from); \ + return as_##to_t(casted.x); \ + } +#define _CLC_DEFINE_BUILTIN_CAST_VEC3_TO_VEC4(from_t, vec4_elem_t) \ + _CLC_DEF vec4_elem_t##4 __clc_cast_from_##from_t##3_to_##vec4_elem_t##4( \ + from_t##3 from) { \ + vec4_elem_t##3 casted = as_##vec4_elem_t##3(from); \ + return as_##vec4_elem_t##4(casted); \ + } +#define _CLC_DEFINE_BUILTIN_CAST_VEC2_TO_VEC4(from_t, vec4_elem_t) \ + _CLC_DEF vec4_elem_t##4 __clc_cast_from_##from_t##2_to_##vec4_elem_t##4( \ + from_t##2 from) { \ + vec4_elem_t##2 casted = as_##vec4_elem_t##2(from); \ + return (vec4_elem_t##4)(casted.x, casted.y, 0, 0); \ + } +#define _CLC_DEFINE_BUILTIN_CAST_SCALAR_TO_VEC4(from_t, vec4_elem_t) \ + _CLC_DEF vec4_elem_t##4 __clc_cast_from_##from_t##_to_##vec4_elem_t##4( \ + from_t from) { \ + vec4_elem_t casted = as_##vec4_elem_t(from); \ + return (vec4_elem_t##4)(casted, 0, 0, 0); \ + } + +// Generic casts between builtin types. +#define _CLC_DEFINE_CAST_VEC4(vec4_elem_t, to_t) \ + _CLC_DEF to_t##4 __clc_cast_from_##vec4_elem_t##4_to_##to_t##4( \ + vec4_elem_t##4 from) { \ + return (to_t##4)(from.x, from.y, from.z, from.w); \ + } +#define _CLC_DEFINE_CAST_VEC4_TO_VEC3(vec4_elem_t, to_t) \ + _CLC_DEF to_t##3 __clc_cast_from_##vec4_elem_t##4_to_##to_t##3( \ + vec4_elem_t##4 from) { \ + return (to_t##3)(from.x, from.y, from.z); \ + } +#define _CLC_DEFINE_CAST_VEC4_TO_VEC2(vec4_elem_t, to_t) \ + _CLC_DEF to_t##2 __clc_cast_from_##vec4_elem_t##4_to_##to_t##2( \ + vec4_elem_t##4 from) { \ + return (to_t##2)(from.x, from.y); \ + } +#define _CLC_DEFINE_CAST_VEC4_TO_SCALAR(vec4_elem_t, to_t) \ + _CLC_DEF to_t __clc_cast_from_##vec4_elem_t##4_to_##to_t( \ + vec4_elem_t##4 from) { \ + return (to_t)from.x; \ + } +#define _CLC_DEFINE_CAST_VEC3_TO_VEC4(from_t, vec4_elem_t) \ + _CLC_DEF vec4_elem_t##4 __clc_cast_from_##from_t##3_to_##vec4_elem_t##4( \ + from_t##3 from) { \ + return (vec4_elem_t##4)(from.x, from.y, from.z, 0); \ + } +#define _CLC_DEFINE_CAST_VEC2_TO_VEC4(from_t, vec4_elem_t) \ + _CLC_DEF vec4_elem_t##4 __clc_cast_from_##from_t##2_to_##vec4_elem_t##4( \ + from_t##2 from) { \ + return (vec4_elem_t##4)(from.x, from.y, 0, 0); \ + } +#define _CLC_DEFINE_CAST_SCALAR_TO_VEC4(from_t, vec4_elem_t) \ + _CLC_DEF vec4_elem_t##4 __clc_cast_from_##from_t##_to_##vec4_elem_t##4( \ + from_t from) { \ + return (vec4_elem_t##4)(from, 0, 0, 0); \ + } + +// Helpers to extract N channel(s) from a four-channel (RGBA/XYZW) color type. + +#define _CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(from_t, to_t) \ + _CLC_DEFINE_CAST_VEC4(from_t, to_t) \ + _CLC_DEFINE_BUILTIN_CAST_VEC4_TO_VEC3(from_t, to_t) \ + _CLC_DEFINE_BUILTIN_CAST_VEC4_TO_VEC2(from_t, to_t) \ + _CLC_DEFINE_BUILTIN_CAST_VEC4_TO_SCALAR(from_t, to_t) \ + _CLC_DEFINE_BUILTIN_CAST_VEC2_TO_VEC4(from_t, to_t) \ + _CLC_DEFINE_BUILTIN_CAST_VEC3_TO_VEC4(from_t, to_t) \ + _CLC_DEFINE_BUILTIN_CAST_SCALAR_TO_VEC4(from_t, to_t) + +#define _CLC_DEFINE_EXTRACT_COLOR_HELPERS(from_t, to_t) \ + _CLC_DEFINE_CAST_VEC4(from_t, to_t) \ + _CLC_DEFINE_CAST_VEC4_TO_VEC3(from_t, to_t) \ + _CLC_DEFINE_CAST_VEC4_TO_VEC2(from_t, to_t) \ + _CLC_DEFINE_CAST_VEC4_TO_SCALAR(from_t, to_t) \ + _CLC_DEFINE_CAST_VEC2_TO_VEC4(from_t, to_t) \ + _CLC_DEFINE_CAST_VEC3_TO_VEC4(from_t, to_t) \ + _CLC_DEFINE_CAST_SCALAR_TO_VEC4(from_t, to_t) + +// Define casts between supported builtin types for image color + +_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(float, float) +_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(float, int) +_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(int, float) +_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(float, uint) +_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(uint, float) +#ifdef cl_khr_fp16 +_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(half, half) +_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(half, short) +_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(short, half) +_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(half, ushort) +_CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS(ushort, half) +#endif + +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(float, short) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(short, float) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(float, ushort) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(ushort, float) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(float, char) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(char, float) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(float, uchar) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(uchar, float) +#ifdef cl_khr_fp16 +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(float, half) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(half, float) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(half, int) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(int, half) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(half, uint) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(uint, half) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(half, char) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(char, half) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(half, uchar) +_CLC_DEFINE_EXTRACT_COLOR_HELPERS(uchar, half) +#endif + +#undef _CLC_DEFINE_EXTRACT_COLOR_HELPERS +#undef _CLC_DEFINE_EXTRACT_SAME_SIZE_COLOR_HELPERS + +#undef _CLC_DEFINE_CAST_SCALAR_TO_VEC4 +#undef _CLC_DEFINE_CAST_VEC2_TO_VEC4 +#undef _CLC_DEFINE_CAST_VEC3_TO_VEC4 +#undef _CLC_DEFINE_CAST_VEC4_TO_SCALAR +#undef _CLC_DEFINE_CAST_VEC4_TO_VEC3 +#undef _CLC_DEFINE_CAST_VEC4_TO_VEC2 +#undef _CLC_DEFINE_CAST_VEC4 +#undef _CLC_DEFINE_BUILTIN_CAST_SCALAR_TO_VEC4 +#undef _CLC_DEFINE_BUILTIN_CAST_VEC2_TO_VEC4 +#undef _CLC_DEFINE_BUILTIN_VEC3_TO_VEC4 +#undef _CLC_DEFINE_BUILTIN_VEC4_TO_SCALAR +#undef _CLC_DEFINE_BUILTIN_VEC4_TO_VEC3 +#undef _CLC_DEFINE_BUILTIN_VEC4_TO_VEC2 +#undef _CLC_DEFINE_BUILTIN_VEC4 diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/images/image_common.h b/libclc/libspirv/lib/amdgcn-amdhsa/images/image_common.h new file mode 100644 index 0000000000000..eb32fb77d5c2d --- /dev/null +++ b/libclc/libspirv/lib/amdgcn-amdhsa/images/image_common.h @@ -0,0 +1,164 @@ +#ifndef CLC_SPIRV_IMAGE_COMMON +#define CLC_SPIRV_IMAGE_COMMON + +#include + +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +#endif + +#ifdef _WIN32 +#define _CLC_MANGLE_FUNC_IMG_HANDLE(namelength, name, prefix, postfix) \ + _Z##namelength##name##prefix##y##postfix +#else +#define _CLC_MANGLE_FUNC_IMG_HANDLE(namelength, name, prefix, postfix) \ + _Z##namelength##name##prefix##m##postfix +#endif + +// The ockl functions/builtins we link against from the ROCm device libs expect +// resources to reside in constant address space. +#if __clang_major__ >= 8 +#define _CLC_CONST_AS __constant +#elif __clang_major__ >= 7 +#define _CLC_CONST_AS __attribute__((address_space(4))) +#else +#define _CLC_CONST_AS __attribute__((address_space(2))) +#endif + +// From +// https://github.com/ROCm/clr/tree/amd-staging/hipamd/include/hip/amd_detail/texture_fetch_functions.h +// defined in "image_common.cl" +extern _CLC_CONST_AS const unsigned int SAMPLER_OBJECT_OFFSET_DWORD; + +// Helpers for casting between two builitin vector types and/or scalar types. + +// Using the builtin as_type() and as_typen() functions to reinterpret types. +// The restriction being is that element "type"s need to be of the same size. +#define _CLC_DECLARE_BUILTIN_CAST_VEC4_TO_VEC3(vec4_elem_t, to_t) \ + _CLC_DECL to_t##3 __clc_cast_from_##vec4_elem_t##4_to_##to_t##3( \ + vec4_elem_t##4 from); + +#define _CLC_DECLARE_BUILTIN_CAST_VEC4_TO_VEC2(vec4_elem_t, to_t) \ + _CLC_DECL to_t##2 __clc_cast_from_##vec4_elem_t##4_to_##to_t##2( \ + vec4_elem_t##4 from); + +#define _CLC_DECLARE_BUILTIN_CAST_VEC4_TO_SCALAR(vec4_elem_t, to_t) \ + _CLC_DECL to_t __clc_cast_from_##vec4_elem_t##4_to_##to_t( \ + vec4_elem_t##4 from); + +#define _CLC_DECLARE_BUILTIN_CAST_VEC3_TO_VEC4(from_t, vec4_elem_t) \ + _CLC_DECL vec4_elem_t##4 __clc_cast_from_##from_t##3_to_##vec4_elem_t##4( \ + from_t##3 from); + +#define _CLC_DECLARE_BUILTIN_CAST_VEC2_TO_VEC4(from_t, vec4_elem_t) \ + _CLC_DECL vec4_elem_t##4 __clc_cast_from_##from_t##2_to_##vec4_elem_t##4( \ + from_t##2 from); + +#define _CLC_DECLARE_BUILTIN_CAST_SCALAR_TO_VEC4(from_t, vec4_elem_t) \ + _CLC_DECL vec4_elem_t##4 __clc_cast_from_##from_t##_to_##vec4_elem_t##4( \ + from_t from); + +// Generic casts between builtin types. +#define _CLC_DECLARE_CAST_VEC4(vec4_elem_t, to_t) \ + _CLC_DECL to_t##4 __clc_cast_from_##vec4_elem_t##4_to_##to_t##4( \ + vec4_elem_t##4 from); + +#define _CLC_DECLARE_CAST_VEC4_TO_VEC3(vec4_elem_t, to_t) \ + _CLC_DECL to_t##3 __clc_cast_from_##vec4_elem_t##4_to_##to_t##3( \ + vec4_elem_t##4 from); + +#define _CLC_DECLARE_CAST_VEC4_TO_VEC2(vec4_elem_t, to_t) \ + _CLC_DECL to_t##2 __clc_cast_from_##vec4_elem_t##4_to_##to_t##2( \ + vec4_elem_t##4 from); + +#define _CLC_DECLARE_CAST_VEC4_TO_SCALAR(vec4_elem_t, to_t) \ + _CLC_DECL to_t __clc_cast_from_##vec4_elem_t##4_to_##to_t( \ + vec4_elem_t##4 from); + +#define _CLC_DECLARE_CAST_VEC3_TO_VEC4(from_t, vec4_elem_t) \ + _CLC_DECL vec4_elem_t##4 __clc_cast_from_##from_t##3_to_##vec4_elem_t##4( \ + from_t##3 from); + +#define _CLC_DECLARE_CAST_VEC2_TO_VEC4(from_t, vec4_elem_t) \ + _CLC_DECL vec4_elem_t##4 __clc_cast_from_##from_t##2_to_##vec4_elem_t##4( \ + from_t##2 from); + +#define _CLC_DECLARE_CAST_SCALAR_TO_VEC4(from_t, vec4_elem_t) \ + _CLC_DECL vec4_elem_t##4 __clc_cast_from_##from_t##_to_##vec4_elem_t##4( \ + from_t from); + +// Helpers to extract N channel(s) from a four-channel (RGBA/XYZW) color type. + +#define _CLC_DECLARE_EXTRACT_SAME_SIZE_COLOR_HELPERS(from_t, to_t) \ + _CLC_DECLARE_CAST_VEC4(from_t, to_t) \ + _CLC_DECLARE_BUILTIN_CAST_VEC4_TO_VEC3(from_t, to_t) \ + _CLC_DECLARE_BUILTIN_CAST_VEC4_TO_VEC2(from_t, to_t) \ + _CLC_DECLARE_BUILTIN_CAST_VEC4_TO_SCALAR(from_t, to_t) \ + _CLC_DECLARE_BUILTIN_CAST_VEC2_TO_VEC4(from_t, to_t) \ + _CLC_DECLARE_BUILTIN_CAST_VEC3_TO_VEC4(from_t, to_t) \ + _CLC_DECLARE_BUILTIN_CAST_SCALAR_TO_VEC4(from_t, to_t) + +#define _CLC_DECLARE_EXTRACT_COLOR_HELPERS(from_t, to_t) \ + _CLC_DECLARE_CAST_VEC4(from_t, to_t) \ + _CLC_DECLARE_CAST_VEC4_TO_VEC3(from_t, to_t) \ + _CLC_DECLARE_CAST_VEC4_TO_VEC2(from_t, to_t) \ + _CLC_DECLARE_CAST_VEC4_TO_SCALAR(from_t, to_t) \ + _CLC_DECLARE_CAST_VEC2_TO_VEC4(from_t, to_t) \ + _CLC_DECLARE_CAST_VEC3_TO_VEC4(from_t, to_t) \ + _CLC_DECLARE_CAST_SCALAR_TO_VEC4(from_t, to_t) + +// Define casts between supported builtin types for image color + +_CLC_DECLARE_EXTRACT_SAME_SIZE_COLOR_HELPERS(float, float) +_CLC_DECLARE_EXTRACT_SAME_SIZE_COLOR_HELPERS(float, int) +_CLC_DECLARE_EXTRACT_SAME_SIZE_COLOR_HELPERS(int, float) +_CLC_DECLARE_EXTRACT_SAME_SIZE_COLOR_HELPERS(float, uint) +_CLC_DECLARE_EXTRACT_SAME_SIZE_COLOR_HELPERS(uint, float) +#ifdef cl_khr_fp16 +_CLC_DECLARE_EXTRACT_SAME_SIZE_COLOR_HELPERS(half, half) +_CLC_DECLARE_EXTRACT_SAME_SIZE_COLOR_HELPERS(half, short) +_CLC_DECLARE_EXTRACT_SAME_SIZE_COLOR_HELPERS(short, half) +_CLC_DECLARE_EXTRACT_SAME_SIZE_COLOR_HELPERS(half, ushort) +_CLC_DECLARE_EXTRACT_SAME_SIZE_COLOR_HELPERS(ushort, half) +#endif + +_CLC_DECLARE_EXTRACT_COLOR_HELPERS(half, float) +_CLC_DECLARE_EXTRACT_COLOR_HELPERS(float, short) +_CLC_DECLARE_EXTRACT_COLOR_HELPERS(short, float) +_CLC_DECLARE_EXTRACT_COLOR_HELPERS(float, ushort) +_CLC_DECLARE_EXTRACT_COLOR_HELPERS(ushort, float) +_CLC_DECLARE_EXTRACT_COLOR_HELPERS(float, char) +_CLC_DECLARE_EXTRACT_COLOR_HELPERS(char, float) +_CLC_DECLARE_EXTRACT_COLOR_HELPERS(float, uchar) +_CLC_DECLARE_EXTRACT_COLOR_HELPERS(uchar, float) +#ifdef cl_khr_fp16 +_CLC_DECLARE_EXTRACT_COLOR_HELPERS(float, half) +_CLC_DECLARE_EXTRACT_COLOR_HELPERS(half, int) +_CLC_DECLARE_EXTRACT_COLOR_HELPERS(int, half) +_CLC_DECLARE_EXTRACT_COLOR_HELPERS(half, uint) +_CLC_DECLARE_EXTRACT_COLOR_HELPERS(uint, half) +_CLC_DECLARE_EXTRACT_COLOR_HELPERS(half, char) +_CLC_DECLARE_EXTRACT_COLOR_HELPERS(char, half) +_CLC_DECLARE_EXTRACT_COLOR_HELPERS(half, uchar) +_CLC_DECLARE_EXTRACT_COLOR_HELPERS(uchar, half) +#endif + +#undef _CLC_DECLARE_EXTRACT_COLOR_HELPERS +#undef _CLC_DECLARE_EXTRACT_SAME_SIZE_COLOR_HELPERS + +#undef _CLC_DECLARE_CAST_SCALAR_TO_VEC4 +#undef _CLC_DECLARE_CAST_VEC2_TO_VEC4 +#undef _CLC_DECLARE_CAST_VEC3_TO_VEC4 +#undef _CLC_DECLARE_CAST_VEC4_TO_SCALAR +#undef _CLC_DECLARE_CAST_VEC4_TO_VEC3 +#undef _CLC_DECLARE_CAST_VEC4_TO_VEC2 +#undef _CLC_DECLARE_CAST_VEC4 +#undef _CLC_DECLARE_BUILTIN_CAST_SCALAR_TO_VEC4 +#undef _CLC_DECLARE_BUILTIN_CAST_VEC2_TO_VEC4 +#undef _CLC_DECLARE_BUILTIN_VEC3_TO_VEC4 +#undef _CLC_DECLARE_BUILTIN_VEC4_TO_SCALAR +#undef _CLC_DECLARE_BUILTIN_VEC4_TO_VEC3 +#undef _CLC_DECLARE_BUILTIN_VEC4_TO_VEC2 +#undef _CLC_DECLARE_BUILTIN_VEC4 + +#endif // CLC_SPIRV_IMAGE_COMMON From 729dd323be8d46c4607dc70c2b9ab967f50ae7e6 Mon Sep 17 00:00:00 2001 From: Georgi Mirazchiyski Date: Mon, 27 Jan 2025 12:09:56 +0000 Subject: [PATCH 03/10] Fix and cleanup some tests Signed-off-by: Georgi Mirazchiyski --- .../bindless_images/array/read_write_1d_subregion.cpp | 4 ++-- sycl/test-e2e/bindless_images/read_sampled.cpp | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sycl/test-e2e/bindless_images/array/read_write_1d_subregion.cpp b/sycl/test-e2e/bindless_images/array/read_write_1d_subregion.cpp index 47d84af028293..6e8dd040c1a75 100644 --- a/sycl/test-e2e/bindless_images/array/read_write_1d_subregion.cpp +++ b/sycl/test-e2e/bindless_images/array/read_write_1d_subregion.cpp @@ -1,7 +1,7 @@ // REQUIRES: aspect-ext_oneapi_image_array -// RUN: %if !any-device-is-hip %{ %{build} -o %t.out %} -// RUN: %if !any-device-is-hip %{ %{run} %t.out %} +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out #include #include diff --git a/sycl/test-e2e/bindless_images/read_sampled.cpp b/sycl/test-e2e/bindless_images/read_sampled.cpp index e0b3c3a109a25..15af2b4800cc1 100644 --- a/sycl/test-e2e/bindless_images/read_sampled.cpp +++ b/sycl/test-e2e/bindless_images/read_sampled.cpp @@ -1,7 +1,7 @@ // REQUIRES: aspect-ext_oneapi_bindless_images // UNSUPPORTED: hip || level_zero -// UNSUPPORTED-INTENDED: Returning non-FP values from fetching fails on HIP. +// UNSUPPORTED-INTENDED: Returning non-FP values from sampling fails on HIP. // Also, the feature is not fully implemented in the Level Zero stack. // RUN: %{build} -o %t.out @@ -20,8 +20,8 @@ #include "helpers/common.hpp" #include "helpers/sampling.hpp" #include +#include #include -#include #include #include @@ -326,7 +326,7 @@ bool runTests(sycl::range<1> dims, sycl::range<1> localSize, float offset, syclexp::bindless_image_sampler samp(addrMode, normMode, filtMode); #if defined(VERBOSE_LV2) || defined(VERBOSE_LV3) - util::printTestInfo(samp, offset); + sampling_helpers::printTestInfo(samp, offset); #endif bindless_helpers::printTestName("Running 1D short", dims, @@ -483,7 +483,7 @@ bool runTests(sycl::range<2> dims, sycl::range<2> localSize, float offset, syclexp::bindless_image_sampler samp(addrMode, normMode, filtMode); #if defined(VERBOSE_LV2) || defined(VERBOSE_LV3) - util::printTestInfo(samp, offset); + sampling_helpers::printTestInfo(samp, offset); #endif bindless_helpers::printTestName("Running 2D short", dims, From c90f064fa9fa24a2841fda52ccde148990a0857d Mon Sep 17 00:00:00 2001 From: Georgi Mirazchiyski Date: Thu, 30 Jan 2025 15:41:48 +0000 Subject: [PATCH 04/10] Apply a few test fixes due to changes lost on rebase --- .../bindless_images/array/read_write_unsampled_array.cpp | 1 + sycl/test-e2e/bindless_images/cubemap/cubemap_sampled.cpp | 2 ++ .../bindless_images/user_types/user_types_common.hpp | 5 ++++- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/sycl/test-e2e/bindless_images/array/read_write_unsampled_array.cpp b/sycl/test-e2e/bindless_images/array/read_write_unsampled_array.cpp index 7f0db5936adce..2d82d917d7ca5 100644 --- a/sycl/test-e2e/bindless_images/array/read_write_unsampled_array.cpp +++ b/sycl/test-e2e/bindless_images/array/read_write_unsampled_array.cpp @@ -1,3 +1,4 @@ +// REQUIRES: linux // REQUIRES: aspect-ext_oneapi_image_array // RUN: %{build} -o %t.out diff --git a/sycl/test-e2e/bindless_images/cubemap/cubemap_sampled.cpp b/sycl/test-e2e/bindless_images/cubemap/cubemap_sampled.cpp index 2b63336c907f5..7070e1c49e823 100644 --- a/sycl/test-e2e/bindless_images/cubemap/cubemap_sampled.cpp +++ b/sycl/test-e2e/bindless_images/cubemap/cubemap_sampled.cpp @@ -1,4 +1,6 @@ +// REQUIRES: aspect-ext_oneapi_cubemap // REQUIRES: aspect-ext_oneapi_cubemap_seamless_filtering +// REQUIRES: build-and-run-mode // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/bindless_images/user_types/user_types_common.hpp b/sycl/test-e2e/bindless_images/user_types/user_types_common.hpp index fde9860d1c03c..0a233c2a5cb48 100644 --- a/sycl/test-e2e/bindless_images/user_types/user_types_common.hpp +++ b/sycl/test-e2e/bindless_images/user_types/user_types_common.hpp @@ -1,12 +1,15 @@ // This file includes common definitions and functions that are shared between // multiple tests that use user-defined types +#ifdef VERBOSE_PRINT #include +#endif +#include #include #include -void printTestName(std::string name) { +static inline void printTestName([[maybe_unused]] std::string name) { #ifdef VERBOSE_PRINT std::cout << name << std::endl; #endif From 2f2ff6e13d1c2dd1f29c248f4feec342387a0d53 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Wed, 5 Feb 2025 16:59:00 +0000 Subject: [PATCH 05/10] Diagnose aspect failure. Signed-off-by: JackAKirk --- sycl/source/detail/device_impl.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 3d820f2c782cd..dbb557b3afbf6 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -610,7 +610,7 @@ bool device_impl::has(aspect Aspect) const { case aspect::ext_oneapi_cubemap: { ur_bool_t support = false; bool call_successful = - getAdapter()->call_nocheck( + getAdapter()->call( MDevice, UR_DEVICE_INFO_CUBEMAP_SUPPORT_EXP, sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS; return call_successful && support; @@ -618,7 +618,7 @@ bool device_impl::has(aspect Aspect) const { case aspect::ext_oneapi_cubemap_seamless_filtering: { ur_bool_t support = false; bool call_successful = - getAdapter()->call_nocheck( + getAdapter()->call( MDevice, UR_DEVICE_INFO_CUBEMAP_SEAMLESS_FILTERING_SUPPORT_EXP, sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS; return call_successful && support; From b737bfa882d6b7c219fd31a4471e6e3eda848615 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Wed, 5 Feb 2025 17:36:58 +0000 Subject: [PATCH 06/10] use run-unfiltered-devices to fix tests. Signed-off-by: JackAKirk --- sycl/source/detail/device_impl.cpp | 4 ++-- sycl/test-e2e/bindless_images/cubemap/cubemap_sampled.cpp | 2 +- .../bindless_images/examples/example_5_sample_cubemap.cpp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index dbb557b3afbf6..3d820f2c782cd 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -610,7 +610,7 @@ bool device_impl::has(aspect Aspect) const { case aspect::ext_oneapi_cubemap: { ur_bool_t support = false; bool call_successful = - getAdapter()->call( + getAdapter()->call_nocheck( MDevice, UR_DEVICE_INFO_CUBEMAP_SUPPORT_EXP, sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS; return call_successful && support; @@ -618,7 +618,7 @@ bool device_impl::has(aspect Aspect) const { case aspect::ext_oneapi_cubemap_seamless_filtering: { ur_bool_t support = false; bool call_successful = - getAdapter()->call( + getAdapter()->call_nocheck( MDevice, UR_DEVICE_INFO_CUBEMAP_SEAMLESS_FILTERING_SUPPORT_EXP, sizeof(ur_bool_t), &support, nullptr) == UR_RESULT_SUCCESS; return call_successful && support; diff --git a/sycl/test-e2e/bindless_images/cubemap/cubemap_sampled.cpp b/sycl/test-e2e/bindless_images/cubemap/cubemap_sampled.cpp index a82428a47b3cf..164fe420b2d3e 100644 --- a/sycl/test-e2e/bindless_images/cubemap/cubemap_sampled.cpp +++ b/sycl/test-e2e/bindless_images/cubemap/cubemap_sampled.cpp @@ -2,7 +2,7 @@ // REQUIRES: aspect-ext_oneapi_cubemap_seamless_filtering // RUN: %{build} -o %t.out -// RUN: %{run} %t.out +// RUN: %{run-unfiltered-devices} %t.out #include "../user_types/user_types_common.hpp" #include diff --git a/sycl/test-e2e/bindless_images/examples/example_5_sample_cubemap.cpp b/sycl/test-e2e/bindless_images/examples/example_5_sample_cubemap.cpp index da8303c978d3d..c855829eab05a 100644 --- a/sycl/test-e2e/bindless_images/examples/example_5_sample_cubemap.cpp +++ b/sycl/test-e2e/bindless_images/examples/example_5_sample_cubemap.cpp @@ -1,6 +1,6 @@ // REQUIRES: aspect-ext_oneapi_cubemap // RUN: %{build} -o %t.out -// RUN: %{run} %t.out +// RUN: %{run-unfiltered-devices} %t.out #include #include From 3207b6debe5ddb268d659b50ae3ec37aec499b84 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Thu, 6 Feb 2025 10:14:04 +0000 Subject: [PATCH 07/10] Try build-and-run-mode Signed-off-by: JackAKirk --- sycl/test-e2e/bindless_images/cubemap/cubemap_unsampled.cpp | 1 + .../bindless_images/examples/example_5_sample_cubemap.cpp | 2 ++ 2 files changed, 3 insertions(+) diff --git a/sycl/test-e2e/bindless_images/cubemap/cubemap_unsampled.cpp b/sycl/test-e2e/bindless_images/cubemap/cubemap_unsampled.cpp index 5f2ef75faafd1..80ad06ab738d2 100644 --- a/sycl/test-e2e/bindless_images/cubemap/cubemap_unsampled.cpp +++ b/sycl/test-e2e/bindless_images/cubemap/cubemap_unsampled.cpp @@ -1,4 +1,5 @@ // REQUIRES: aspect-ext_oneapi_cubemap +// REQUIRES: build-and-run-mode // RUN: %{build} -o %t.out // RUN: %{run-unfiltered-devices} %t.out diff --git a/sycl/test-e2e/bindless_images/examples/example_5_sample_cubemap.cpp b/sycl/test-e2e/bindless_images/examples/example_5_sample_cubemap.cpp index c855829eab05a..3e585eba2be9f 100644 --- a/sycl/test-e2e/bindless_images/examples/example_5_sample_cubemap.cpp +++ b/sycl/test-e2e/bindless_images/examples/example_5_sample_cubemap.cpp @@ -1,4 +1,6 @@ // REQUIRES: aspect-ext_oneapi_cubemap +// REQUIRES: build-and-run-mode + // RUN: %{build} -o %t.out // RUN: %{run-unfiltered-devices} %t.out From 203d1e8aa7cba81cbdd1797a5dbd76fb8e70b38d Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Thu, 6 Feb 2025 11:20:50 +0000 Subject: [PATCH 08/10] Fix broken SPIR cubemap compilation Signed-off-by: JackAKirk --- sycl/include/sycl/ext/oneapi/bindless_images.hpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/bindless_images.hpp b/sycl/include/sycl/ext/oneapi/bindless_images.hpp index d88b9c7ea1751..2e3a372cb0b35 100644 --- a/sycl/include/sycl/ext/oneapi/bindless_images.hpp +++ b/sycl/include/sycl/ext/oneapi/bindless_images.hpp @@ -1144,7 +1144,7 @@ DataT sample_cubemap(const sampled_image_handle &imageHandle [[maybe_unused]], const sycl::float3 &dirVec [[maybe_unused]]) { [[maybe_unused]] constexpr size_t NDims = 2; -#ifdef __SYCL_DEVICE_ONLY__ +#if defined(__SYCL_DEVICE_ONLY__) && !defined(__SPIR__) if constexpr (detail::is_recognized_standard_type()) { return __invoke__ImageReadCubemap( CONVERT_HANDLE_TO_SAMPLED_IMAGE(imageHandle.raw_handle, NDims), dirVec); @@ -1159,7 +1159,8 @@ DataT sample_cubemap(const sampled_image_handle &imageHandle [[maybe_unused]], dirVec)); } #else - assert(false); // Bindless images not yet implemented on host + assert(false); // Bindless images not yet implemented on host/ cubemap + // implementation broken/incomplete on SPIR backend #endif } From f67dd1c0733d633cc1ffa30b09630887235665c7 Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Thu, 6 Feb 2025 12:15:38 +0000 Subject: [PATCH 09/10] use build-and-run-mode sampled_cubemap.cpp Signed-off-by: JackAKirk --- sycl/include/sycl/ext/oneapi/bindless_images.hpp | 5 ++--- sycl/test-e2e/bindless_images/cubemap/cubemap_sampled.cpp | 1 + 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/bindless_images.hpp b/sycl/include/sycl/ext/oneapi/bindless_images.hpp index 2e3a372cb0b35..d88b9c7ea1751 100644 --- a/sycl/include/sycl/ext/oneapi/bindless_images.hpp +++ b/sycl/include/sycl/ext/oneapi/bindless_images.hpp @@ -1144,7 +1144,7 @@ DataT sample_cubemap(const sampled_image_handle &imageHandle [[maybe_unused]], const sycl::float3 &dirVec [[maybe_unused]]) { [[maybe_unused]] constexpr size_t NDims = 2; -#if defined(__SYCL_DEVICE_ONLY__) && !defined(__SPIR__) +#ifdef __SYCL_DEVICE_ONLY__ if constexpr (detail::is_recognized_standard_type()) { return __invoke__ImageReadCubemap( CONVERT_HANDLE_TO_SAMPLED_IMAGE(imageHandle.raw_handle, NDims), dirVec); @@ -1159,8 +1159,7 @@ DataT sample_cubemap(const sampled_image_handle &imageHandle [[maybe_unused]], dirVec)); } #else - assert(false); // Bindless images not yet implemented on host/ cubemap - // implementation broken/incomplete on SPIR backend + assert(false); // Bindless images not yet implemented on host #endif } diff --git a/sycl/test-e2e/bindless_images/cubemap/cubemap_sampled.cpp b/sycl/test-e2e/bindless_images/cubemap/cubemap_sampled.cpp index 164fe420b2d3e..23051ec0850ef 100644 --- a/sycl/test-e2e/bindless_images/cubemap/cubemap_sampled.cpp +++ b/sycl/test-e2e/bindless_images/cubemap/cubemap_sampled.cpp @@ -1,5 +1,6 @@ // REQUIRES: aspect-ext_oneapi_cubemap // REQUIRES: aspect-ext_oneapi_cubemap_seamless_filtering +// REQUIRES: build-and-run-mode // RUN: %{build} -o %t.out // RUN: %{run-unfiltered-devices} %t.out From d78fdfec67475e804cc00aec8812894c4ef8e03e Mon Sep 17 00:00:00 2001 From: JackAKirk Date: Thu, 6 Feb 2025 15:20:25 +0000 Subject: [PATCH 10/10] Fix comment typo Signed-off-by: JackAKirk --- libclc/libspirv/lib/amdgcn-amdhsa/images/image_common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/images/image_common.h b/libclc/libspirv/lib/amdgcn-amdhsa/images/image_common.h index eb32fb77d5c2d..cc818f5086e27 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/images/image_common.h +++ b/libclc/libspirv/lib/amdgcn-amdhsa/images/image_common.h @@ -30,7 +30,7 @@ // defined in "image_common.cl" extern _CLC_CONST_AS const unsigned int SAMPLER_OBJECT_OFFSET_DWORD; -// Helpers for casting between two builitin vector types and/or scalar types. +// Helpers for casting between two builtin vector types and/or scalar types. // Using the builtin as_type() and as_typen() functions to reinterpret types. // The restriction being is that element "type"s need to be of the same size.