Skip to content

Commit 83bbea9

Browse files
[SYCL][Bindless][Doc] Add support for cubemaps (#12996)
Add cubemap support:
- Allocation and freeing of cubemapped images
- Unsampled fetching and writing, and sampled reading
- Device queries for cubemap support
- Testing for both unsampled and sampled cubemap examples
- Update the spec with cubemap support

Remove `const` and `&` qualifiers from the spec and implementation for handle parameters in `write_xxx` functions.

Corresponding UR PR: oneapi-src/unified-runtime#1433

---------

Co-authored-by: Przemek Malon <[email protected]>
1 parent c18cf38 commit 83bbea9

File tree

20 files changed

+1168
-40
lines changed

20 files changed

+1168
-40
lines changed

libclc/ptx-nvidiacl/libspirv/images/image.cl

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,11 @@ pixelf32 as_pixelf32(int4 v) { return as_float4(v); }
216216
return (to_t##2)((to_t)from.x, (to_t)from.y); \
217217
}
218218

219+
// Generates an inline helper named `cast_<from_t>4_to_<to_t>`.
// The helper takes a 4-component vector of `from_t` and returns only its
// first component (index 0), converted to `to_t` with a C-style cast; the
// remaining three components are discarded.
// NOTE(review): the component is read with a subscript (`from[0]`), while the
// vec4-to-vec2 macro body just above uses `.x`/`.y` — confirm the subscript
// form is intentional.
#define _DEFINE_VEC4_TO_SINGLE_CAST(from_t, to_t) \
220+
inline to_t cast_##from_t##4_to_##to_t(from_t##4 from) { \
221+
return (to_t)from[0]; \
222+
}
223+
219224
// Generates an inline helper named `cast_<from_t>_to_<to_t>` that converts a
// single `from_t` value to `to_t` with a C-style cast.
#define _DEFINE_CAST(from_t, to_t) \
220225
inline to_t cast_##from_t##_to_##to_t(from_t from) { return (to_t)from; }
221226

@@ -278,6 +283,17 @@ _DEFINE_VEC4_TO_VEC2_CAST(float, half)
278283
_DEFINE_VEC4_TO_VEC2_CAST(int, uint)
279284
_DEFINE_VEC4_TO_VEC2_CAST(short, ushort)
280285

286+
// Instantiate the vec4-to-scalar cast helpers (`cast_<T>4_to_<U>`).
// The cubemap thunk macro later in this file builds its conversion call by
// pasting `cast_<fetch type>_to_<element type>`, so the scalar-returning
// thunks resolve to the helpers generated here.
_DEFINE_VEC4_TO_SINGLE_CAST(int, int)
287+
_DEFINE_VEC4_TO_SINGLE_CAST(uint, uint)
288+
_DEFINE_VEC4_TO_SINGLE_CAST(float, float)
289+
_DEFINE_VEC4_TO_SINGLE_CAST(short, short)
290+
_DEFINE_VEC4_TO_SINGLE_CAST(short, char)
291+
_DEFINE_VEC4_TO_SINGLE_CAST(int, short)
292+
_DEFINE_VEC4_TO_SINGLE_CAST(int, char)
293+
_DEFINE_VEC4_TO_SINGLE_CAST(uint, ushort)
294+
_DEFINE_VEC4_TO_SINGLE_CAST(uint, uchar)
295+
_DEFINE_VEC4_TO_SINGLE_CAST(float, half)
296+
281297
_DEFINE_VEC2_CAST(int, float)
282298
_DEFINE_VEC2_CAST(short, char)
283299
_DEFINE_VEC2_CAST(short, uchar)
@@ -332,6 +348,8 @@ _DEFINE_READ_3D_PIXELF(16, clamp)
332348
#undef _DEFINE_VEC4_CAST
333349
#undef _DEFINE_VEC2_CAST
334350
#undef _DEFINE_CAST
351+
#undef _DEFINE_VEC4_TO_VEC2_CAST
352+
#undef _DEFINE_VEC4_TO_SINGLE_CAST
335353
#undef _DEFINE_READ_1D_PIXELF
336354
#undef _DEFINE_READ_2D_PIXELF
337355
#undef _DEFINE_READ_3D_PIXELF
@@ -3645,3 +3663,112 @@ _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_BUILTIN_ALL(half, DF16_, f, 16)
36453663
#undef _NVVM_FUNC
36463664
#undef NVVM_FUNC
36473665
#undef MANGLE_FUNC_IMG_HANDLE_HELPER
3666+
3667+
3668+
// <--- CUBEMAP --->
3669+
// Cubemap surfaces are handled through the layered images implementation
3670+
3671+
// Define functions to call intrinsic
3672+
// Declarations of the three base cubemap fetch functions. Each takes an
// `unsigned long` image handle followed by three float coordinates and
// returns a 4-component vector (float4, int4 or uint4).
// The `__asm` label binds each declaration to the symbol
// `__clc_llvm_nvvm_tex_cube_<v4f32|v4i32|v4j32>_f32`, which is defined in
// image_helpers.ll.
float4
3673+
__nvvm_tex_cube_v4f32_f32(unsigned long, float, float,
3674+
float) __asm("__clc_llvm_nvvm_tex_cube_v4f32_f32");
3675+
int4 __nvvm_tex_cube_v4i32_f32(unsigned long, float, float, float) __asm(
3676+
"__clc_llvm_nvvm_tex_cube_v4i32_f32");
3677+
uint4 __nvvm_tex_cube_v4j32_f32(unsigned long, float, float, float) __asm(
3678+
"__clc_llvm_nvvm_tex_cube_v4j32_f32");
3679+
3680+
// Coordinate plumbing shared by the cubemap macros below:
//   COORD_INPUT        - parameter list of three separate floats (x, y, z)
//   COORD_THUNK_PARAMS - forwards those three parameters by name
//   COORD_PARAMS       - unpacks the x/y/z components of a vector named `coord`
#define COORD_INPUT float x, float y, float z
3681+
#define COORD_THUNK_PARAMS x, y, z
3682+
#define COORD_PARAMS coord.x, coord.y, coord.z
3683+
3684+
// Macro to generate cubemap fetches to call intrinsics
3685+
// float4, int4, uint4 already defined above
3686+
// Defines a thunk `__nvvm_tex_cube_<vec_size>_f32(imageHandle, coord_input)`
// returning `elem_t`.
// The thunk calls the wider fetch `__nvvm_tex_cube_<fetch_vec_size>_f32` with
// `coord_params`, stores the result as `fetch_elem_t`, and converts it to
// `elem_t` through the helper `cast_<fetch_elem_t>_to_<elem_t>`.
//   elem_t         - return type of the generated thunk
//   fetch_elem_t   - type returned by the underlying fetch
//   vec_size       - name suffix of the generated thunk (e.g. f32, v2i16)
//   fetch_vec_size - name suffix of the fetch being wrapped (e.g. v4f32)
//   coord_input    - parameter list for the coordinates
//   coord_params   - argument list forwarded to the fetch
#define _CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN( \
3687+
elem_t, fetch_elem_t, vec_size, fetch_vec_size, coord_input, coord_params) \
3688+
elem_t __nvvm_tex_cube_##vec_size##_f32(unsigned long imageHandle, \
3689+
coord_input) { \
3690+
fetch_elem_t a = \
3691+
__nvvm_tex_cube_##fetch_vec_size##_f32(imageHandle, coord_params); \
3692+
return cast_##fetch_elem_t##_to_##elem_t(a); \
3693+
}
3694+
3695+
// Thunk instantiations. Every one fetches a full 4-component 32-bit result
// (float4, int4 or uint4) and converts it to the requested element type.
// The 4-component float/int/uint variants are not instantiated here because
// functions with those names are already declared earlier in this section.
// Float
3696+
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(float, float4, f32, v4f32, COORD_INPUT, COORD_THUNK_PARAMS)
3697+
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(float2, float4, v2f32, v4f32, COORD_INPUT, COORD_THUNK_PARAMS)
3698+
// Int
3699+
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(int, int4, i32, v4i32, COORD_INPUT, COORD_THUNK_PARAMS)
3700+
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(int2, int4, v2i32, v4i32, COORD_INPUT, COORD_THUNK_PARAMS)
3701+
// Uint
3702+
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(uint, uint4, j32, v4j32, COORD_INPUT, COORD_THUNK_PARAMS)
3703+
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(uint2, uint4, v2j32, v4j32, COORD_INPUT, COORD_THUNK_PARAMS)
3704+
// Short
3705+
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(short, int4, i16, v4i32, COORD_INPUT, COORD_THUNK_PARAMS)
3706+
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(short2, int4, v2i16, v4i32, COORD_INPUT, COORD_THUNK_PARAMS)
3707+
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(short4, int4, v4i16, v4i32, COORD_INPUT, COORD_THUNK_PARAMS)
3708+
// UShort
3709+
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(ushort, uint4, t16, v4j32, COORD_INPUT, COORD_THUNK_PARAMS)
3710+
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(ushort2, uint4, v2t16, v4j32, COORD_INPUT, COORD_THUNK_PARAMS)
3711+
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(ushort4, uint4, v4t16, v4j32, COORD_INPUT, COORD_THUNK_PARAMS)
3712+
// Char
3713+
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(char, int4, i8, v4i32, COORD_INPUT, COORD_THUNK_PARAMS)
3714+
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(char2, int4, v2i8, v4i32, COORD_INPUT, COORD_THUNK_PARAMS)
3715+
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(char4, int4, v4i8, v4i32, COORD_INPUT, COORD_THUNK_PARAMS)
3716+
// UChar
3717+
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(uchar, uint4, h8, v4j32, COORD_INPUT, COORD_THUNK_PARAMS)
3718+
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(uchar2, uint4, v2h8, v4j32, COORD_INPUT, COORD_THUNK_PARAMS)
3719+
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(uchar4, uint4, v4h8, v4j32, COORD_INPUT, COORD_THUNK_PARAMS)
3720+
// Half
3721+
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(half, float4, f16, v4f32, COORD_INPUT, COORD_THUNK_PARAMS)
3722+
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(half2, float4, v2f16, v4f32, COORD_INPUT, COORD_THUNK_PARAMS)
3723+
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(half4, float4, v4f16, v4f32, COORD_INPUT, COORD_THUNK_PARAMS)
3724+
3725+
// Macro to generate the mangled names for cubemap fetches
3726+
// Defines a `_CLC_DEF` function returning `elem_t` that takes
// `(ulong imageHandle, coord_input)` and forwards to
// `__nvvm_tex_cube_<vec_size>_f32(imageHandle, coord_params)`.
// The function name is produced by MANGLE_FUNC_IMG_HANDLE (defined outside
// this section) from `__spirv_ImageSampleCubemap` plus the suffix
// `<elem_t_mangled><coord_mangled>ET0_T_T1_`.
// The literal 26 equals the character count of `__spirv_ImageSampleCubemap`;
// presumably it is the length prefix used in the mangled name — confirm
// against MANGLE_FUNC_IMG_HANDLE.
//   elem_t         - return type of the generated function
//   elem_t_mangled - mangled spelling of elem_t (e.g. f, Dv2_i)
//   vec_size       - name suffix selecting which fetch function to call
//   coord_mangled  - mangled spelling of the coordinate type (e.g. Dv3_f)
//   coord_input    - parameter declaration for the coordinates
//   coord_params   - argument list passed to the fetch function
#define _CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(elem_t, elem_t_mangled, \
3727+
vec_size, coord_mangled, \
3728+
coord_input, coord_params) \
3729+
_CLC_DEF elem_t MANGLE_FUNC_IMG_HANDLE( \
3730+
26, __spirv_ImageSampleCubemap, I, \
3731+
elem_t_mangled##coord_mangled##ET0_T_T1_)(ulong imageHandle, \
3732+
coord_input) { \
3733+
return __nvvm_tex_cube_##vec_size##_f32(imageHandle, coord_params); \
3734+
}
3735+
3736+
// Mangled `__spirv_ImageSampleCubemap` entry points for each element type in
// 1-, 2- and 4-component widths. Every instantiation takes a `float3 coord`
// (mangled as Dv3_f) and passes coord.x, coord.y, coord.z via COORD_PARAMS.
// Float
3737+
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(float, f, f32, Dv3_f, float3 coord, COORD_PARAMS)
3738+
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(float2, Dv2_f, v2f32, Dv3_f, float3 coord, COORD_PARAMS)
3739+
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(float4, Dv4_f, v4f32, Dv3_f, float3 coord, COORD_PARAMS)
3740+
// Int
3741+
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(int, i, i32, Dv3_f, float3 coord, COORD_PARAMS)
3742+
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(int2, Dv2_i, v2i32, Dv3_f, float3 coord, COORD_PARAMS)
3743+
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(int4, Dv4_i, v4i32, Dv3_f, float3 coord, COORD_PARAMS)
3744+
// Uint
3745+
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(uint, j, j32, Dv3_f, float3 coord, COORD_PARAMS)
3746+
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(uint2, Dv2_j, v2j32, Dv3_f, float3 coord, COORD_PARAMS)
3747+
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(uint4, Dv4_j, v4j32, Dv3_f, float3 coord, COORD_PARAMS)
3748+
// Short
3749+
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(short, s, i16, Dv3_f, float3 coord, COORD_PARAMS)
3750+
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(short2, Dv2_s, v2i16, Dv3_f, float3 coord, COORD_PARAMS)
3751+
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(short4, Dv4_s, v4i16, Dv3_f, float3 coord, COORD_PARAMS)
3752+
// UShort
3753+
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(ushort, t, t16, Dv3_f, float3 coord, COORD_PARAMS)
3754+
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(ushort2, Dv2_t, v2t16, Dv3_f, float3 coord, COORD_PARAMS)
3755+
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(ushort4, Dv4_t, v4t16, Dv3_f, float3 coord, COORD_PARAMS)
3756+
// Char
3757+
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(char, a, i8, Dv3_f, float3 coord, COORD_PARAMS)
3758+
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(char2, Dv2_a, v2i8, Dv3_f, float3 coord, COORD_PARAMS)
3759+
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(char4, Dv4_a, v4i8, Dv3_f, float3 coord, COORD_PARAMS)
3760+
// UChar
3761+
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(uchar, h, h8, Dv3_f, float3 coord, COORD_PARAMS)
3762+
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(uchar2, Dv2_h, v2h8, Dv3_f, float3 coord, COORD_PARAMS)
3763+
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(uchar4, Dv4_h, v4h8, Dv3_f, float3 coord, COORD_PARAMS)
3764+
// Half
3765+
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(half, DF16_, f16, Dv3_f, float3 coord, COORD_PARAMS)
3766+
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(half2, Dv2_DF16_, v2f16, Dv3_f, float3 coord, COORD_PARAMS)
3767+
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(half4, Dv4_DF16_, v4f16, Dv3_f, float3 coord, COORD_PARAMS)
3768+
3769+
3770+
// Remove the helper macros introduced for the cubemap section so they are not
// visible to any code that follows.
#undef _CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN
3771+
#undef COORD_INPUT
3772+
#undef COORD_THUNK_PARAMS
3773+
#undef COORD_PARAMS
3774+
#undef _CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN

libclc/ptx-nvidiacl/libspirv/images/image_helpers.ll

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -625,3 +625,28 @@ entry:
625625
%1 = tail call <4 x i32> @__clc_struct32_to_vector({i32,i32,i32,i32} %0)
626626
ret <4 x i32> %1
627627
}
628+
629+
; <--- CUBEMAP --->
630+
; Float cubemap fetch wrapper: passes the i64 image handle and the three float
; coordinates to the llvm.nvvm.tex.unified.cube.v4f32.f32 intrinsic, then
; repacks the returned four-float struct into a <4 x float> via
; @__clc_structf32_to_vector.
declare {float,float,float,float} @llvm.nvvm.tex.unified.cube.v4f32.f32(i64, float, float, float)
631+
define <4 x float> @__clc_llvm_nvvm_tex_cube_v4f32_f32(i64 %img, float %x, float %y, float %z) nounwind alwaysinline {
632+
entry:
633+
%0 = tail call {float,float,float,float} @llvm.nvvm.tex.unified.cube.v4f32.f32(i64 %img, float %x, float %y, float %z);
634+
%1 = tail call <4 x float> @__clc_structf32_to_vector({float,float,float,float} %0)
635+
ret <4 x float> %1
636+
}
637+
638+
; Cubemap fetch wrapper for the `v4i32` name: passes the i64 image handle and
; the three float coordinates to the llvm.nvvm.tex.unified.cube.v4s32.f32
; intrinsic, then repacks the returned four-i32 struct into a <4 x i32> via
; @__clc_struct32_to_vector.
declare {i32,i32,i32,i32} @llvm.nvvm.tex.unified.cube.v4s32.f32(i64, float, float, float)
639+
define <4 x i32> @__clc_llvm_nvvm_tex_cube_v4i32_f32(i64 %img, float %x, float %y, float %z) nounwind alwaysinline {
640+
entry:
641+
%0 = tail call {i32,i32,i32,i32} @llvm.nvvm.tex.unified.cube.v4s32.f32(i64 %img, float %x, float %y, float %z);
642+
%1 = tail call <4 x i32> @__clc_struct32_to_vector({i32,i32,i32,i32} %0)
643+
ret <4 x i32> %1
644+
}
645+
646+
; Cubemap fetch wrapper for the `v4j32` name: passes the i64 image handle and
; the three float coordinates to the llvm.nvvm.tex.unified.cube.v4u32.f32
; intrinsic, then repacks the returned four-i32 struct into a <4 x i32> via
; @__clc_struct32_to_vector.
declare {i32,i32,i32,i32} @llvm.nvvm.tex.unified.cube.v4u32.f32(i64, float, float, float)
647+
define <4 x i32> @__clc_llvm_nvvm_tex_cube_v4j32_f32(i64 %img, float %x, float %y, float %z) nounwind alwaysinline {
648+
entry:
649+
%0 = tail call {i32,i32,i32,i32} @llvm.nvvm.tex.unified.cube.v4u32.f32(i64 %img, float %x, float %y, float %z);
650+
%1 = tail call <4 x i32> @__clc_struct32_to_vector({i32,i32,i32,i32} %0)
651+
ret <4 x i32> %1
652+
}

llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ def AspectExt_oneapi_interop_semaphore_export : Aspect<"ext_oneapi_interop_semap
6161
def AspectExt_oneapi_mipmap : Aspect<"ext_oneapi_mipmap">;
6262
def AspectExt_oneapi_mipmap_anisotropy : Aspect<"ext_oneapi_mipmap_anisotropy">;
6363
def AspectExt_oneapi_mipmap_level_reference : Aspect<"ext_oneapi_mipmap_level_reference">;
64+
// Aspect records for cubemap support; both are also listed in the
// __TestAspectList target further down in this file.
def AspectExt_oneapi_cubemap : Aspect<"ext_oneapi_cubemap">;
65+
def AspectExt_oneapi_cubemap_seamless_filtering : Aspect<"ext_oneapi_cubemap_seamless_filtering">;
6466
def AspectExt_intel_esimd : Aspect<"ext_intel_esimd">;
6567
def AspectExt_oneapi_ballot_group : Aspect<"ext_oneapi_ballot_group">;
6668
def AspectExt_oneapi_fixed_size_group : Aspect<"ext_oneapi_fixed_size_group">;
@@ -121,7 +123,8 @@ def : TargetInfo<"__TestAspectList",
121123
AspectExt_oneapi_bindless_images_shared_usm, AspectExt_oneapi_bindless_images_1d_usm, AspectExt_oneapi_bindless_images_2d_usm,
122124
AspectExt_oneapi_interop_memory_import, AspectExt_oneapi_interop_memory_export,
123125
AspectExt_oneapi_interop_semaphore_import, AspectExt_oneapi_interop_semaphore_export,
124-
AspectExt_oneapi_mipmap, AspectExt_oneapi_mipmap_anisotropy, AspectExt_oneapi_mipmap_level_reference, AspectExt_intel_esimd,
126+
AspectExt_oneapi_mipmap, AspectExt_oneapi_mipmap_anisotropy, AspectExt_oneapi_mipmap_level_reference, AspectExt_oneapi_cubemap,
127+
AspectExt_oneapi_cubemap_seamless_filtering, AspectExt_intel_esimd,
125128
AspectExt_oneapi_ballot_group, AspectExt_oneapi_fixed_size_group, AspectExt_oneapi_opportunistic_group,
126129
AspectExt_oneapi_tangle_group, AspectExt_intel_matrix, AspectExt_oneapi_is_composite, AspectExt_oneapi_is_component,
127130
AspectExt_oneapi_graph, AspectExt_intel_fpga_task_sequence, AspectExt_oneapi_limited_graph,

0 commit comments

Comments
 (0)