Skip to content

[SYCL][Bindless][Doc] Add support for cubemaps #12996

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 24 commits into from
Apr 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
63d57d3
[SYCL][Bindless][Doc] Add support for cubemaps
Seanst98 Oct 4, 2023
9ddb9a1
Merge branch 'sycl' into sean/cubemaps-MVP
Seanst98 Mar 13, 2024
4adb1ab
Merge branch 'sycl' into sean/cubemaps-MVP
Seanst98 Mar 18, 2024
0cd6568
Merge branch 'sycl' into sean/cubemaps-MVP
Seanst98 Mar 18, 2024
32e8a9e
Fix conflict markers in doc
Seanst98 Mar 18, 2024
728c5d5
Merge branch 'sycl' into sean/cubemaps-MVP
Seanst98 Mar 19, 2024
5a4287b
Merge branch 'sycl' into sean/cubemaps-MVP
Seanst98 Mar 20, 2024
f1dcfbb
Merge branch 'sycl' into sean/cubemaps-MVP
Seanst98 Mar 28, 2024
0c338a1
Addressing feedback
Seanst98 Mar 28, 2024
54716ad
Merge branch 'sycl' into sean/cubemaps-MVP
Seanst98 Mar 29, 2024
fa309aa
Addressing feedback
Seanst98 Mar 29, 2024
6970497
Addressing feedback
Seanst98 Mar 29, 2024
43371d4
Merge branch 'sycl' into sean/cubemaps-MVP
Seanst98 Apr 1, 2024
95ce972
Addressing feedback
Seanst98 Apr 1, 2024
52d984f
Addressing feedback
Seanst98 Apr 1, 2024
9d8ab76
Merge branch 'sycl' into sean/cubemaps-MVP
Seanst98 Apr 1, 2024
3e8eef5
Merge branch 'sycl' into sean/cubemaps-MVP
Seanst98 Apr 2, 2024
6f62c2b
Replace device aspect call_nocheck with call
Seanst98 Apr 2, 2024
09b3dd5
Merge branch 'sycl' into sean/cubemaps-MVP
Seanst98 Apr 2, 2024
22d01c0
Update UR CMakeLists
Seanst98 Apr 2, 2024
d50d017
Merge branch 'sycl' into sean/cubemaps-MVP
przemektmalon Apr 8, 2024
5059329
Update UR TAG
przemektmalon Apr 8, 2024
094773e
Revert aspect queries to call_nocheck
przemektmalon Apr 9, 2024
df14618
Merge branch 'sycl' into sean/cubemaps-MVP
przemektmalon Apr 9, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 127 additions & 0 deletions libclc/ptx-nvidiacl/libspirv/images/image.cl
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,11 @@ pixelf32 as_pixelf32(int4 v) { return as_float4(v); }
return (to_t##2)((to_t)from.x, (to_t)from.y); \
}

// Generates cast_<from_t>4_to_<to_t>(): takes a 4-element vector of `from_t`
// and returns its first component converted to `to_t`. The remaining three
// components are ignored.
#define _DEFINE_VEC4_TO_SINGLE_CAST(from_t, to_t) \
  inline to_t cast_##from_t##4_to_##to_t(from_t##4 from) { \
    return (to_t)from.x; \
  }

// Generates cast_<from_t>_to_<to_t>(): returns its argument after an explicit
// (to_t) cast.
#define _DEFINE_CAST(from_t, to_t) \
  inline to_t cast_##from_t##_to_##to_t(from_t from) { \
    return (to_t)from; \
  }

Expand Down Expand Up @@ -278,6 +283,17 @@ _DEFINE_VEC4_TO_VEC2_CAST(float, half)
_DEFINE_VEC4_TO_VEC2_CAST(int, uint)
_DEFINE_VEC4_TO_VEC2_CAST(short, ushort)

// 4-element vector -> scalar casts, grouped by the source vector type.
// From int4
_DEFINE_VEC4_TO_SINGLE_CAST(int, int)
_DEFINE_VEC4_TO_SINGLE_CAST(int, short)
_DEFINE_VEC4_TO_SINGLE_CAST(int, char)
// From uint4
_DEFINE_VEC4_TO_SINGLE_CAST(uint, uint)
_DEFINE_VEC4_TO_SINGLE_CAST(uint, ushort)
_DEFINE_VEC4_TO_SINGLE_CAST(uint, uchar)
// From float4
_DEFINE_VEC4_TO_SINGLE_CAST(float, float)
_DEFINE_VEC4_TO_SINGLE_CAST(float, half)
// From short4
_DEFINE_VEC4_TO_SINGLE_CAST(short, short)
_DEFINE_VEC4_TO_SINGLE_CAST(short, char)

_DEFINE_VEC2_CAST(int, float)
_DEFINE_VEC2_CAST(short, char)
_DEFINE_VEC2_CAST(short, uchar)
Expand Down Expand Up @@ -332,6 +348,8 @@ _DEFINE_READ_3D_PIXELF(16, clamp)
#undef _DEFINE_VEC4_CAST
#undef _DEFINE_VEC2_CAST
#undef _DEFINE_CAST
#undef _DEFINE_VEC4_TO_VEC2_CAST
#undef _DEFINE_VEC4_TO_SINGLE_CAST
#undef _DEFINE_READ_1D_PIXELF
#undef _DEFINE_READ_2D_PIXELF
#undef _DEFINE_READ_3D_PIXELF
Expand Down Expand Up @@ -3645,3 +3663,112 @@ _CLC_DEFINE_IMAGE_ARRAY_BINDLESS_BUILTIN_ALL(half, DF16_, f, 16)
#undef _NVVM_FUNC
#undef NVVM_FUNC
#undef MANGLE_FUNC_IMG_HANDLE_HELPER


// <--- CUBEMAP --->
// Cubemap surfaces are handled through the layered images implementation

// Declarations of the 4-element cubemap fetch functions. Each one is bound by
// an asm label to a `__clc_llvm_nvvm_tex_cube_*` symbol and takes an unsigned
// long handle followed by three float coordinates.
float4 __nvvm_tex_cube_v4f32_f32(unsigned long imageHandle, float x, float y,
                                 float z)
    __asm("__clc_llvm_nvvm_tex_cube_v4f32_f32");
int4 __nvvm_tex_cube_v4i32_f32(unsigned long imageHandle, float x, float y,
                               float z)
    __asm("__clc_llvm_nvvm_tex_cube_v4i32_f32");
uint4 __nvvm_tex_cube_v4j32_f32(unsigned long imageHandle, float x, float y,
                                float z)
    __asm("__clc_llvm_nvvm_tex_cube_v4j32_f32");

// Parameter list used by the thunks: the coordinate as three scalar floats.
#define COORD_INPUT float x, float y, float z
// Argument list the thunks forward to the underlying fetch function.
#define COORD_THUNK_PARAMS x, y, z
// Argument list used by the mangled builtins: the components of their `coord`
// vector parameter, passed on as three scalars.
#define COORD_PARAMS coord.x, coord.y, coord.z

// Generates a thunk named __nvvm_tex_cube_<vec_size>_f32. The thunk fetches a
// `fetch_elem_t` value through __nvvm_tex_cube_<fetch_vec_size>_f32 and
// returns it converted to `elem_t` via cast_<fetch_elem_t>_to_<elem_t>.
// No thunks are needed for float4, int4 and uint4: those fetch functions are
// already declared above.
#define _CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN( \
    elem_t, fetch_elem_t, vec_size, fetch_vec_size, coord_input, \
    coord_params) \
  elem_t __nvvm_tex_cube_##vec_size##_f32(unsigned long imageHandle, \
                                          coord_input) { \
    fetch_elem_t fetched = \
        __nvvm_tex_cube_##fetch_vec_size##_f32(imageHandle, coord_params); \
    return cast_##fetch_elem_t##_to_##elem_t(fetched); \
  }

// Float: scalar and 2-element results, converted from a float4 fetch
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(float, float4, f32, v4f32, COORD_INPUT, COORD_THUNK_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(float2, float4, v2f32, v4f32, COORD_INPUT, COORD_THUNK_PARAMS)
// Int: scalar and 2-element results, converted from an int4 fetch
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(int, int4, i32, v4i32, COORD_INPUT, COORD_THUNK_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(int2, int4, v2i32, v4i32, COORD_INPUT, COORD_THUNK_PARAMS)
// Uint: scalar and 2-element results, converted from a uint4 fetch
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(uint, uint4, j32, v4j32, COORD_INPUT, COORD_THUNK_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(uint2, uint4, v2j32, v4j32, COORD_INPUT, COORD_THUNK_PARAMS)
// Short: 1-, 2- and 4-element results, converted from an int4 fetch
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(short, int4, i16, v4i32, COORD_INPUT, COORD_THUNK_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(short2, int4, v2i16, v4i32, COORD_INPUT, COORD_THUNK_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(short4, int4, v4i16, v4i32, COORD_INPUT, COORD_THUNK_PARAMS)
// UShort: 1-, 2- and 4-element results, converted from a uint4 fetch
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(ushort, uint4, t16, v4j32, COORD_INPUT, COORD_THUNK_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(ushort2, uint4, v2t16, v4j32, COORD_INPUT, COORD_THUNK_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(ushort4, uint4, v4t16, v4j32, COORD_INPUT, COORD_THUNK_PARAMS)
// Char: 1-, 2- and 4-element results, converted from an int4 fetch
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(char, int4, i8, v4i32, COORD_INPUT, COORD_THUNK_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(char2, int4, v2i8, v4i32, COORD_INPUT, COORD_THUNK_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(char4, int4, v4i8, v4i32, COORD_INPUT, COORD_THUNK_PARAMS)
// UChar: 1-, 2- and 4-element results, converted from a uint4 fetch
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(uchar, uint4, h8, v4j32, COORD_INPUT, COORD_THUNK_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(uchar2, uint4, v2h8, v4j32, COORD_INPUT, COORD_THUNK_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(uchar4, uint4, v4h8, v4j32, COORD_INPUT, COORD_THUNK_PARAMS)
// Half: 1-, 2- and 4-element results, converted from a float4 fetch
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(half, float4, f16, v4f32, COORD_INPUT, COORD_THUNK_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(half2, float4, v2f16, v4f32, COORD_INPUT, COORD_THUNK_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN(half4, float4, v4f16, v4f32, COORD_INPUT, COORD_THUNK_PARAMS)

// Generates the externally visible cubemap read builtin for one result type.
// The function name is produced by MANGLE_FUNC_IMG_HANDLE from
// __spirv_ImageSampleCubemap plus the mangled result and coordinate types
// (26 equals the character count of that name; presumably its mangled length
// prefix). The body forwards the handle and the coordinate components to
// __nvvm_tex_cube_<vec_size>_f32.
#define _CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN( \
    elem_t, elem_t_mangled, vec_size, coord_mangled, coord_input, \
    coord_params) \
  _CLC_DEF elem_t MANGLE_FUNC_IMG_HANDLE( \
      26, __spirv_ImageSampleCubemap, I, \
      elem_t_mangled##coord_mangled##ET0_T_T1_)(ulong imageHandle, \
                                                coord_input) { \
    return __nvvm_tex_cube_##vec_size##_f32(imageHandle, coord_params); \
  }

// Float: 1-, 2- and 4-element results; coordinates are passed as a float3
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(float, f, f32, Dv3_f, float3 coord, COORD_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(float2, Dv2_f, v2f32, Dv3_f, float3 coord, COORD_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(float4, Dv4_f, v4f32, Dv3_f, float3 coord, COORD_PARAMS)
// Int: 1-, 2- and 4-element results
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(int, i, i32, Dv3_f, float3 coord, COORD_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(int2, Dv2_i, v2i32, Dv3_f, float3 coord, COORD_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(int4, Dv4_i, v4i32, Dv3_f, float3 coord, COORD_PARAMS)
// Uint: 1-, 2- and 4-element results
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(uint, j, j32, Dv3_f, float3 coord, COORD_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(uint2, Dv2_j, v2j32, Dv3_f, float3 coord, COORD_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(uint4, Dv4_j, v4j32, Dv3_f, float3 coord, COORD_PARAMS)
// Short: 1-, 2- and 4-element results
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(short, s, i16, Dv3_f, float3 coord, COORD_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(short2, Dv2_s, v2i16, Dv3_f, float3 coord, COORD_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(short4, Dv4_s, v4i16, Dv3_f, float3 coord, COORD_PARAMS)
// UShort: 1-, 2- and 4-element results
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(ushort, t, t16, Dv3_f, float3 coord, COORD_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(ushort2, Dv2_t, v2t16, Dv3_f, float3 coord, COORD_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(ushort4, Dv4_t, v4t16, Dv3_f, float3 coord, COORD_PARAMS)
// Char: 1-, 2- and 4-element results
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(char, a, i8, Dv3_f, float3 coord, COORD_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(char2, Dv2_a, v2i8, Dv3_f, float3 coord, COORD_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(char4, Dv4_a, v4i8, Dv3_f, float3 coord, COORD_PARAMS)
// UChar: 1-, 2- and 4-element results
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(uchar, h, h8, Dv3_f, float3 coord, COORD_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(uchar2, Dv2_h, v2h8, Dv3_f, float3 coord, COORD_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(uchar4, Dv4_h, v4h8, Dv3_f, float3 coord, COORD_PARAMS)
// Half: 1-, 2- and 4-element results
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(half, DF16_, f16, Dv3_f, float3 coord, COORD_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(half2, Dv2_DF16_, v2f16, Dv3_f, float3 coord, COORD_PARAMS)
_CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN(half4, Dv4_DF16_, v4f16, Dv3_f, float3 coord, COORD_PARAMS)



// Remove the cubemap generator macros, then the coordinate helper macros.
#undef _CLC_DEFINE_CUBEMAP_BINDLESS_THUNK_READS_BUILTIN
#undef _CLC_DEFINE_CUBEMAP_BINDLESS_READS_BUILTIN
#undef COORD_INPUT
#undef COORD_THUNK_PARAMS
#undef COORD_PARAMS
25 changes: 25 additions & 0 deletions libclc/ptx-nvidiacl/libspirv/images/image_helpers.ll
Original file line number Diff line number Diff line change
Expand Up @@ -625,3 +625,28 @@ entry:
%1 = tail call <4 x i32> @__clc_struct32_to_vector({i32,i32,i32,i32} %0)
ret <4 x i32> %1
}

; <--- CUBEMAP --->
; Float cubemap fetch: calls the unified cube texture intrinsic and repacks
; the returned four-float struct into a <4 x float> vector.
declare {float,float,float,float} @llvm.nvvm.tex.unified.cube.v4f32.f32(i64, float, float, float)
define <4 x float> @__clc_llvm_nvvm_tex_cube_v4f32_f32(i64 %img, float %x, float %y, float %z) nounwind alwaysinline {
entry:
  %texel = tail call {float,float,float,float} @llvm.nvvm.tex.unified.cube.v4f32.f32(i64 %img, float %x, float %y, float %z)
  %vec = tail call <4 x float> @__clc_structf32_to_vector({float,float,float,float} %texel)
  ret <4 x float> %vec
}

; Signed-integer cubemap fetch: calls the v4s32 unified cube texture intrinsic
; and repacks the returned four-i32 struct into a <4 x i32> vector.
declare {i32,i32,i32,i32} @llvm.nvvm.tex.unified.cube.v4s32.f32(i64, float, float, float)
define <4 x i32> @__clc_llvm_nvvm_tex_cube_v4i32_f32(i64 %img, float %x, float %y, float %z) nounwind alwaysinline {
entry:
  %texel = tail call {i32,i32,i32,i32} @llvm.nvvm.tex.unified.cube.v4s32.f32(i64 %img, float %x, float %y, float %z)
  %vec = tail call <4 x i32> @__clc_struct32_to_vector({i32,i32,i32,i32} %texel)
  ret <4 x i32> %vec
}

; Unsigned-integer cubemap fetch: calls the v4u32 unified cube texture
; intrinsic and repacks the returned four-i32 struct into a <4 x i32> vector.
declare {i32,i32,i32,i32} @llvm.nvvm.tex.unified.cube.v4u32.f32(i64, float, float, float)
define <4 x i32> @__clc_llvm_nvvm_tex_cube_v4j32_f32(i64 %img, float %x, float %y, float %z) nounwind alwaysinline {
entry:
  %texel = tail call {i32,i32,i32,i32} @llvm.nvvm.tex.unified.cube.v4u32.f32(i64 %img, float %x, float %y, float %z)
  %vec = tail call <4 x i32> @__clc_struct32_to_vector({i32,i32,i32,i32} %texel)
  ret <4 x i32> %vec
}
5 changes: 4 additions & 1 deletion llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ def AspectExt_oneapi_interop_semaphore_export : Aspect<"ext_oneapi_interop_semap
def AspectExt_oneapi_mipmap : Aspect<"ext_oneapi_mipmap">;
def AspectExt_oneapi_mipmap_anisotropy : Aspect<"ext_oneapi_mipmap_anisotropy">;
def AspectExt_oneapi_mipmap_level_reference : Aspect<"ext_oneapi_mipmap_level_reference">;
// Aspect records for ext_oneapi_cubemap and
// ext_oneapi_cubemap_seamless_filtering.
def AspectExt_oneapi_cubemap : Aspect<"ext_oneapi_cubemap">;
def AspectExt_oneapi_cubemap_seamless_filtering : Aspect<"ext_oneapi_cubemap_seamless_filtering">;
def AspectExt_intel_esimd : Aspect<"ext_intel_esimd">;
def AspectExt_oneapi_ballot_group : Aspect<"ext_oneapi_ballot_group">;
def AspectExt_oneapi_fixed_size_group : Aspect<"ext_oneapi_fixed_size_group">;
Expand Down Expand Up @@ -121,7 +123,8 @@ def : TargetInfo<"__TestAspectList",
AspectExt_oneapi_bindless_images_shared_usm, AspectExt_oneapi_bindless_images_1d_usm, AspectExt_oneapi_bindless_images_2d_usm,
AspectExt_oneapi_interop_memory_import, AspectExt_oneapi_interop_memory_export,
AspectExt_oneapi_interop_semaphore_import, AspectExt_oneapi_interop_semaphore_export,
AspectExt_oneapi_mipmap, AspectExt_oneapi_mipmap_anisotropy, AspectExt_oneapi_mipmap_level_reference, AspectExt_intel_esimd,
AspectExt_oneapi_mipmap, AspectExt_oneapi_mipmap_anisotropy, AspectExt_oneapi_mipmap_level_reference, AspectExt_oneapi_cubemap,
AspectExt_oneapi_cubemap_seamless_filtering, AspectExt_intel_esimd,
AspectExt_oneapi_ballot_group, AspectExt_oneapi_fixed_size_group, AspectExt_oneapi_opportunistic_group,
AspectExt_oneapi_tangle_group, AspectExt_intel_matrix, AspectExt_oneapi_is_composite, AspectExt_oneapi_is_component,
AspectExt_oneapi_graph, AspectExt_intel_fpga_task_sequence, AspectExt_oneapi_limited_graph,
Expand Down
Loading