Skip to content

Commit c964961

Browse files
[SYCL][CUDA][libclc] Change suld helper functions to return vectors (#6280)
The current implementation of the suld helper functions in PTX libclc fails to link with the image implementation. This commit fixes this linking issue by changing the return type of these helpers to vectors rather than unnamed structs. Signed-off-by: Larsen, Steffen <[email protected]>
1 parent 10c19ae commit c964961

File tree

2 files changed

+138
-129
lines changed

2 files changed

+138
-129
lines changed

libclc/ptx-nvidiacl/libspirv/images/image.cl

+67-92
Original file line numberDiff line numberDiff line change
@@ -11,19 +11,12 @@
1111

1212
#ifdef cl_khr_fp16
1313
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
14-
struct out_16 {
15-
short x, y, z, w;
16-
};
1714
#endif
1815

1916
#ifdef cl_khr_3d_image_writes
2017
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
2118
#endif
2219

23-
struct out_32 {
24-
int x, y, z, w;
25-
};
26-
2720
// CLC helpers
2821
int __clc__sampler_extract_normalized_coords_prop(int) __asm(
2922
"__clc__sampler_extract_normalized_coords_prop");
@@ -57,73 +50,79 @@ int __clc__sampled_image3d_unpack_sampler(__ocl_sampled_image3d_ro_t) __asm(
5750
"__clc__sampled_image_unpack_sampler");
5851

5952
// NVVM helpers
60-
struct out_16
61-
__nvvm_suld_1d_v4i16_trap_s(long, int) __asm("__clc_llvm_nvvm_suld_1d_v4i16_trap_s");
62-
struct out_16
53+
#ifdef cl_khr_fp16
54+
short4
55+
__nvvm_suld_1d_v4i16_trap_s(long, int) __asm("__clc_llvm_nvvm_suld_1d_v4i16_trap");
56+
short4
6357
__nvvm_suld_2d_v4i16_trap_s(long, int,
6458
int) __asm("__clc_llvm_nvvm_suld_2d_v4i16_trap");
65-
struct out_16
59+
short4
6660
__nvvm_suld_3d_v4i16_trap_s(long, int, int,
6761
int) __asm("__clc_llvm_nvvm_suld_3d_v4i16_trap");
68-
struct out_32
69-
__nvvm_suld_1d_v4i32_trap_s(long, int) __asm("__clc_llvm_nvvm_suld_1d_v4i32_trap");
70-
struct out_32
71-
__nvvm_suld_2d_v4i32_trap_s(long, int,
72-
int) __asm("__clc_llvm_nvvm_suld_2d_v4i32_trap");
73-
struct out_32
74-
__nvvm_suld_3d_v4i32_trap_s(long, int, int,
75-
int) __asm("__clc_llvm_nvvm_suld_3d_v4i32_trap");
7662

77-
struct out_16
63+
short4
7864
__nvvm_suld_1d_v4i16_clamp_s(long, int) __asm("__clc_llvm_nvvm_suld_1d_v4i16_clamp");
79-
struct out_16
65+
short4
8066
__nvvm_suld_2d_v4i16_clamp_s(long, int,
8167
int) __asm("__clc_llvm_nvvm_suld_2d_v4i16_clamp");
82-
struct out_16
68+
short4
8369
__nvvm_suld_3d_v4i16_clamp_s(long, int, int,
8470
int) __asm("__clc_llvm_nvvm_suld_3d_v4i16_clamp");
85-
struct out_32
86-
__nvvm_suld_1d_v4i32_clamp_s(long, int) __asm("__clc_llvm_nvvm_suld_1d_v4i32_clamp");
87-
struct out_32
88-
__nvvm_suld_2d_v4i32_clamp_s(long, int,
89-
int) __asm("__clc_llvm_nvvm_suld_2d_v4i32_clamp");
90-
struct out_32
91-
__nvvm_suld_3d_v4i32_clamp_s(long, int, int,
92-
int) __asm("__clc_llvm_nvvm_suld_3d_v4i32_clamp");
9371

94-
struct out_16
72+
short4
9573
__nvvm_suld_1d_v4i16_zero_s(long, int) __asm("__clc_llvm_nvvm_suld_1d_v4i16_zero");
96-
struct out_16
74+
short4
9775
__nvvm_suld_2d_v4i16_zero_s(long, int,
9876
int) __asm("__clc_llvm_nvvm_suld_2d_v4i16_zero");
99-
struct out_16
77+
short4
10078
__nvvm_suld_3d_v4i16_zero_s(long, int, int,
10179
int) __asm("__clc_llvm_nvvm_suld_3d_v4i16_zero");
102-
struct out_32
103-
__nvvm_suld_1d_v4i32_zero_s(long, int) __asm("__clc_llvm_nvvm_suld_1d_v4i32_zero");
104-
struct out_32
105-
__nvvm_suld_2d_v4i32_zero_s(long, int,
106-
int) __asm("__clc_llvm_nvvm_suld_2d_v4i32_zero");
107-
struct out_32
108-
__nvvm_suld_3d_v4i32_zero_s(long, int, int,
109-
int) __asm("__clc_llvm_nvvm_suld_3d_v4i32_zero");
11080

111-
struct out_16
81+
short4
11282
__nvvm_suld_1d_v4i16_clamp(read_only image1d_t,
11383
int) __asm("__clc_llvm_nvvm_suld_1d_v4i16_clamp");
114-
struct out_16
84+
short4
11585
__nvvm_suld_2d_v4i16_clamp(read_only image2d_t, int,
11686
int) __asm("__clc_llvm_nvvm_suld_2d_v4i16_clamp");
117-
struct out_16
87+
short4
11888
__nvvm_suld_3d_v4i16_clamp(read_only image3d_t, int, int,
11989
int) __asm("__clc_llvm_nvvm_suld_3d_v4i16_clamp");
120-
struct out_32
90+
#endif
91+
92+
int4
93+
__nvvm_suld_1d_v4i32_trap_s(long, int) __asm("__clc_llvm_nvvm_suld_1d_v4i32_trap");
94+
int4
95+
__nvvm_suld_2d_v4i32_trap_s(long, int,
96+
int) __asm("__clc_llvm_nvvm_suld_2d_v4i32_trap");
97+
int4
98+
__nvvm_suld_3d_v4i32_trap_s(long, int, int,
99+
int) __asm("__clc_llvm_nvvm_suld_3d_v4i32_trap");
100+
101+
int4
102+
__nvvm_suld_1d_v4i32_clamp_s(long, int) __asm("__clc_llvm_nvvm_suld_1d_v4i32_clamp");
103+
int4
104+
__nvvm_suld_2d_v4i32_clamp_s(long, int,
105+
int) __asm("__clc_llvm_nvvm_suld_2d_v4i32_clamp");
106+
int4
107+
__nvvm_suld_3d_v4i32_clamp_s(long, int, int,
108+
int) __asm("__clc_llvm_nvvm_suld_3d_v4i32_clamp");
109+
110+
int4
111+
__nvvm_suld_1d_v4i32_zero_s(long, int) __asm("__clc_llvm_nvvm_suld_1d_v4i32_zero");
112+
int4
113+
__nvvm_suld_2d_v4i32_zero_s(long, int,
114+
int) __asm("__clc_llvm_nvvm_suld_2d_v4i32_zero");
115+
int4
116+
__nvvm_suld_3d_v4i32_zero_s(long, int, int,
117+
int) __asm("__clc_llvm_nvvm_suld_3d_v4i32_zero");
118+
119+
int4
121120
__nvvm_suld_1d_v4i32_clamp(read_only image1d_t,
122121
int) __asm("__clc_llvm_nvvm_suld_1d_v4i32_clamp");
123-
struct out_32
122+
int4
124123
__nvvm_suld_2d_v4i32_clamp(read_only image2d_t, int,
125124
int) __asm("__clc_llvm_nvvm_suld_2d_v4i32_clamp");
126-
struct out_32
125+
int4
127126
__nvvm_suld_3d_v4i32_clamp(read_only image3d_t, int, int,
128127
int) __asm("__clc_llvm_nvvm_suld_3d_v4i32_clamp");
129128

@@ -199,11 +198,8 @@ typedef float4 pixelf32;
199198
typedef half fp16;
200199
typedef float fp32;
201200

202-
#define _DEFINE_OUT_TYPE(elem_t, elem_size) \
203-
inline elem_t##4 out_##elem_t(struct out_##elem_size out) { \
204-
return (elem_t##4)(as_##elem_t(out.x), as_##elem_t(out.y), \
205-
as_##elem_t(out.z), as_##elem_t(out.w)); \
206-
}
201+
pixelf16 as_pixelf16(short4 v) { return as_half4(v); }
202+
pixelf32 as_pixelf32(int4 v) { return as_float4(v); }
207203

208204
#define _DEFINE_VEC4_CAST(from_t, to_t) \
209205
inline to_t##4 cast_##from_t##4_to_##to_t##4(from_t##4 from) { \
@@ -223,44 +219,30 @@ typedef float fp32;
223219
return cast_##pixelf_base_t##_to_##to_t(from); \
224220
}
225221

226-
#define _DEFINE_OUT_PIXELF(pixelf_size, elem_t) \
227-
inline pixelf##pixelf_size out_pixelf##pixelf_size( \
228-
struct out_##pixelf_size out) { \
229-
return (pixelf##pixelf_size)(as_##elem_t(out.x), as_##elem_t(out.y), \
230-
as_##elem_t(out.z), as_##elem_t(out.w)); \
231-
}
232-
233222
#define _DEFINE_READ_1D_PIXELF(pixelf_size, cuda_address_mode) \
234223
pixelf##pixelf_size read_1d_##pixelf_size##_##cuda_address_mode(long image, \
235224
int x) { \
236-
struct out_##pixelf_size res = \
225+
return as_pixelf##pixelf_size( \
237226
__nvvm_suld_1d_v4i##pixelf_size##_##cuda_address_mode##_s( \
238-
image, x * sizeof(struct out_##pixelf_size)); \
239-
return out_pixelf##pixelf_size(res); \
227+
image, x * sizeof(pixelf##pixelf_size))); \
240228
}
241229

242230
#define _DEFINE_READ_2D_PIXELF(pixelf_size, cuda_address_mode) \
243231
pixelf##pixelf_size read_2d_##pixelf_size##_##cuda_address_mode( \
244232
long image, int x, int y) { \
245-
struct out_##pixelf_size res = \
233+
return as_pixelf##pixelf_size( \
246234
__nvvm_suld_2d_v4i##pixelf_size##_##cuda_address_mode##_s( \
247-
image, x * sizeof(struct out_##pixelf_size), y); \
248-
return out_pixelf##pixelf_size(res); \
235+
image, x * sizeof(pixelf##pixelf_size), y)); \
249236
}
250237

251238
#define _DEFINE_READ_3D_PIXELF(pixelf_size, cuda_address_mode) \
252239
pixelf##pixelf_size read_3d_##pixelf_size##_##cuda_address_mode( \
253240
long image, int x, int y, int z) { \
254-
struct out_##pixelf_size res = \
241+
return as_pixelf##pixelf_size( \
255242
__nvvm_suld_3d_v4i##pixelf_size##_##cuda_address_mode##_s( \
256-
image, x * sizeof(struct out_##pixelf_size), y, z); \
257-
return out_pixelf##pixelf_size(res); \
243+
image, x * sizeof(pixelf##pixelf_size), y, z)); \
258244
}
259245

260-
_DEFINE_OUT_TYPE(float, 32)
261-
_DEFINE_OUT_TYPE(int, 32)
262-
_DEFINE_OUT_TYPE(uint, 32)
263-
264246
_DEFINE_VEC4_CAST(float, int)
265247
_DEFINE_VEC4_CAST(int, float)
266248
_DEFINE_VEC4_CAST(float, uint)
@@ -276,8 +258,6 @@ _DEFINE_CAST(pixelf32, float4)
276258
_DEFINE_CAST(pixelf32, pixelf32)
277259
_DEFINE_CAST(float4, pixelf32)
278260

279-
_DEFINE_OUT_PIXELF(32, float)
280-
281261
_DEFINE_PIXELF_CAST(32, float4, int4)
282262
_DEFINE_PIXELF_CAST(32, float4, uint4)
283263

@@ -298,8 +278,6 @@ _DEFINE_CAST(half4, half4)
298278
_DEFINE_CAST(pixelf16, half4)
299279
_DEFINE_CAST(pixelf16, pixelf16)
300280
_DEFINE_CAST(half4, pixelf16)
301-
_DEFINE_OUT_TYPE(half, 16)
302-
_DEFINE_OUT_PIXELF(16, half)
303281
_DEFINE_READ_1D_PIXELF(16, trap)
304282
_DEFINE_READ_2D_PIXELF(16, trap)
305283
_DEFINE_READ_3D_PIXELF(16, trap)
@@ -311,11 +289,9 @@ _DEFINE_READ_2D_PIXELF(16, clamp)
311289
_DEFINE_READ_3D_PIXELF(16, clamp)
312290
#endif
313291

314-
#undef _DEFINE_OUT_TYPE
315292
#undef _DEFINE_VEC4_CAST
316293
#undef _DEFINE_VEC2_CAST
317294
#undef _DEFINE_CAST
318-
#undef _DEFINE_OUT_PIXELF
319295
#undef _DEFINE_READ_1D_PIXELF
320296
#undef _DEFINE_READ_2D_PIXELF
321297
#undef _DEFINE_READ_3D_PIXELF
@@ -327,15 +303,15 @@ _DEFINE_READ_3D_PIXELF(16, clamp)
327303
_CLC_DEF \
328304
elem_t##4 _Z17__spirv_ImageReadIDv4_##elem_t_mangled##14ocl_image1d_roiET_T0_T1_( \
329305
read_only image1d_t image, int x) { \
330-
return out_##elem_t( \
306+
return as_##elem_t##4( \
331307
__nvvm_suld_1d_v4i##elem_size##_clamp(image, x * sizeof(elem_t##4))); \
332308
}
333309

334310
#define _CLC_DEFINE_IMAGE2D_READ_BUILTIN(elem_t, elem_t_mangled, elem_size) \
335311
_CLC_DEF \
336312
elem_t##4 _Z17__spirv_ImageReadIDv4_##elem_t_mangled##14ocl_image2d_roDv2_iET_T0_T1_( \
337313
read_only image2d_t image, int2 coord) { \
338-
return out_##elem_t(__nvvm_suld_2d_v4i##elem_size##_clamp( \
314+
return as_##elem_t##4(__nvvm_suld_2d_v4i##elem_size##_clamp( \
339315
image, coord.x * sizeof(elem_t##4), coord.y)); \
340316
}
341317

@@ -344,7 +320,7 @@ _DEFINE_READ_3D_PIXELF(16, clamp)
344320
_CLC_DEF \
345321
elem_t##4 _Z17__spirv_ImageReadIDv4_##elem_t_mangled##14ocl_image3d_ro##coord_mangled##ET_T0_T1_( \
346322
read_only image3d_t image, int4 coord) { \
347-
return out_##elem_t(__nvvm_suld_3d_v4i##elem_size##_clamp( \
323+
return as_##elem_t##4(__nvvm_suld_3d_v4i##elem_size##_clamp( \
348324
image, coord.x * sizeof(elem_t##4), coord.y, coord.z)); \
349325
}
350326

@@ -463,7 +439,7 @@ float4 unnormalized_coord_3d(float4 coord, long image) {
463439
float coord, long image, int sampler) { \
464440
if (is_nearest_filter_mode(sampler)) { \
465441
int i = (int)__spirv_ocl_floor(coord); \
466-
return out_##elem_t( \
442+
return as_##elem_t##4( \
467443
__nvvm_suld_1d_v4i##elem_size##_##cuda_address_mode##_s( \
468444
image, i * sizeof(elem_t##4))); \
469445
} else { \
@@ -487,7 +463,7 @@ float4 unnormalized_coord_3d(float4 coord, long image) {
487463
if (is_nearest_filter_mode(sampler)) { \
488464
int i = (int)__spirv_ocl_floor(coord.x); \
489465
int j = (int)__spirv_ocl_floor(coord.y); \
490-
return out_##elem_t( \
466+
return as_##elem_t##4( \
491467
__nvvm_suld_2d_v4i##elem_size##_##cuda_address_mode##_s( \
492468
image, i * sizeof(elem_t##4), j)); \
493469
} else { \
@@ -520,7 +496,7 @@ float4 unnormalized_coord_3d(float4 coord, long image) {
520496
int i = (int)__spirv_ocl_floor(coord.x); \
521497
int j = (int)__spirv_ocl_floor(coord.y); \
522498
int k = (int)__spirv_ocl_floor(coord.z); \
523-
return out_##elem_t( \
499+
return as_##elem_t##4( \
524500
__nvvm_suld_3d_v4i##elem_size##_##cuda_address_mode##_s( \
525501
image, i * sizeof(elem_t##4), j, k)); \
526502
} else { \
@@ -570,7 +546,7 @@ float4 unnormalized_coord_3d(float4 coord, long image) {
570546
if (i > width - 1) { \
571547
i = i - width; \
572548
} \
573-
return out_##elem_t(__nvvm_suld_1d_v4i##elem_size##_trap_s( \
549+
return as_##elem_t##4(__nvvm_suld_1d_v4i##elem_size##_trap_s( \
574550
image, i * sizeof(elem_t##4))); \
575551
} else { \
576552
int i0, i1; \
@@ -609,7 +585,7 @@ float4 unnormalized_coord_3d(float4 coord, long image) {
609585
if (j > height - 1) { \
610586
j = j - height; \
611587
} \
612-
return out_##elem_t(__nvvm_suld_2d_v4i##elem_size##_trap_s( \
588+
return as_##elem_t##4(__nvvm_suld_2d_v4i##elem_size##_trap_s( \
613589
image, i * sizeof(elem_t##4), j)); \
614590
} else { \
615591
int i0, i1, j0, j1; \
@@ -666,7 +642,7 @@ float4 unnormalized_coord_3d(float4 coord, long image) {
666642
if (k > depth - 1) { \
667643
k = k - depth; \
668644
} \
669-
return out_##elem_t(__nvvm_suld_3d_v4i##elem_size##_trap_s( \
645+
return as_##elem_t##4(__nvvm_suld_3d_v4i##elem_size##_trap_s( \
670646
image, i * sizeof(elem_t##4), j, k)); \
671647
} else { \
672648
int i0, i1, j0, j1, k0, k1; \
@@ -735,7 +711,7 @@ float4 unnormalized_coord_3d(float4 coord, long image) {
735711
int i = (int)__spirv_ocl_floor(u); \
736712
i = __spirv_ocl_s_min(i, width - 1); \
737713
\
738-
return out_##elem_t(__nvvm_suld_1d_v4i##elem_size##_trap_s( \
714+
return as_##elem_t##4(__nvvm_suld_1d_v4i##elem_size##_trap_s( \
739715
image, i * sizeof(elem_t##4))); \
740716
} else { \
741717
int i0, i1; \
@@ -771,7 +747,7 @@ float4 unnormalized_coord_3d(float4 coord, long image) {
771747
int j = (int)__spirv_ocl_floor(v); \
772748
j = __spirv_ocl_s_min(j, height - 1); \
773749
\
774-
return out_##elem_t(__nvvm_suld_2d_v4i##elem_size##_trap_s( \
750+
return as_##elem_t##4(__nvvm_suld_2d_v4i##elem_size##_trap_s( \
775751
image, i * sizeof(elem_t##4), j)); \
776752
} else { \
777753
int i0, i1, j0, j1; \
@@ -821,7 +797,7 @@ float4 unnormalized_coord_3d(float4 coord, long image) {
821797
int k = (int)__spirv_ocl_floor(w); \
822798
k = __spirv_ocl_s_min(k, depth - 1); \
823799
\
824-
return out_##elem_t(__nvvm_suld_3d_v4i##elem_size##_trap_s( \
800+
return as_##elem_t##4(__nvvm_suld_3d_v4i##elem_size##_trap_s( \
825801
image, i * sizeof(elem_t##4), j, k)); \
826802
} else { \
827803
int i0, i1, j0, j1, k0, k1; \
@@ -913,8 +889,7 @@ _DEFINE_SAMPLED_LOADS(half, 16)
913889
/* Sampling algorithms are implemented assuming an \
914890
* unnormalized floating point coordinate as input. Need to transform as \
915891
* appropriate. */ \
916-
sampling_coord_t sampling_coord = \
917-
cast_##input_coord_t##_to_##sampling_coord_t(input_coord); \
892+
sampling_coord_t sampling_coord = as_##sampling_coord_t(input_coord); \
918893
if (is_normalized_coords(sampler)) { \
919894
sampling_coord = unnormalized_coord_##dims##d(sampling_coord, image); \
920895
} \

0 commit comments

Comments
 (0)