Skip to content

Commit 1a25ae7

Browse files
t4c1Pavel V Chupin
authored and
Pavel V Chupin
committed
use wrappers for intrinsics
1 parent c628a2a commit 1a25ae7

File tree

2 files changed

+152
-25
lines changed

2 files changed

+152
-25
lines changed

libclc/ptx-nvidiacl/libspirv/images/image.cl

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -58,74 +58,74 @@ int __clc__sampled_image3d_unpack_sampler(__ocl_sampled_image3d_ro_t) __asm(
5858

5959
// NVVM helpers
6060
// Prototypes for the "_trap_s" surface-load helpers (1d/2d/3d, v4i16 and v4i32).
// Each takes a `long` first argument followed by one int per dimension and
// returns struct out_16 or struct out_32.
// In this diff view a line after "NN-" is the old binding (directly to an
// llvm.nvvm.suld.* intrinsic) and a line after "NN+" is the new binding (to a
// __clc_llvm_nvvm_suld_* wrapper symbol).
// NOTE(review): only the 1d v4i16 variant binds to a label ending in "_s";
// the other five labels have no such suffix. Confirm this is intentional.
struct out_16
61-
__nvvm_suld_1d_v4i16_trap_s(long, int) __asm("llvm.nvvm.suld.1d.v4i16.trap");
61+
__nvvm_suld_1d_v4i16_trap_s(long, int) __asm("__clc_llvm_nvvm_suld_1d_v4i16_trap_s");
6262
struct out_16
6363
__nvvm_suld_2d_v4i16_trap_s(long, int,
64-
int) __asm("llvm.nvvm.suld.2d.v4i16.trap");
64+
int) __asm("__clc_llvm_nvvm_suld_2d_v4i16_trap");
6565
struct out_16
6666
__nvvm_suld_3d_v4i16_trap_s(long, int, int,
67-
int) __asm("llvm.nvvm.suld.3d.v4i16.trap");
67+
int) __asm("__clc_llvm_nvvm_suld_3d_v4i16_trap");
6868
struct out_32
69-
__nvvm_suld_1d_v4i32_trap_s(long, int) __asm("llvm.nvvm.suld.1d.v4i32.trap");
69+
__nvvm_suld_1d_v4i32_trap_s(long, int) __asm("__clc_llvm_nvvm_suld_1d_v4i32_trap");
7070
struct out_32
7171
__nvvm_suld_2d_v4i32_trap_s(long, int,
72-
int) __asm("llvm.nvvm.suld.2d.v4i32.trap");
72+
int) __asm("__clc_llvm_nvvm_suld_2d_v4i32_trap");
7373
struct out_32
7474
__nvvm_suld_3d_v4i32_trap_s(long, int, int,
75-
int) __asm("llvm.nvvm.suld.3d.v4i32.trap");
75+
int) __asm("__clc_llvm_nvvm_suld_3d_v4i32_trap");
7676

7777
// Prototypes for the "_clamp_s" surface-load helpers (1d/2d/3d, v4i16 and v4i32).
// Each takes a `long` first argument followed by one int per dimension and
// returns struct out_16 or struct out_32.
// In this diff view a line after "NN-" is the old binding (directly to an
// llvm.nvvm.suld.*.clamp intrinsic) and a line after "NN+" is the new binding
// (to a __clc_llvm_nvvm_suld_*_clamp wrapper symbol).
struct out_16
78-
__nvvm_suld_1d_v4i16_clamp_s(long, int) __asm("llvm.nvvm.suld.1d.v4i16.clamp");
78+
__nvvm_suld_1d_v4i16_clamp_s(long, int) __asm("__clc_llvm_nvvm_suld_1d_v4i16_clamp");
7979
struct out_16
8080
__nvvm_suld_2d_v4i16_clamp_s(long, int,
81-
int) __asm("llvm.nvvm.suld.2d.v4i16.clamp");
81+
int) __asm("__clc_llvm_nvvm_suld_2d_v4i16_clamp");
8282
struct out_16
8383
__nvvm_suld_3d_v4i16_clamp_s(long, int, int,
84-
int) __asm("llvm.nvvm.suld.3d.v4i16.clamp");
84+
int) __asm("__clc_llvm_nvvm_suld_3d_v4i16_clamp");
8585
struct out_32
86-
__nvvm_suld_1d_v4i32_clamp_s(long, int) __asm("llvm.nvvm.suld.1d.v4i32.clamp");
86+
__nvvm_suld_1d_v4i32_clamp_s(long, int) __asm("__clc_llvm_nvvm_suld_1d_v4i32_clamp");
8787
struct out_32
8888
__nvvm_suld_2d_v4i32_clamp_s(long, int,
89-
int) __asm("llvm.nvvm.suld.2d.v4i32.clamp");
89+
int) __asm("__clc_llvm_nvvm_suld_2d_v4i32_clamp");
9090
struct out_32
9191
__nvvm_suld_3d_v4i32_clamp_s(long, int, int,
92-
int) __asm("llvm.nvvm.suld.3d.v4i32.clamp");
92+
int) __asm("__clc_llvm_nvvm_suld_3d_v4i32_clamp");
9393

9494
// Prototypes for the "_zero_s" surface-load helpers (1d/2d/3d, v4i16 and v4i32).
// Each takes a `long` first argument followed by one int per dimension and
// returns struct out_16 or struct out_32.
// In this diff view a line after "NN-" is the old binding (directly to an
// llvm.nvvm.suld.*.zero intrinsic) and a line after "NN+" is the new binding
// (to a __clc_llvm_nvvm_suld_*_zero wrapper symbol).
struct out_16
95-
__nvvm_suld_1d_v4i16_zero_s(long, int) __asm("llvm.nvvm.suld.1d.v4i16.zero");
95+
__nvvm_suld_1d_v4i16_zero_s(long, int) __asm("__clc_llvm_nvvm_suld_1d_v4i16_zero");
9696
struct out_16
9797
__nvvm_suld_2d_v4i16_zero_s(long, int,
98-
int) __asm("llvm.nvvm.suld.2d.v4i16.zero");
98+
int) __asm("__clc_llvm_nvvm_suld_2d_v4i16_zero");
9999
struct out_16
100100
__nvvm_suld_3d_v4i16_zero_s(long, int, int,
101-
int) __asm("llvm.nvvm.suld.3d.v4i16.zero");
101+
int) __asm("__clc_llvm_nvvm_suld_3d_v4i16_zero");
102102
struct out_32
103-
__nvvm_suld_1d_v4i32_zero_s(long, int) __asm("llvm.nvvm.suld.1d.v4i32.zero");
103+
__nvvm_suld_1d_v4i32_zero_s(long, int) __asm("__clc_llvm_nvvm_suld_1d_v4i32_zero");
104104
struct out_32
105105
__nvvm_suld_2d_v4i32_zero_s(long, int,
106-
int) __asm("llvm.nvvm.suld.2d.v4i32.zero");
106+
int) __asm("__clc_llvm_nvvm_suld_2d_v4i32_zero");
107107
struct out_32
108108
__nvvm_suld_3d_v4i32_zero_s(long, int, int,
109-
int) __asm("llvm.nvvm.suld.3d.v4i32.zero");
109+
int) __asm("__clc_llvm_nvvm_suld_3d_v4i32_zero");
110110

111111
// Prototypes for the image-typed "_clamp" surface-load helpers (1d/2d/3d,
// v4i16 and v4i32). The first argument is a read_only image1d_t/2d_t/3d_t
// rather than a `long`; the remaining ints are one per dimension.
// In this diff view a line after "NN-" is the old binding (directly to an
// llvm.nvvm.suld.*.clamp intrinsic) and a line after "NN+" is the new binding
// (to a __clc_llvm_nvvm_suld_*_clamp wrapper symbol).
// NOTE(review): these bind to the same __clc_llvm_nvvm_suld_*_clamp labels as
// the `long`-handle "_clamp_s" prototypes, so two differently-typed
// declarations share one symbol. Confirm the wrapper's first parameter type
// is compatible with both.
struct out_16
112112
__nvvm_suld_1d_v4i16_clamp(read_only image1d_t,
113-
int) __asm("llvm.nvvm.suld.1d.v4i16.clamp");
113+
int) __asm("__clc_llvm_nvvm_suld_1d_v4i16_clamp");
114114
struct out_16
115115
__nvvm_suld_2d_v4i16_clamp(read_only image2d_t, int,
116-
int) __asm("llvm.nvvm.suld.2d.v4i16.clamp");
116+
int) __asm("__clc_llvm_nvvm_suld_2d_v4i16_clamp");
117117
struct out_16
118118
__nvvm_suld_3d_v4i16_clamp(read_only image3d_t, int, int,
119-
int) __asm("llvm.nvvm.suld.3d.v4i16.clamp");
119+
int) __asm("__clc_llvm_nvvm_suld_3d_v4i16_clamp");
120120
struct out_32
121121
__nvvm_suld_1d_v4i32_clamp(read_only image1d_t,
122-
int) __asm("llvm.nvvm.suld.1d.v4i32.clamp");
122+
int) __asm("__clc_llvm_nvvm_suld_1d_v4i32_clamp");
123123
struct out_32
124124
__nvvm_suld_2d_v4i32_clamp(read_only image2d_t, int,
125-
int) __asm("llvm.nvvm.suld.2d.v4i32.clamp");
125+
int) __asm("__clc_llvm_nvvm_suld_2d_v4i32_clamp");
126126
struct out_32
127127
__nvvm_suld_3d_v4i32_clamp(read_only image3d_t, int, int,
128-
int) __asm("llvm.nvvm.suld.3d.v4i32.clamp");
128+
int) __asm("__clc_llvm_nvvm_suld_3d_v4i32_clamp");
129129

130130
// Prototype bound directly to the llvm.nvvm.sust.b.1d.v4i16.clamp intrinsic
// (unchanged context in this diff). Takes a write_only image1d_t, one int and
// four shorts, and returns void, so no struct is returned by this declaration.
// Presumably the int is the x coordinate and the shorts are the four channel
// values to store; confirm against the NVVM intrinsic definition.
void __nvvm_sust_1d_v4i16_clamp(write_only image1d_t, int, short, short, short,
131131
short) __asm("llvm.nvvm.sust.b.1d.v4i16.clamp");

libclc/ptx-nvidiacl/libspirv/images/image_helpers.ll

Lines changed: 128 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,4 +32,131 @@ define i32 @__clc__sampler_extract_addressing_mode_prop(i32 %sampl) nounwind alw
3232
entry:
3333
%0 = lshr i32 %sampl, 2
3434
ret i32 %0
35-
}
35+
}
36+
37+
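; We need wrappers around the intrinsics because intrinsics are not allowed to return named structs.
; The wrappers below therefore use literal struct types and simply forward to the corresponding intrinsic.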
38+
; Wrapper for @llvm.nvvm.suld.1d.v4i16.trap: forwards (%img, %x) unchanged and
; returns the intrinsic's literal-struct {i16,i16,i16,i16} result as-is.
; Declared nounwind alwaysinline.
; NOTE(review): this is the only wrapper whose name ends in "_s"; the __asm
; label used for __nvvm_suld_1d_v4i16_trap_s in image.cl has the same spelling,
; so the two must be renamed together if the name is ever normalised.
declare {i16,i16,i16,i16} @llvm.nvvm.suld.1d.v4i16.trap(i64, i32)
39+
define {i16,i16,i16,i16} @__clc_llvm_nvvm_suld_1d_v4i16_trap_s(i64 %img, i32 %x) nounwind alwaysinline {
40+
entry:
41+
%0 = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.1d.v4i16.trap(i64 %img, i32 %x);
42+
ret {i16,i16,i16,i16} %0
43+
}
44+
45+
; Wrapper for @llvm.nvvm.suld.2d.v4i16.trap: forwards (%img, %x, %y) unchanged
; and returns the intrinsic's literal-struct {i16,i16,i16,i16} result as-is.
; Declared nounwind alwaysinline.
declare {i16,i16,i16,i16} @llvm.nvvm.suld.2d.v4i16.trap(i64, i32, i32)
46+
define {i16,i16,i16,i16} @__clc_llvm_nvvm_suld_2d_v4i16_trap(i64 %img, i32 %x, i32 %y) nounwind alwaysinline {
47+
entry:
48+
%0 = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.2d.v4i16.trap(i64 %img, i32 %x, i32 %y);
49+
ret {i16,i16,i16,i16} %0
50+
}
51+
52+
; Wrapper for @llvm.nvvm.suld.3d.v4i16.trap: forwards (%img, %x, %y, %z)
; unchanged and returns the intrinsic's literal-struct {i16,i16,i16,i16}
; result as-is. Declared nounwind alwaysinline.
declare {i16,i16,i16,i16} @llvm.nvvm.suld.3d.v4i16.trap(i64, i32, i32, i32)
53+
define {i16,i16,i16,i16} @__clc_llvm_nvvm_suld_3d_v4i16_trap(i64 %img, i32 %x, i32 %y, i32 %z) nounwind alwaysinline {
54+
entry:
55+
%0 = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.3d.v4i16.trap(i64 %img, i32 %x, i32 %y, i32 %z);
56+
ret {i16,i16,i16,i16} %0
57+
}
58+
59+
; Wrapper for @llvm.nvvm.suld.1d.v4i16.clamp: forwards (%img, %x) unchanged and
; returns the intrinsic's literal-struct {i16,i16,i16,i16} result as-is.
; Declared nounwind alwaysinline.
declare {i16,i16,i16,i16} @llvm.nvvm.suld.1d.v4i16.clamp(i64, i32)
60+
define {i16,i16,i16,i16} @__clc_llvm_nvvm_suld_1d_v4i16_clamp(i64 %img, i32 %x) nounwind alwaysinline {
61+
entry:
62+
%0 = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.1d.v4i16.clamp(i64 %img, i32 %x);
63+
ret {i16,i16,i16,i16} %0
64+
}
65+
66+
; Wrapper for @llvm.nvvm.suld.2d.v4i16.clamp: forwards (%img, %x, %y) unchanged
; and returns the intrinsic's literal-struct {i16,i16,i16,i16} result as-is.
; Declared nounwind alwaysinline.
declare {i16,i16,i16,i16} @llvm.nvvm.suld.2d.v4i16.clamp(i64, i32, i32)
67+
define {i16,i16,i16,i16} @__clc_llvm_nvvm_suld_2d_v4i16_clamp(i64 %img, i32 %x, i32 %y) nounwind alwaysinline {
68+
entry:
69+
%0 = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.2d.v4i16.clamp(i64 %img, i32 %x, i32 %y);
70+
ret {i16,i16,i16,i16} %0
71+
}
72+
73+
; Wrapper for @llvm.nvvm.suld.3d.v4i16.clamp: forwards (%img, %x, %y, %z)
; unchanged and returns the intrinsic's literal-struct {i16,i16,i16,i16}
; result as-is. Declared nounwind alwaysinline.
declare {i16,i16,i16,i16} @llvm.nvvm.suld.3d.v4i16.clamp(i64, i32, i32, i32)
74+
define {i16,i16,i16,i16} @__clc_llvm_nvvm_suld_3d_v4i16_clamp(i64 %img, i32 %x, i32 %y, i32 %z) nounwind alwaysinline {
75+
entry:
76+
%0 = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.3d.v4i16.clamp(i64 %img, i32 %x, i32 %y, i32 %z);
77+
ret {i16,i16,i16,i16} %0
78+
}
79+
80+
; Wrapper for @llvm.nvvm.suld.1d.v4i16.zero: forwards (%img, %x) unchanged and
; returns the intrinsic's literal-struct {i16,i16,i16,i16} result as-is.
; Declared nounwind alwaysinline.
declare {i16,i16,i16,i16} @llvm.nvvm.suld.1d.v4i16.zero(i64, i32)
81+
define {i16,i16,i16,i16} @__clc_llvm_nvvm_suld_1d_v4i16_zero(i64 %img, i32 %x) nounwind alwaysinline {
82+
entry:
83+
%0 = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.1d.v4i16.zero(i64 %img, i32 %x);
84+
ret {i16,i16,i16,i16} %0
85+
}
86+
87+
; Wrapper for @llvm.nvvm.suld.2d.v4i16.zero: forwards (%img, %x, %y) unchanged
; and returns the intrinsic's literal-struct {i16,i16,i16,i16} result as-is.
; Declared nounwind alwaysinline.
declare {i16,i16,i16,i16} @llvm.nvvm.suld.2d.v4i16.zero(i64, i32, i32)
88+
define {i16,i16,i16,i16} @__clc_llvm_nvvm_suld_2d_v4i16_zero(i64 %img, i32 %x, i32 %y) nounwind alwaysinline {
89+
entry:
90+
%0 = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.2d.v4i16.zero(i64 %img, i32 %x, i32 %y);
91+
ret {i16,i16,i16,i16} %0
92+
}
93+
94+
; Wrapper for @llvm.nvvm.suld.3d.v4i16.zero: forwards (%img, %x, %y, %z)
; unchanged and returns the intrinsic's literal-struct {i16,i16,i16,i16}
; result as-is. Declared nounwind alwaysinline.
declare {i16,i16,i16,i16} @llvm.nvvm.suld.3d.v4i16.zero(i64, i32, i32, i32)
95+
define {i16,i16,i16,i16} @__clc_llvm_nvvm_suld_3d_v4i16_zero(i64 %img, i32 %x, i32 %y, i32 %z) nounwind alwaysinline {
96+
entry:
97+
%0 = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.3d.v4i16.zero(i64 %img, i32 %x, i32 %y, i32 %z);
98+
ret {i16,i16,i16,i16} %0
99+
}
100+
101+
; Wrapper for @llvm.nvvm.suld.1d.v4i32.trap: forwards (%img, %x) unchanged and
; returns the intrinsic's literal-struct {i32,i32,i32,i32} result as-is.
; Declared nounwind alwaysinline.
declare {i32,i32,i32,i32} @llvm.nvvm.suld.1d.v4i32.trap(i64, i32)
102+
define {i32,i32,i32,i32} @__clc_llvm_nvvm_suld_1d_v4i32_trap(i64 %img, i32 %x) nounwind alwaysinline {
103+
entry:
104+
%0 = tail call {i32,i32,i32,i32} @llvm.nvvm.suld.1d.v4i32.trap(i64 %img, i32 %x);
105+
ret {i32,i32,i32,i32} %0
106+
}
107+
108+
; Wrapper for @llvm.nvvm.suld.2d.v4i32.trap: forwards (%img, %x, %y) unchanged
; and returns the intrinsic's literal-struct {i32,i32,i32,i32} result as-is.
; Declared nounwind alwaysinline.
declare {i32,i32,i32,i32} @llvm.nvvm.suld.2d.v4i32.trap(i64, i32, i32)
109+
define {i32,i32,i32,i32} @__clc_llvm_nvvm_suld_2d_v4i32_trap(i64 %img, i32 %x, i32 %y) nounwind alwaysinline {
110+
entry:
111+
%0 = tail call {i32,i32,i32,i32} @llvm.nvvm.suld.2d.v4i32.trap(i64 %img, i32 %x, i32 %y);
112+
ret {i32,i32,i32,i32} %0
113+
}
114+
115+
; Wrapper for @llvm.nvvm.suld.3d.v4i32.trap: forwards (%img, %x, %y, %z)
; unchanged and returns the intrinsic's literal-struct {i32,i32,i32,i32}
; result as-is. Declared nounwind alwaysinline.
declare {i32,i32,i32,i32} @llvm.nvvm.suld.3d.v4i32.trap(i64, i32, i32, i32)
116+
define {i32,i32,i32,i32} @__clc_llvm_nvvm_suld_3d_v4i32_trap(i64 %img, i32 %x, i32 %y, i32 %z) nounwind alwaysinline {
117+
entry:
118+
%0 = tail call {i32,i32,i32,i32} @llvm.nvvm.suld.3d.v4i32.trap(i64 %img, i32 %x, i32 %y, i32 %z);
119+
ret {i32,i32,i32,i32} %0
120+
}
121+
122+
; Wrapper for @llvm.nvvm.suld.1d.v4i32.clamp: forwards (%img, %x) unchanged and
; returns the intrinsic's literal-struct {i32,i32,i32,i32} result as-is.
; Declared nounwind alwaysinline.
declare {i32,i32,i32,i32} @llvm.nvvm.suld.1d.v4i32.clamp(i64, i32)
123+
define {i32,i32,i32,i32} @__clc_llvm_nvvm_suld_1d_v4i32_clamp(i64 %img, i32 %x) nounwind alwaysinline {
124+
entry:
125+
%0 = tail call {i32,i32,i32,i32} @llvm.nvvm.suld.1d.v4i32.clamp(i64 %img, i32 %x);
126+
ret {i32,i32,i32,i32} %0
127+
}
128+
129+
; Wrapper for @llvm.nvvm.suld.2d.v4i32.clamp: forwards (%img, %x, %y) unchanged
; and returns the intrinsic's literal-struct {i32,i32,i32,i32} result as-is.
; Declared nounwind alwaysinline.
declare {i32,i32,i32,i32} @llvm.nvvm.suld.2d.v4i32.clamp(i64, i32, i32)
130+
define {i32,i32,i32,i32} @__clc_llvm_nvvm_suld_2d_v4i32_clamp(i64 %img, i32 %x, i32 %y) nounwind alwaysinline {
131+
entry:
132+
%0 = tail call {i32,i32,i32,i32} @llvm.nvvm.suld.2d.v4i32.clamp(i64 %img, i32 %x, i32 %y);
133+
ret {i32,i32,i32,i32} %0
134+
}
135+
136+
; Wrapper for @llvm.nvvm.suld.3d.v4i32.clamp: forwards (%img, %x, %y, %z)
; unchanged and returns the intrinsic's literal-struct {i32,i32,i32,i32}
; result as-is. Declared nounwind alwaysinline.
declare {i32,i32,i32,i32} @llvm.nvvm.suld.3d.v4i32.clamp(i64, i32, i32, i32)
137+
define {i32,i32,i32,i32} @__clc_llvm_nvvm_suld_3d_v4i32_clamp(i64 %img, i32 %x, i32 %y, i32 %z) nounwind alwaysinline {
138+
entry:
139+
%0 = tail call {i32,i32,i32,i32} @llvm.nvvm.suld.3d.v4i32.clamp(i64 %img, i32 %x, i32 %y, i32 %z);
140+
ret {i32,i32,i32,i32} %0
141+
}
142+
143+
; Wrapper for @llvm.nvvm.suld.1d.v4i32.zero: forwards (%img, %x) unchanged and
; returns the intrinsic's literal-struct {i32,i32,i32,i32} result as-is.
; Declared nounwind alwaysinline.
declare {i32,i32,i32,i32} @llvm.nvvm.suld.1d.v4i32.zero(i64, i32)
144+
define {i32,i32,i32,i32} @__clc_llvm_nvvm_suld_1d_v4i32_zero(i64 %img, i32 %x) nounwind alwaysinline {
145+
entry:
146+
%0 = tail call {i32,i32,i32,i32} @llvm.nvvm.suld.1d.v4i32.zero(i64 %img, i32 %x);
147+
ret {i32,i32,i32,i32} %0
148+
}
149+
150+
; Wrapper for @llvm.nvvm.suld.2d.v4i32.zero: forwards (%img, %x, %y) unchanged
; and returns the intrinsic's literal-struct {i32,i32,i32,i32} result as-is.
; Declared nounwind alwaysinline.
declare {i32,i32,i32,i32} @llvm.nvvm.suld.2d.v4i32.zero(i64, i32, i32)
151+
define {i32,i32,i32,i32} @__clc_llvm_nvvm_suld_2d_v4i32_zero(i64 %img, i32 %x, i32 %y) nounwind alwaysinline {
152+
entry:
153+
%0 = tail call {i32,i32,i32,i32} @llvm.nvvm.suld.2d.v4i32.zero(i64 %img, i32 %x, i32 %y);
154+
ret {i32,i32,i32,i32} %0
155+
}
156+
157+
; Wrapper for @llvm.nvvm.suld.3d.v4i32.zero: forwards (%img, %x, %y, %z)
; unchanged and returns the intrinsic's literal-struct {i32,i32,i32,i32}
; result as-is. Declared nounwind alwaysinline.
declare {i32,i32,i32,i32} @llvm.nvvm.suld.3d.v4i32.zero(i64, i32, i32, i32)
158+
define {i32,i32,i32,i32} @__clc_llvm_nvvm_suld_3d_v4i32_zero(i64 %img, i32 %x, i32 %y, i32 %z) nounwind alwaysinline {
159+
entry:
160+
%0 = tail call {i32,i32,i32,i32} @llvm.nvvm.suld.3d.v4i32.zero(i64 %img, i32 %x, i32 %y, i32 %z);
161+
ret {i32,i32,i32,i32} %0
162+
}

0 commit comments

Comments
 (0)