Skip to content

Commit 9057546

Browse files
committed
AMDGPU: Add clamp bit to dot builtins
Differential Revision: https://reviews.llvm.org/D50011
llvm-svn: 338471
1 parent bb30ef7 commit 9057546

File tree

4 files changed: +83 -28 lines changed

clang/include/clang/Basic/BuiltinsAMDGPU.def

+7-7
Original file line number | Diff line number | Diff line change
@@ -124,13 +124,13 @@ TARGET_BUILTIN(__builtin_amdgcn_fmed3h, "hhhh", "nc", "gfx9-insts")
124124
// Deep learning builtins.
125125
//===----------------------------------------------------------------------===//
126126

127-
TARGET_BUILTIN(__builtin_amdgcn_fdot2, "fV2hV2hf", "nc", "dl-insts")
128-
TARGET_BUILTIN(__builtin_amdgcn_sdot2, "SiV2SsV2SsSi", "nc", "dl-insts")
129-
TARGET_BUILTIN(__builtin_amdgcn_udot2, "UiV2UsV2UsUi", "nc", "dl-insts")
130-
TARGET_BUILTIN(__builtin_amdgcn_sdot4, "SiSiSiSi", "nc", "dl-insts")
131-
TARGET_BUILTIN(__builtin_amdgcn_udot4, "UiUiUiUi", "nc", "dl-insts")
132-
TARGET_BUILTIN(__builtin_amdgcn_sdot8, "SiSiSiSi", "nc", "dl-insts")
133-
TARGET_BUILTIN(__builtin_amdgcn_udot8, "UiUiUiUi", "nc", "dl-insts")
127+
TARGET_BUILTIN(__builtin_amdgcn_fdot2, "fV2hV2hfIb", "nc", "dl-insts")
128+
TARGET_BUILTIN(__builtin_amdgcn_sdot2, "SiV2SsV2SsSiIb", "nc", "dl-insts")
129+
TARGET_BUILTIN(__builtin_amdgcn_udot2, "UiV2UsV2UsUiIb", "nc", "dl-insts")
130+
TARGET_BUILTIN(__builtin_amdgcn_sdot4, "SiSiSiSiIb", "nc", "dl-insts")
131+
TARGET_BUILTIN(__builtin_amdgcn_udot4, "UiUiUiUiIb", "nc", "dl-insts")
132+
TARGET_BUILTIN(__builtin_amdgcn_sdot8, "SiSiSiSiIb", "nc", "dl-insts")
133+
TARGET_BUILTIN(__builtin_amdgcn_udot8, "UiUiUiUiIb", "nc", "dl-insts")
134134

135135
//===----------------------------------------------------------------------===//
136136
// Special builtins.
[New file added by this commit; its path was not captured in this page extract]
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,25 @@
1+
// REQUIRES: amdgpu-registered-target
2+
3+
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx906 -verify -S -emit-llvm -o - %s
4+
5+
typedef unsigned int uint;
6+
typedef half __attribute__((ext_vector_type(2))) half2;
7+
typedef short __attribute__((ext_vector_type(2))) short2;
8+
typedef unsigned short __attribute__((ext_vector_type(2))) ushort2;
9+
10+
kernel void builtins_amdgcn_dl_insts_err(
11+
global float *fOut, global int *siOut, global uint *uiOut,
12+
half2 v2hA, half2 v2hB, float fC,
13+
short2 v2ssA, short2 v2ssB, int siA, int siB, int siC,
14+
ushort2 v2usA, ushort2 v2usB, uint uiA, uint uiB, uint uiC, uint isClamp) {
15+
fOut[0] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, isClamp == 0 ? false : true); // expected-error {{'__builtin_amdgcn_fdot2' must be a constant integer}}
16+
17+
siOut[0] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, isClamp == 0 ? false : true); // expected-error {{'__builtin_amdgcn_sdot2' must be a constant integer}}
18+
uiOut[0] = __builtin_amdgcn_udot2(v2usA, v2usB, uiC, isClamp == 0 ? false : true); // expected-error {{'__builtin_amdgcn_udot2' must be a constant integer}}
19+
20+
siOut[1] = __builtin_amdgcn_sdot4(siA, siB, siC, isClamp == 0 ? false : true); // expected-error {{'__builtin_amdgcn_sdot4' must be a constant integer}}
21+
uiOut[1] = __builtin_amdgcn_udot4(uiA, uiB, uiC, isClamp == 0 ? false : true); // expected-error {{'__builtin_amdgcn_udot4' must be a constant integer}}
22+
23+
siOut[2] = __builtin_amdgcn_sdot8(siA, siB, siC, isClamp == 0 ? false : true); // expected-error {{'__builtin_amdgcn_sdot8' must be a constant integer}}
24+
uiOut[2] = __builtin_amdgcn_udot8(uiA, uiB, uiC, isClamp == 0 ? false : true); // expected-error {{'__builtin_amdgcn_udot8' must be a constant integer}}
25+
}

clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl

+17-7
Original file line number | Diff line number | Diff line change
@@ -12,14 +12,24 @@ kernel void builtins_amdgcn_dl_insts_err(
1212
half2 v2hA, half2 v2hB, float fC,
1313
short2 v2ssA, short2 v2ssB, int siA, int siB, int siC,
1414
ushort2 v2usA, ushort2 v2usB, uint uiA, uint uiB, uint uiC) {
15-
fOut[0] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dl-insts}}
15+
fOut[0] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, false); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dl-insts}}
16+
fOut[1] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, true); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dl-insts}}
1617

17-
siOut[0] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC); // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dl-insts}}
18-
uiOut[0] = __builtin_amdgcn_udot2(v2usA, v2usB, uiC); // expected-error {{'__builtin_amdgcn_udot2' needs target feature dl-insts}}
18+
siOut[0] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, false); // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dl-insts}}
19+
siOut[1] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, true); // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dl-insts}}
1920

20-
siOut[1] = __builtin_amdgcn_sdot4(siA, siB, siC); // expected-error {{'__builtin_amdgcn_sdot4' needs target feature dl-insts}}
21-
uiOut[1] = __builtin_amdgcn_udot4(uiA, uiB, uiC); // expected-error {{'__builtin_amdgcn_udot4' needs target feature dl-insts}}
21+
uiOut[0] = __builtin_amdgcn_udot2(v2usA, v2usB, uiC, false); // expected-error {{'__builtin_amdgcn_udot2' needs target feature dl-insts}}
22+
uiOut[1] = __builtin_amdgcn_udot2(v2usA, v2usB, uiC, true); // expected-error {{'__builtin_amdgcn_udot2' needs target feature dl-insts}}
2223

23-
siOut[2] = __builtin_amdgcn_sdot8(siA, siB, siC); // expected-error {{'__builtin_amdgcn_sdot8' needs target feature dl-insts}}
24-
uiOut[2] = __builtin_amdgcn_udot8(uiA, uiB, uiC); // expected-error {{'__builtin_amdgcn_udot8' needs target feature dl-insts}}
24+
siOut[2] = __builtin_amdgcn_sdot4(siA, siB, siC, false); // expected-error {{'__builtin_amdgcn_sdot4' needs target feature dl-insts}}
25+
siOut[3] = __builtin_amdgcn_sdot4(siA, siB, siC, true); // expected-error {{'__builtin_amdgcn_sdot4' needs target feature dl-insts}}
26+
27+
uiOut[2] = __builtin_amdgcn_udot4(uiA, uiB, uiC, false); // expected-error {{'__builtin_amdgcn_udot4' needs target feature dl-insts}}
28+
uiOut[3] = __builtin_amdgcn_udot4(uiA, uiB, uiC, true); // expected-error {{'__builtin_amdgcn_udot4' needs target feature dl-insts}}
29+
30+
siOut[4] = __builtin_amdgcn_sdot8(siA, siB, siC, false); // expected-error {{'__builtin_amdgcn_sdot8' needs target feature dl-insts}}
31+
siOut[5] = __builtin_amdgcn_sdot8(siA, siB, siC, true); // expected-error {{'__builtin_amdgcn_sdot8' needs target feature dl-insts}}
32+
33+
uiOut[4] = __builtin_amdgcn_udot8(uiA, uiB, uiC, false); // expected-error {{'__builtin_amdgcn_udot8' needs target feature dl-insts}}
34+
uiOut[5] = __builtin_amdgcn_udot8(uiA, uiB, uiC, true); // expected-error {{'__builtin_amdgcn_udot8' needs target feature dl-insts}}
2535
}

clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts.cl

+34-14
Original file line number | Diff line number | Diff line change
@@ -8,29 +8,49 @@ typedef short __attribute__((ext_vector_type(2))) short2;
88
typedef unsigned short __attribute__((ext_vector_type(2))) ushort2;
99

1010
// CHECK-LABEL: @builtins_amdgcn_dl_insts
11-
// CHECK: call float @llvm.amdgcn.fdot2
11+
// CHECK: call float @llvm.amdgcn.fdot2(<2 x half> %v2hA, <2 x half> %v2hB, float %fC, i1 false)
12+
// CHECK: call float @llvm.amdgcn.fdot2(<2 x half> %v2hA, <2 x half> %v2hB, float %fC, i1 true)
1213

13-
// CHECK: call i32 @llvm.amdgcn.sdot2
14-
// CHECK: call i32 @llvm.amdgcn.udot2
14+
// CHECK: call i32 @llvm.amdgcn.sdot2(<2 x i16> %v2ssA, <2 x i16> %v2ssB, i32 %siC, i1 false)
15+
// CHECK: call i32 @llvm.amdgcn.sdot2(<2 x i16> %v2ssA, <2 x i16> %v2ssB, i32 %siC, i1 true)
1516

16-
// CHECK: call i32 @llvm.amdgcn.sdot4
17-
// CHECK: call i32 @llvm.amdgcn.udot4
17+
// CHECK: call i32 @llvm.amdgcn.udot2(<2 x i16> %v2usA, <2 x i16> %v2usB, i32 %uiC, i1 false)
18+
// CHECK: call i32 @llvm.amdgcn.udot2(<2 x i16> %v2usA, <2 x i16> %v2usB, i32 %uiC, i1 true)
1819

19-
// CHECK: call i32 @llvm.amdgcn.sdot8
20-
// CHECK: call i32 @llvm.amdgcn.udot8
20+
// CHECK: call i32 @llvm.amdgcn.sdot4(i32 %siA, i32 %siB, i32 %siC, i1 false)
21+
// CHECK: call i32 @llvm.amdgcn.sdot4(i32 %siA, i32 %siB, i32 %siC, i1 true)
22+
23+
// CHECK: call i32 @llvm.amdgcn.udot4(i32 %uiA, i32 %uiB, i32 %uiC, i1 false)
24+
// CHECK: call i32 @llvm.amdgcn.udot4(i32 %uiA, i32 %uiB, i32 %uiC, i1 true)
25+
26+
// CHECK: call i32 @llvm.amdgcn.sdot8(i32 %siA, i32 %siB, i32 %siC, i1 false)
27+
// CHECK: call i32 @llvm.amdgcn.sdot8(i32 %siA, i32 %siB, i32 %siC, i1 true)
28+
29+
// CHECK: call i32 @llvm.amdgcn.udot8(i32 %uiA, i32 %uiB, i32 %uiC, i1 false)
30+
// CHECK: call i32 @llvm.amdgcn.udot8(i32 %uiA, i32 %uiB, i32 %uiC, i1 true)
2131
kernel void builtins_amdgcn_dl_insts(
2232
global float *fOut, global int *siOut, global uint *uiOut,
2333
half2 v2hA, half2 v2hB, float fC,
2434
short2 v2ssA, short2 v2ssB, int siA, int siB, int siC,
2535
ushort2 v2usA, ushort2 v2usB, uint uiA, uint uiB, uint uiC) {
26-
fOut[0] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC);
36+
fOut[0] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, false);
37+
fOut[1] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, true);
38+
39+
siOut[0] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, false);
40+
siOut[1] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, true);
41+
42+
uiOut[0] = __builtin_amdgcn_udot2(v2usA, v2usB, uiC, false);
43+
uiOut[1] = __builtin_amdgcn_udot2(v2usA, v2usB, uiC, true);
44+
45+
siOut[2] = __builtin_amdgcn_sdot4(siA, siB, siC, false);
46+
siOut[3] = __builtin_amdgcn_sdot4(siA, siB, siC, true);
2747

28-
siOut[0] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC);
29-
uiOut[0] = __builtin_amdgcn_udot2(v2usA, v2usB, uiC);
48+
uiOut[2] = __builtin_amdgcn_udot4(uiA, uiB, uiC, false);
49+
uiOut[3] = __builtin_amdgcn_udot4(uiA, uiB, uiC, true);
3050

31-
siOut[1] = __builtin_amdgcn_sdot4(siA, siB, siC);
32-
uiOut[1] = __builtin_amdgcn_udot4(uiA, uiB, uiC);
51+
siOut[4] = __builtin_amdgcn_sdot8(siA, siB, siC, false);
52+
siOut[5] = __builtin_amdgcn_sdot8(siA, siB, siC, true);
3353

34-
siOut[2] = __builtin_amdgcn_sdot8(siA, siB, siC);
35-
uiOut[2] = __builtin_amdgcn_udot8(uiA, uiB, uiC);
54+
uiOut[4] = __builtin_amdgcn_udot8(uiA, uiB, uiC, false);
55+
uiOut[5] = __builtin_amdgcn_udot8(uiA, uiB, uiC, true);
3656
}

0 commit comments

Comments
 (0)