@@ -8,29 +8,49 @@ typedef short __attribute__((ext_vector_type(2))) short2;
8
8
typedef unsigned short __attribute__((ext_vector_type (2 ))) ushort2 ;
9
9
10
10
// CHECK-LABEL: @builtins_amdgcn_dl_insts
11
- // CHECK: call float @llvm.amdgcn.fdot2
11
+ // CHECK: call float @llvm.amdgcn.fdot2(<2 x half> %v2hA, <2 x half> %v2hB, float %fC, i1 false)
12
+ // CHECK: call float @llvm.amdgcn.fdot2(<2 x half> %v2hA, <2 x half> %v2hB, float %fC, i1 true)
12
13
13
- // CHECK: call i32 @llvm.amdgcn.sdot2
14
- // CHECK: call i32 @llvm.amdgcn.udot2
14
+ // CHECK: call i32 @llvm.amdgcn.sdot2(<2 x i16> %v2ssA, <2 x i16> %v2ssB, i32 %siC, i1 false)
15
+ // CHECK: call i32 @llvm.amdgcn.sdot2(<2 x i16> %v2ssA, <2 x i16> %v2ssB, i32 %siC, i1 true)
15
16
16
- // CHECK: call i32 @llvm.amdgcn.sdot4
17
- // CHECK: call i32 @llvm.amdgcn.udot4
17
+ // CHECK: call i32 @llvm.amdgcn.udot2(<2 x i16> %v2usA, <2 x i16> %v2usB, i32 %uiC, i1 false)
18
+ // CHECK: call i32 @llvm.amdgcn.udot2(<2 x i16> %v2usA, <2 x i16> %v2usB, i32 %uiC, i1 true)
18
19
19
- // CHECK: call i32 @llvm.amdgcn.sdot8
20
- // CHECK: call i32 @llvm.amdgcn.udot8
20
+ // CHECK: call i32 @llvm.amdgcn.sdot4(i32 %siA, i32 %siB, i32 %siC, i1 false)
21
+ // CHECK: call i32 @llvm.amdgcn.sdot4(i32 %siA, i32 %siB, i32 %siC, i1 true)
22
+
23
+ // CHECK: call i32 @llvm.amdgcn.udot4(i32 %uiA, i32 %uiB, i32 %uiC, i1 false)
24
+ // CHECK: call i32 @llvm.amdgcn.udot4(i32 %uiA, i32 %uiB, i32 %uiC, i1 true)
25
+
26
+ // CHECK: call i32 @llvm.amdgcn.sdot8(i32 %siA, i32 %siB, i32 %siC, i1 false)
27
+ // CHECK: call i32 @llvm.amdgcn.sdot8(i32 %siA, i32 %siB, i32 %siC, i1 true)
28
+
29
+ // CHECK: call i32 @llvm.amdgcn.udot8(i32 %uiA, i32 %uiB, i32 %uiC, i1 false)
30
+ // CHECK: call i32 @llvm.amdgcn.udot8(i32 %uiA, i32 %uiB, i32 %uiC, i1 true)
21
31
kernel void builtins_amdgcn_dl_insts (
22
32
global float * fOut , global int * siOut , global uint * uiOut ,
23
33
half2 v2hA , half2 v2hB , float fC ,
24
34
short2 v2ssA , short2 v2ssB , int siA , int siB , int siC ,
25
35
ushort2 v2usA , ushort2 v2usB , uint uiA , uint uiB , uint uiC ) {
26
- fOut [0 ] = __builtin_amdgcn_fdot2 (v2hA , v2hB , fC );
36
+ fOut [0 ] = __builtin_amdgcn_fdot2 (v2hA , v2hB , fC , false);
37
+ fOut [1 ] = __builtin_amdgcn_fdot2 (v2hA , v2hB , fC , true);
38
+
39
+ siOut [0 ] = __builtin_amdgcn_sdot2 (v2ssA , v2ssB , siC , false);
40
+ siOut [1 ] = __builtin_amdgcn_sdot2 (v2ssA , v2ssB , siC , true);
41
+
42
+ uiOut [0 ] = __builtin_amdgcn_udot2 (v2usA , v2usB , uiC , false);
43
+ uiOut [1 ] = __builtin_amdgcn_udot2 (v2usA , v2usB , uiC , true);
44
+
45
+ siOut [2 ] = __builtin_amdgcn_sdot4 (siA , siB , siC , false);
46
+ siOut [3 ] = __builtin_amdgcn_sdot4 (siA , siB , siC , true);
27
47
28
- siOut [ 0 ] = __builtin_amdgcn_sdot2 ( v2ssA , v2ssB , siC );
29
- uiOut [0 ] = __builtin_amdgcn_udot2 ( v2usA , v2usB , uiC );
48
+ uiOut [ 2 ] = __builtin_amdgcn_udot4 ( uiA , uiB , uiC , false );
49
+ uiOut [3 ] = __builtin_amdgcn_udot4 ( uiA , uiB , uiC , true );
30
50
31
- siOut [1 ] = __builtin_amdgcn_sdot4 (siA , siB , siC );
32
- uiOut [ 1 ] = __builtin_amdgcn_udot4 ( uiA , uiB , uiC );
51
+ siOut [4 ] = __builtin_amdgcn_sdot8 (siA , siB , siC , false );
52
+ siOut [ 5 ] = __builtin_amdgcn_sdot8 ( siA , siB , siC , true );
33
53
34
- siOut [ 2 ] = __builtin_amdgcn_sdot8 ( siA , siB , siC );
35
- uiOut [2 ] = __builtin_amdgcn_udot8 (uiA , uiB , uiC );
54
+ uiOut [ 4 ] = __builtin_amdgcn_udot8 ( uiA , uiB , uiC , false );
55
+ uiOut [5 ] = __builtin_amdgcn_udot8 (uiA , uiB , uiC , true );
36
56
}
0 commit comments