Skip to content

Commit e72bc64

Browse files
authored
Use UMad for dot products on uint vectors (cp to 1.8.2502) (microsoft#7092)
Cherry-pick of microsoft#7059 to release-1.8.2502. Dot products on uint vectors did not use the unsigned mad operations when they were formed using the dot intrinsic, though they did when they resulted from the mul intrinsic. While such operations aren't very common, and the cases where the unsigned variant makes a difference are less common still, this was inconsistent both internally and with the clang implementation. This corrects the problem using a dummy op for udot, similar to how other intrinsics address similar problems, to pass the unsigned information to operation lowering. Incidentally adds unsigned notations to all the mul intrinsic entries. This is a non-functional change, as any one overload that specifies an unsigned variant will apply for all of them, but it is less confusing than having half of them do it and the other half not. The test verifies the output of a few dot operations with sint, float, and uint types for both dot and mul intrinsics. Fixes microsoft#7058 (cherry picked from commit 1fdebc4)
1 parent e52b6bc commit e72bc64

File tree

6 files changed

+177
-9
lines changed

6 files changed

+177
-9
lines changed

include/dxc/HlslIntrinsicOp.h

+4
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,7 @@ enum class IntrinsicOp {
367367
IOP_WavePrefixUSum,
368368
IOP_uabs,
369369
IOP_uclamp,
370+
IOP_udot,
370371
IOP_ufirstbithigh,
371372
IOP_umad,
372373
IOP_umax,
@@ -391,6 +392,7 @@ inline bool HasUnsignedIntrinsicOpcode(IntrinsicOp opcode) {
391392
case IntrinsicOp::IOP_WavePrefixSum:
392393
case IntrinsicOp::IOP_abs:
393394
case IntrinsicOp::IOP_clamp:
395+
case IntrinsicOp::IOP_dot:
394396
case IntrinsicOp::IOP_firstbithigh:
395397
case IntrinsicOp::IOP_mad:
396398
case IntrinsicOp::IOP_max:
@@ -432,6 +434,8 @@ inline unsigned GetUnsignedIntrinsicOpcode(IntrinsicOp opcode) {
432434
return static_cast<unsigned>(IntrinsicOp::IOP_uabs);
433435
case IntrinsicOp::IOP_clamp:
434436
return static_cast<unsigned>(IntrinsicOp::IOP_uclamp);
437+
case IntrinsicOp::IOP_dot:
438+
return static_cast<unsigned>(IntrinsicOp::IOP_udot);
435439
case IntrinsicOp::IOP_firstbithigh:
436440
return static_cast<unsigned>(IntrinsicOp::IOP_ufirstbithigh);
437441
case IntrinsicOp::IOP_mad:

lib/HLSL/HLOperationLower.cpp

+3-1
Original file line numberDiff line numberDiff line change
@@ -2480,7 +2480,8 @@ Value *TranslateDot(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
24802480
if (Ty->getScalarType()->isFloatingPointTy()) {
24812481
return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder);
24822482
} else {
2483-
return TranslateIDot(arg0, arg1, vecSize, hlslOP, Builder);
2483+
return TranslateIDot(arg0, arg1, vecSize, hlslOP, Builder,
2484+
IOP == IntrinsicOp::IOP_udot);
24842485
}
24852486
}
24862487

@@ -6789,6 +6790,7 @@ IntrinsicLower gLowerTable[] = {
67896790
DXIL::OpCode::WavePrefixOp},
67906791
{IntrinsicOp::IOP_uabs, TranslateUAbs, DXIL::OpCode::NumOpCodes},
67916792
{IntrinsicOp::IOP_uclamp, TranslateClamp, DXIL::OpCode::NumOpCodes},
6793+
{IntrinsicOp::IOP_udot, TranslateDot, DXIL::OpCode::NumOpCodes},
67926794
{IntrinsicOp::IOP_ufirstbithigh, TranslateFirstbitHi,
67936795
DXIL::OpCode::FirstbitHi},
67946796
{IntrinsicOp::IOP_umad, TranslateFUITrinary, DXIL::OpCode::UMad},

tools/clang/lib/SPIRV/SpirvEmitter.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -8957,6 +8957,7 @@ SpirvEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) {
89578957
return nullptr;
89588958
}
89598959
case hlsl::IntrinsicOp::IOP_dot:
8960+
case hlsl::IntrinsicOp::IOP_udot:
89608961
retVal = processIntrinsicDot(callExpr);
89618962
break;
89628963
case hlsl::IntrinsicOp::IOP_GroupMemoryBarrier:

tools/clang/test/CodeGenHLSL/dot.hlsl

+161
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
// RUN: %dxc -T vs_6_0 -DFUNC=dot %s | FileCheck %s
2+
// RUN: %dxc -T vs_6_0 -DFUNC=mul %s | FileCheck %s
3+
// RUN: %dxc -T vs_6_0 -DFUNC=dot -fcgl %s | FileCheck %s --check-prefix=CGLDOT
4+
// RUN: %dxc -T vs_6_0 -DFUNC=mul -fcgl %s | FileCheck %s --check-prefix=CGLMUL
5+
6+
// Verifies correct implementation of dot and mul with vectors for various sizes and types.
7+
8+
// Partially pilfered from SPIRV's intrinsic.dot.hlsl
9+
10+
float4 main(int1 i1[2] : IO, int2 i2[2] : IT, int3 i3[2] : IH, int4 i4[2] : IF,
11+
float1 f1[2] : FO, float2 f2[2] : FT, float3 f3[2] : FH, float4 f4[2] : FF,
12+
uint1 u1[2] : UO, uint2 u2[2] : UT, uint3 u3[2] : UH, uint4 u4[2] : UF) : SV_Position {
13+
int i = 0;
14+
// CHECK-DAG: [[I0:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef)
15+
// CHECK-DAG: [[I1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 1, i8 0, i32 undef)
16+
// CHECK: mul i32 [[I0]], [[I1]]
17+
// CGLDOT: call i32 @"dx.hl.op.rn.i32 (i32, <1 x i32>, <1 x i32>)"(i32 134, <1 x i32> %{{.*}}, <1 x i32> %{{.*}})
18+
// CGLMUL: call i32 @"dx.hl.op.rn.i32 (i32, <1 x i32>, <1 x i32>)"(i32 167, <1 x i32> %{{.*}}, <1 x i32> %{{.*}})
19+
i += FUNC(i1[0], i1[1]);
20+
21+
// CHECK-DAG: [[I00:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 0, i32 undef)
22+
// CHECK-DAG: [[I01:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 1, i32 undef)
23+
// CHECK-DAG: [[I10:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 1, i8 0, i32 undef)
24+
// CHECK-DAG: [[I11:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 1, i8 1, i32 undef)
25+
26+
// CHECK: [[MUL:%.*]] = mul i32 [[I00]], [[I10]]
27+
// CHECK: call i32 @dx.op.tertiary.i32(i32 48, i32 [[I01]], i32 [[I11]], i32 [[MUL]]) ; IMad(a,b,c)
28+
// CGLDOT: call i32 @"dx.hl.op.rn.i32 (i32, <2 x i32>, <2 x i32>)"(i32 134, <2 x i32> %{{.*}}, <2 x i32> %{{.*}})
29+
// CGLMUL: call i32 @"dx.hl.op.rn.i32 (i32, <2 x i32>, <2 x i32>)"(i32 167, <2 x i32> %{{.*}}, <2 x i32> %{{.*}})
30+
i += FUNC(i2[0], i2[1]);
31+
32+
// CHECK-DAG: [[I00:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 2, i32 0, i8 0, i32 undef)
33+
// CHECK-DAG: [[I01:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 2, i32 0, i8 1, i32 undef)
34+
// CHECK-DAG: [[I02:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 2, i32 0, i8 2, i32 undef)
35+
// CHECK-DAG: [[I10:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 2, i32 1, i8 0, i32 undef)
36+
// CHECK-DAG: [[I11:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 2, i32 1, i8 1, i32 undef)
37+
// CHECK-DAG: [[I12:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 2, i32 1, i8 2, i32 undef)
38+
39+
// PING and PONG are just conveniences to track the result as it accumulates.
40+
// Two names are needed since we can't capture and match the source and result in the same line with the same variable.
41+
// CHECK: [[PING:%.*]] = mul i32 [[I00]], [[I10]]
42+
// CHECK: [[PONG:%.*]] = call i32 @dx.op.tertiary.i32(i32 48, i32 [[I01]], i32 [[I11]], i32 [[PING]]) ; IMad(a,b,c)
43+
// CHECK: [[PING:%.*]] = call i32 @dx.op.tertiary.i32(i32 48, i32 [[I02]], i32 [[I12]], i32 [[PONG]]) ; IMad(a,b,c)
44+
// CGLDOT: call i32 @"dx.hl.op.rn.i32 (i32, <3 x i32>, <3 x i32>)"(i32 134, <3 x i32> %{{.*}}, <3 x i32> %{{.*}})
45+
// CGLMUL: call i32 @"dx.hl.op.rn.i32 (i32, <3 x i32>, <3 x i32>)"(i32 167, <3 x i32> %{{.*}}, <3 x i32> %{{.*}})
46+
i += FUNC(i3[0], i3[1]);
47+
48+
// CHECK-DAG: [[I00:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 3, i32 0, i8 0, i32 undef)
49+
// CHECK-DAG: [[I01:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 3, i32 0, i8 1, i32 undef)
50+
// CHECK-DAG: [[I02:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 3, i32 0, i8 2, i32 undef)
51+
// CHECK-DAG: [[I03:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 3, i32 0, i8 3, i32 undef)
52+
// CHECK-DAG: [[I10:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 3, i32 1, i8 0, i32 undef)
53+
// CHECK-DAG: [[I11:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 3, i32 1, i8 1, i32 undef)
54+
// CHECK-DAG: [[I12:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 3, i32 1, i8 2, i32 undef)
55+
// CHECK-DAG: [[I13:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 3, i32 1, i8 3, i32 undef)
56+
57+
// CHECK: [[PING:%.*]] = mul i32 [[I00]], [[I10]]
58+
// CHECK: [[PONG:%.*]] = call i32 @dx.op.tertiary.i32(i32 48, i32 [[I01]], i32 [[I11]], i32 [[PING]]) ; IMad(a,b,c)
59+
// CHECK: [[PING:%.*]] = call i32 @dx.op.tertiary.i32(i32 48, i32 [[I02]], i32 [[I12]], i32 [[PONG]]) ; IMad(a,b,c)
60+
// CHECK: [[PONG:%.*]] = call i32 @dx.op.tertiary.i32(i32 48, i32 [[I03]], i32 [[I13]], i32 [[PING]]) ; IMad(a,b,c)
61+
// CGLDOT: call i32 @"dx.hl.op.rn.i32 (i32, <4 x i32>, <4 x i32>)"(i32 134, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
62+
// CGLMUL: call i32 @"dx.hl.op.rn.i32 (i32, <4 x i32>, <4 x i32>)"(i32 167, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
63+
i += FUNC(i4[0], i4[1]);
64+
65+
float f = 0.0;
66+
67+
// CHECK-DAG: [[F0:%.*]] = call float @dx.op.loadInput.f32(i32 4, i32 4, i32 0, i8 0, i32 undef)
68+
// CHECK-DAG: [[F1:%.*]] = call float @dx.op.loadInput.f32(i32 4, i32 4, i32 1, i8 0, i32 undef)
69+
// CHECK: mul fast float [[F0]], [[F1]]
70+
// CGLDOT: call float @"dx.hl.op.rn.float (i32, <1 x float>, <1 x float>)"(i32 134, <1 x float> %{{.*}}, <1 x float> %{{.*}})
71+
// CGLMUL: call float @"dx.hl.op.rn.float (i32, <1 x float>, <1 x float>)"(i32 167, <1 x float> %{{.*}}, <1 x float> %{{.*}})
72+
f += FUNC(f1[0], f1[1]);
73+
74+
// CHECK-DAG: [[F00:%.*]] = call float @dx.op.loadInput.f32(i32 4, i32 5, i32 0, i8 0, i32 undef)
75+
// CHECK-DAG: [[F01:%.*]] = call float @dx.op.loadInput.f32(i32 4, i32 5, i32 0, i8 1, i32 undef)
76+
// CHECK-DAG: [[F10:%.*]] = call float @dx.op.loadInput.f32(i32 4, i32 5, i32 1, i8 0, i32 undef)
77+
// CHECK-DAG: [[F11:%.*]] = call float @dx.op.loadInput.f32(i32 4, i32 5, i32 1, i8 1, i32 undef)
78+
79+
// CHECK: call float @dx.op.dot2.f32(i32 54, float [[F00]], float [[F01]], float [[F10]], float [[F11]])
80+
// CGLDOT: call float @"dx.hl.op.rn.float (i32, <2 x float>, <2 x float>)"(i32 134, <2 x float> %{{.*}}, <2 x float> %{{.*}})
81+
// CGLMUL: call float @"dx.hl.op.rn.float (i32, <2 x float>, <2 x float>)"(i32 167, <2 x float> %{{.*}}, <2 x float> %{{.*}})
82+
f += FUNC(f2[0], f2[1]);
83+
84+
// CHECK-DAG: [[F00:%.*]] = call float @dx.op.loadInput.f32(i32 4, i32 6, i32 0, i8 0, i32 undef)
85+
// CHECK-DAG: [[F01:%.*]] = call float @dx.op.loadInput.f32(i32 4, i32 6, i32 0, i8 1, i32 undef)
86+
// CHECK-DAG: [[F02:%.*]] = call float @dx.op.loadInput.f32(i32 4, i32 6, i32 0, i8 2, i32 undef)
87+
// CHECK-DAG: [[F10:%.*]] = call float @dx.op.loadInput.f32(i32 4, i32 6, i32 1, i8 0, i32 undef)
88+
// CHECK-DAG: [[F11:%.*]] = call float @dx.op.loadInput.f32(i32 4, i32 6, i32 1, i8 1, i32 undef)
89+
// CHECK-DAG: [[F12:%.*]] = call float @dx.op.loadInput.f32(i32 4, i32 6, i32 1, i8 2, i32 undef)
90+
91+
// CHECK: call float @dx.op.dot3.f32(i32 55, float [[F00]], float [[F01]], float [[F02]], float [[F10]], float [[F11]], float [[F12]])
92+
// CGLDOT: call float @"dx.hl.op.rn.float (i32, <3 x float>, <3 x float>)"(i32 134, <3 x float> %{{.*}}, <3 x float> %{{.*}})
93+
// CGLMUL: call float @"dx.hl.op.rn.float (i32, <3 x float>, <3 x float>)"(i32 167, <3 x float> %{{.*}}, <3 x float> %{{.*}})
94+
f += FUNC(f3[0], f3[1]);
95+
96+
// CHECK-DAG: [[F00:%.*]] = call float @dx.op.loadInput.f32(i32 4, i32 7, i32 0, i8 0, i32 undef)
97+
// CHECK-DAG: [[F01:%.*]] = call float @dx.op.loadInput.f32(i32 4, i32 7, i32 0, i8 1, i32 undef)
98+
// CHECK-DAG: [[F02:%.*]] = call float @dx.op.loadInput.f32(i32 4, i32 7, i32 0, i8 2, i32 undef)
99+
// CHECK-DAG: [[F03:%.*]] = call float @dx.op.loadInput.f32(i32 4, i32 7, i32 0, i8 3, i32 undef)
100+
// CHECK-DAG: [[F10:%.*]] = call float @dx.op.loadInput.f32(i32 4, i32 7, i32 1, i8 0, i32 undef)
101+
// CHECK-DAG: [[F11:%.*]] = call float @dx.op.loadInput.f32(i32 4, i32 7, i32 1, i8 1, i32 undef)
102+
// CHECK-DAG: [[F12:%.*]] = call float @dx.op.loadInput.f32(i32 4, i32 7, i32 1, i8 2, i32 undef)
103+
// CHECK-DAG: [[F13:%.*]] = call float @dx.op.loadInput.f32(i32 4, i32 7, i32 1, i8 3, i32 undef)
104+
105+
// CHECK: call float @dx.op.dot4.f32(i32 56, float [[F00]], float [[F01]], float [[F02]], float [[F03]], float [[F10]], float [[F11]], float [[F12]], float [[F13]])
106+
// CGLDOT: call float @"dx.hl.op.rn.float (i32, <4 x float>, <4 x float>)"(i32 134, <4 x float> %{{.*}}, <4 x float> %{{.*}})
107+
// CGLMUL: call float @"dx.hl.op.rn.float (i32, <4 x float>, <4 x float>)"(i32 167, <4 x float> %{{.*}}, <4 x float> %{{.*}})
108+
f += FUNC(f4[0], f4[1]);
109+
110+
int u = 0;
111+
// CHECK-DAG: [[I0:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 8, i32 0, i8 0, i32 undef)
112+
// CHECK-DAG: [[I1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 8, i32 1, i8 0, i32 undef)
113+
// CHECK: mul i32 [[I0]], [[I1]]
114+
// CGLDOT: call i32 @"dx.hl.op.rn.i32 (i32, <1 x i32>, <1 x i32>)"(i32 349, <1 x i32> %{{.*}}, <1 x i32> %{{.*}})
115+
// CGLMUL: call i32 @"dx.hl.op.rn.i32 (i32, <1 x i32>, <1 x i32>)"(i32 354, <1 x i32> %{{.*}}, <1 x i32> %{{.*}})
116+
u += FUNC(u1[0], u1[1]);
117+
118+
// CHECK-DAG: [[I00:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 9, i32 0, i8 0, i32 undef)
119+
// CHECK-DAG: [[I01:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 9, i32 0, i8 1, i32 undef)
120+
// CHECK-DAG: [[I10:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 9, i32 1, i8 0, i32 undef)
121+
// CHECK-DAG: [[I11:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 9, i32 1, i8 1, i32 undef)
122+
123+
// CHECK: [[MUL:%.*]] = mul i32 [[I00]], [[I10]]
124+
// CHECK: call i32 @dx.op.tertiary.i32(i32 49, i32 [[I01]], i32 [[I11]], i32 [[MUL]]) ; UMad(a,b,c)
125+
// CGLDOT: call i32 @"dx.hl.op.rn.i32 (i32, <2 x i32>, <2 x i32>)"(i32 349, <2 x i32> %{{.*}}, <2 x i32> %{{.*}})
126+
// CGLMUL: call i32 @"dx.hl.op.rn.i32 (i32, <2 x i32>, <2 x i32>)"(i32 354, <2 x i32> %{{.*}}, <2 x i32> %{{.*}})
127+
u += FUNC(u2[0], u2[1]);
128+
129+
// CHECK-DAG: [[I00:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 10, i32 0, i8 0, i32 undef)
130+
// CHECK-DAG: [[I01:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 10, i32 0, i8 1, i32 undef)
131+
// CHECK-DAG: [[I02:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 10, i32 0, i8 2, i32 undef)
132+
// CHECK-DAG: [[I10:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 10, i32 1, i8 0, i32 undef)
133+
// CHECK-DAG: [[I11:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 10, i32 1, i8 1, i32 undef)
134+
// CHECK-DAG: [[I12:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 10, i32 1, i8 2, i32 undef)
135+
136+
// CHECK: [[PING:%.*]] = mul i32 [[I00]], [[I10]]
137+
// CHECK: [[PONG:%.*]] = call i32 @dx.op.tertiary.i32(i32 49, i32 [[I01]], i32 [[I11]], i32 [[PING]]) ; UMad(a,b,c)
138+
// CHECK: [[PING:%.*]] = call i32 @dx.op.tertiary.i32(i32 49, i32 [[I02]], i32 [[I12]], i32 [[PONG]]) ; UMad(a,b,c)
139+
// CGLDOT: call i32 @"dx.hl.op.rn.i32 (i32, <3 x i32>, <3 x i32>)"(i32 349, <3 x i32> %{{.*}}, <3 x i32> %{{.*}})
140+
// CGLMUL: call i32 @"dx.hl.op.rn.i32 (i32, <3 x i32>, <3 x i32>)"(i32 354, <3 x i32> %{{.*}}, <3 x i32> %{{.*}})
141+
u += FUNC(u3[0], u3[1]);
142+
143+
// CHECK-DAG: [[I00:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 11, i32 0, i8 0, i32 undef)
144+
// CHECK-DAG: [[I01:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 11, i32 0, i8 1, i32 undef)
145+
// CHECK-DAG: [[I02:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 11, i32 0, i8 2, i32 undef)
146+
// CHECK-DAG: [[I03:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 11, i32 0, i8 3, i32 undef)
147+
// CHECK-DAG: [[I10:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 11, i32 1, i8 0, i32 undef)
148+
// CHECK-DAG: [[I11:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 11, i32 1, i8 1, i32 undef)
149+
// CHECK-DAG: [[I12:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 11, i32 1, i8 2, i32 undef)
150+
// CHECK-DAG: [[I13:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 11, i32 1, i8 3, i32 undef)
151+
152+
// CHECK: [[PING:%.*]] = mul i32 [[I00]], [[I10]]
153+
// CHECK: [[PONG:%.*]] = call i32 @dx.op.tertiary.i32(i32 49, i32 [[I01]], i32 [[I11]], i32 [[PING]]) ; UMad(a,b,c)
154+
// CHECK: [[PING:%.*]] = call i32 @dx.op.tertiary.i32(i32 49, i32 [[I02]], i32 [[I12]], i32 [[PONG]]) ; UMad(a,b,c)
155+
// CHECK: [[PONG:%.*]] = call i32 @dx.op.tertiary.i32(i32 49, i32 [[I03]], i32 [[I13]], i32 [[PING]]) ; UMad(a,b,c)
156+
// CGLDOT: call i32 @"dx.hl.op.rn.i32 (i32, <4 x i32>, <4 x i32>)"(i32 349, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
157+
// CGLMUL: call i32 @"dx.hl.op.rn.i32 (i32, <4 x i32>, <4 x i32>)"(i32 354, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
158+
u += FUNC(u4[0], u4[1]);
159+
160+
return float4(i, f, u, 0);
161+
}

tools/clang/test/HLSLFileCheck/samples/d3d11/FluidCS11_BuildGridCS.hlsl

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
// CHECK: bufferLoad
55
// CHECK: FMax
66
// CHECK: FMin
7-
// CHECK: IMad
7+
// CHECK: UMad
88
// CHECK: bufferStore
99

1010
//--------------------------------------------------------------------------------------

utils/hct/gen_intrin_main.txt

+7-7
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ $match<0, 1> float_like [[rn]] determinant(in float_like<r, r> x);
124124
void [[]] DeviceMemoryBarrier() : syncdevicememory_ug;
125125
void [[]] DeviceMemoryBarrierWithGroupSync() : syncgroupanddevicememory_ug;
126126
$match<0, 1> float_like [[rn]] distance(in float_like<c> a, in $type1 b);
127-
$match<0, 1> numeric [[rn]] dot(in numeric<c> a, in $type1 b);
127+
$match<0, 1> numeric [[rn,unsigned_op=udot]] dot(in numeric<c> a, in $type1 b);
128128
$type1 [[rn]] dst(in numeric<4> a, in $type1 b);
129129
// void errorf(in string Format, ...);
130130
$type1 [[rn]] EvaluateAttributeAtSample(in numeric<> value, in uint index);
@@ -198,13 +198,13 @@ $type1 [[rn,unsigned_op=umax]] max(in numeric<> a, in $type1 b);
198198
$type1 [[rn,unsigned_op=umin]] min(in numeric<> a, in $type1 b);
199199
$type1 [[]] modf(in float_like<> x, out $type1 ip);
200200
uint<4> [[rn]] msad4(in uint reference, in uint<2> source, in uint<4> accum);
201-
numeric [[rn]] mul(in $match<1, 0> numeric a, in $match<2, 0> numeric b) : mul_ss;
202-
numeric<c2> [[rn]] mul(in $match<1, 0> numeric a, in $match<2, 0> numeric<c2> b) : mul_sv;
203-
numeric<r2, c2> [[rn]] mul(in $match<1, 0> numeric a, in $match<2, 0> numeric<r2, c2> b) : mul_sm;
204-
numeric<c> [[rn]] mul(in $match<1, 0> numeric<c> a, in $match<2, 0> numeric b) : mul_vs;
205-
numeric [[rn]] mul(in $match<1, 0> numeric<c> a, in $match<2, 0> numeric<c> b) : mul_vv;
201+
numeric [[rn,unsigned_op=umul]] mul(in $match<1, 0> numeric a, in $match<2, 0> numeric b) : mul_ss;
202+
numeric<c2> [[rn,unsigned_op=umul]] mul(in $match<1, 0> numeric a, in $match<2, 0> numeric<c2> b) : mul_sv;
203+
numeric<r2, c2> [[rn,unsigned_op=umul]] mul(in $match<1, 0> numeric a, in $match<2, 0> numeric<r2, c2> b) : mul_sm;
204+
numeric<c> [[rn,unsigned_op=umul]] mul(in $match<1, 0> numeric<c> a, in $match<2, 0> numeric b) : mul_vs;
205+
numeric [[rn,unsigned_op=umul]] mul(in $match<1, 0> numeric<c> a, in $match<2, 0> numeric<c> b) : mul_vv;
206206
numeric<c2> [[rn,unsigned_op=umul]] mul(in $match<1, 0> numeric<c> a, in col_major $match<2, 0> numeric<c, c2> b) : mul_vm;
207-
numeric<r, c> [[rn]] mul(in $match<1, 0> numeric<r, c> a, in $match<2, 0> numeric b) : mul_ms;
207+
numeric<r, c> [[rn,unsigned_op=umul]] mul(in $match<1, 0> numeric<r, c> a, in $match<2, 0> numeric b) : mul_ms;
208208
numeric<r> [[rn,unsigned_op=umul]] mul(in row_major $match<1, 0> numeric<r, c> a, in $match<2, 0> numeric<c> b) : mul_mv;
209209
numeric<r, c2> [[rn,unsigned_op=umul]] mul(in row_major $match<1, 0> numeric<r, c> a, in col_major $match<2, 0> numeric<c, c2> b) : mul_mm;
210210
$type1 [[rn]] normalize(in float_like<c> x);

0 commit comments

Comments
 (0)