Skip to content

Commit 891759d

Browse files
committed
GlobalISel: Add scalarSameSizeAs LegalizeRule
Widen or narrow a type to a type with the same scalar size as another. This can be used to force G_PTR_ADD/G_PTRMASK's scalar operand to match the bitwidth of the pointer type. Use this to disallow narrower types for G_PTRMASK.
1 parent 8e62ffd commit 891759d

File tree

5 files changed

+49
-94
lines changed

5 files changed

+49
-94
lines changed

llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h

+23-5
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,11 @@ LegalizeMutation changeElementTo(unsigned TypeIdx, unsigned FromTypeIdx);
308308
/// Keep the same scalar or element type as the given type.
309309
LegalizeMutation changeElementTo(unsigned TypeIdx, LLT Ty);
310310

311+
/// Change the scalar size or element size to have the same scalar size as type
312+
/// index \p FromTypeIdx. Unlike changeElementTo, this drops pointer types and
313+
/// only changes the size.
314+
LegalizeMutation changeElementSizeTo(unsigned TypeIdx, unsigned FromTypeIdx);
315+
311316
/// Widen the scalar type or vector element type for the given type index to the
312317
/// next power of 2.
313318
LegalizeMutation widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned Min = 0);
@@ -893,12 +898,25 @@ class LegalizeRuleSet {
893898
return Query.Types[LargeTypeIdx].getScalarSizeInBits() >
894899
Query.Types[TypeIdx].getSizeInBits();
895900
},
901+
LegalizeMutations::changeElementSizeTo(TypeIdx, LargeTypeIdx));
902+
}
903+
904+
/// Narrow the scalar to match the size of another.
905+
LegalizeRuleSet &maxScalarSameAs(unsigned TypeIdx, unsigned NarrowTypeIdx) {
906+
typeIdx(TypeIdx);
907+
return narrowScalarIf(
896908
[=](const LegalityQuery &Query) {
897-
const LLT Ty = Query.Types[TypeIdx];
898-
const LLT LargeTy = Query.Types[LargeTypeIdx];
899-
LLT NewEltTy = LLT::scalar(LargeTy.getScalarSizeInBits());
900-
return std::make_pair(TypeIdx, Ty.changeElementType(NewEltTy));
901-
});
909+
return Query.Types[NarrowTypeIdx].getScalarSizeInBits() <
910+
Query.Types[TypeIdx].getSizeInBits();
911+
},
912+
LegalizeMutations::changeElementSizeTo(TypeIdx, NarrowTypeIdx));
913+
}
914+
915+
/// Change the type \p TypeIdx to have the same scalar size as type \p
916+
/// SameSizeIdx.
917+
LegalizeRuleSet &scalarSameSizeAs(unsigned TypeIdx, unsigned SameSizeIdx) {
918+
return minScalarSameAs(TypeIdx, SameSizeIdx)
919+
.maxScalarSameAs(TypeIdx, SameSizeIdx);
902920
}
903921

904922
/// Conditionally widen the scalar or elt to match the size of another.

llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp

+10
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,16 @@ LegalizeMutation LegalizeMutations::changeElementTo(unsigned TypeIdx,
4343
};
4444
}
4545

46+
LegalizeMutation LegalizeMutations::changeElementSizeTo(unsigned TypeIdx,
47+
unsigned FromTypeIdx) {
48+
return [=](const LegalityQuery &Query) {
49+
const LLT OldTy = Query.Types[TypeIdx];
50+
const LLT NewTy = Query.Types[FromTypeIdx];
51+
const LLT NewEltTy = LLT::scalar(NewTy.getScalarSizeInBits());
52+
return std::make_pair(TypeIdx, OldTy.changeElementType(NewEltTy));
53+
};
54+
}
55+
4656
LegalizeMutation LegalizeMutations::widenScalarOrEltToNextPow2(unsigned TypeIdx,
4757
unsigned Min) {
4858
return [=](const LegalityQuery &Query) {

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

+2-4
Original file line numberDiff line numberDiff line change
@@ -738,10 +738,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
738738
.scalarize(0);
739739

740740
getActionDefinitionsBuilder(G_PTRMASK)
741-
.legalIf(typeInSet(1, {S64, S32}))
742-
.minScalar(1, S32)
743-
.maxScalarIf(sizeIs(0, 32), 1, S32)
744-
.maxScalarIf(sizeIs(0, 64), 1, S64)
741+
.legalIf(all(sameSize(0, 1), typeInSet(1, {S64, S32})))
742+
.scalarSameSizeAs(1, 0)
745743
.scalarize(0);
746744

747745
auto &CmpBuilder =

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir

-74
Original file line numberDiff line numberDiff line change
@@ -337,31 +337,6 @@ body: |
337337
338338
...
339339

340-
---
341-
name: ptrmask_p0_s32_sgpr_sgpr_sgpr
342-
legalized: true
343-
regBankSelected: true
344-
345-
body: |
346-
bb.0:
347-
liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
348-
349-
; CHECK-LABEL: name: ptrmask_p0_s32_sgpr_sgpr_sgpr
350-
; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
351-
; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
352-
; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
353-
; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
354-
; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY1]], implicit-def $scc
355-
; CHECK: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY3]], [[COPY1]], implicit-def $scc
356-
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1
357-
; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
358-
%0:sgpr(p0) = COPY $sgpr0_sgpr1
359-
%1:sgpr(s32) = COPY $sgpr2
360-
%2:sgpr(p0) = G_PTRMASK %0, %1
361-
S_ENDPGM 0, implicit %2
362-
363-
...
364-
365340
---
366341
name: ptrmask_p0_s64_sgpr_sgpr_clearhi1
367342
legalized: true
@@ -756,55 +731,6 @@ body: |
756731
757732
...
758733

759-
---
760-
name: ptrmask_p0_s32_vgpr_vgpr_vgpr
761-
legalized: true
762-
regBankSelected: true
763-
764-
body: |
765-
bb.0:
766-
liveins: $vgpr0_vgpr1, $vgpr2
767-
768-
; CHECK-LABEL: name: ptrmask_p0_s32_vgpr_vgpr_vgpr
769-
; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
770-
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
771-
; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
772-
; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
773-
; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], [[COPY1]], implicit $exec
774-
; CHECK: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY3]], [[COPY1]], implicit $exec
775-
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
776-
; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
777-
%0:vgpr(p0) = COPY $vgpr0_vgpr1
778-
%1:vgpr(s32) = COPY $vgpr2
779-
%2:vgpr(p0) = G_PTRMASK %0, %1
780-
S_ENDPGM 0, implicit %2
781-
782-
...
783-
784-
---
785-
name: ptrmask_p0_s32_vgpr_vgpr_vgpr_0xffffffff
786-
legalized: true
787-
regBankSelected: true
788-
789-
body: |
790-
bb.0:
791-
liveins: $vgpr0_vgpr1, $vgpr2
792-
793-
; CHECK-LABEL: name: ptrmask_p0_s32_vgpr_vgpr_vgpr_0xffffffff
794-
; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
795-
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
796-
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
797-
; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
798-
; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec
799-
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1
800-
; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
801-
%0:vgpr(p0) = COPY $vgpr0_vgpr1
802-
%1:vgpr(s32) = G_CONSTANT i32 -1
803-
%2:vgpr(p0) = G_PTRMASK %0, %1
804-
S_ENDPGM 0, implicit %2
805-
806-
...
807-
808734
---
809735
name: ptrmask_p0_s64_vgpr_vgpr_clearlo1
810736
legalized: true

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptrmask.mir

+14-11
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@ body: |
1010
; CHECK-LABEL: name: ptrmask_p1_s16
1111
; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
1212
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
13-
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
14-
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
15-
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
16-
; CHECK: [[PTRMASK:%[0-9]+]]:_(p1) = G_PTRMASK [[COPY]], [[AND]](s32)
13+
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535
14+
; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32)
15+
; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]]
16+
; CHECK: [[PTRMASK:%[0-9]+]]:_(p1) = G_PTRMASK [[COPY]], [[AND]](s64)
1717
; CHECK: $vgpr0_vgpr1 = COPY [[PTRMASK]](p1)
1818
%0:_(p1) = COPY $vgpr0_vgpr1
1919
%1:_(s32) = COPY $vgpr2
@@ -31,7 +31,8 @@ body: |
3131
; CHECK-LABEL: name: ptrmask_p1_s32
3232
; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
3333
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
34-
; CHECK: [[PTRMASK:%[0-9]+]]:_(p1) = G_PTRMASK [[COPY]], [[COPY1]](s32)
34+
; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY1]](s32)
35+
; CHECK: [[PTRMASK:%[0-9]+]]:_(p1) = G_PTRMASK [[COPY]], [[ZEXT]](s64)
3536
; CHECK: $vgpr0_vgpr1 = COPY [[PTRMASK]](p1)
3637
%0:_(p1) = COPY $vgpr0_vgpr1
3738
%1:_(s32) = COPY $vgpr2
@@ -83,10 +84,10 @@ body: |
8384
; CHECK-LABEL: name: ptrmask_p0_s16
8485
; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
8586
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
86-
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
87-
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
88-
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
89-
; CHECK: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[COPY]], [[AND]](s32)
87+
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535
88+
; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32)
89+
; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]]
90+
; CHECK: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[COPY]], [[AND]](s64)
9091
; CHECK: $vgpr0_vgpr1 = COPY [[PTRMASK]](p0)
9192
%0:_(p0) = COPY $vgpr0_vgpr1
9293
%1:_(s32) = COPY $vgpr2
@@ -104,7 +105,8 @@ body: |
104105
; CHECK-LABEL: name: ptrmask_p0_s32
105106
; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
106107
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
107-
; CHECK: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[COPY]], [[COPY1]](s32)
108+
; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY1]](s32)
109+
; CHECK: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[COPY]], [[ZEXT]](s64)
108110
; CHECK: $vgpr0_vgpr1 = COPY [[PTRMASK]](p0)
109111
%0:_(p0) = COPY $vgpr0_vgpr1
110112
%1:_(s32) = COPY $vgpr2
@@ -194,7 +196,8 @@ body: |
194196
; CHECK-LABEL: name: ptrmask_p3_s64
195197
; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
196198
; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr1_vgpr2
197-
; CHECK: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[COPY1]](s64)
199+
; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
200+
; CHECK: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[TRUNC]](s32)
198201
; CHECK: $vgpr0 = COPY [[PTRMASK]](p3)
199202
%0:_(p3) = COPY $vgpr0
200203
%1:_(s64) = COPY $vgpr1_vgpr2

0 commit comments

Comments
 (0)