Skip to content

Commit 94bf09a

Browse files
committed
[GISel][CombinerHelper] Combine op(trunc(x), trunc(y)) -> trunc(op(x, y))
1 parent 51349fb commit 94bf09a

File tree

11 files changed

+234
-264
lines changed

11 files changed

+234
-264
lines changed

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

+3-1
Original file line numberDiff line numberDiff line change
@@ -3083,8 +3083,10 @@ bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
30833083
return false;
30843084
case TargetOpcode::G_ANYEXT:
30853085
case TargetOpcode::G_SEXT:
3086-
case TargetOpcode::G_ZEXT: {
3086+
case TargetOpcode::G_ZEXT:
3087+
case TargetOpcode::G_TRUNC: {
30873088
// Match: logic (ext X), (ext Y) --> ext (logic X, Y)
3089+
// Match: logic (trunc X), (trunc Y) --> trunc (logic X, Y)
30883090
break;
30893091
}
30903092
case TargetOpcode::G_AND:

llvm/test/CodeGen/AArch64/GlobalISel/combine-op-trunc.mir

+15-27
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,8 @@ body: |
1414
; CHECK-NEXT: {{ $}}
1515
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
1616
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
17-
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
18-
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
19-
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC1]]
20-
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16)
21-
; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
17+
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[COPY1]]
18+
; CHECK-NEXT: $w0 = COPY [[AND]](s32)
2219
%0:_(s32) = COPY $w0
2320
%1:_(s32) = COPY $w1
2421
%2:_(s16) = G_TRUNC %0
@@ -37,10 +34,9 @@ body: |
3734
; CHECK-NEXT: {{ $}}
3835
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
3936
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
40-
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY]](<4 x s32>)
41-
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY1]](<4 x s32>)
42-
; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC]], [[TRUNC1]]
43-
; CHECK-NEXT: $x0 = COPY [[AND]](<4 x s16>)
37+
; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s32>) = G_AND [[COPY]], [[COPY1]]
38+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[AND]](<4 x s32>)
39+
; CHECK-NEXT: $x0 = COPY [[TRUNC]](<4 x s16>)
4440
%0:_(<4 x s32>) = COPY $q0
4541
%1:_(<4 x s32>) = COPY $q1
4642
%2:_(<4 x s16>) = G_TRUNC %0
@@ -60,11 +56,8 @@ body: |
6056
; CHECK-NEXT: {{ $}}
6157
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
6258
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
63-
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
64-
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
65-
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]]
66-
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
67-
; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
59+
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[COPY1]]
60+
; CHECK-NEXT: $w0 = COPY [[OR]](s32)
6861
%0:_(s32) = COPY $w0
6962
%1:_(s32) = COPY $w1
7063
%2:_(s16) = G_TRUNC %0
@@ -83,10 +76,9 @@ body: |
8376
; CHECK-NEXT: {{ $}}
8477
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
8578
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
86-
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY]](<4 x s32>)
87-
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY1]](<4 x s32>)
88-
; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[TRUNC]], [[TRUNC1]]
89-
; CHECK-NEXT: $x0 = COPY [[OR]](<4 x s16>)
79+
; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s32>) = G_OR [[COPY]], [[COPY1]]
80+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[OR]](<4 x s32>)
81+
; CHECK-NEXT: $x0 = COPY [[TRUNC]](<4 x s16>)
9082
%0:_(<4 x s32>) = COPY $q0
9183
%1:_(<4 x s32>) = COPY $q1
9284
%2:_(<4 x s16>) = G_TRUNC %0
@@ -106,11 +98,8 @@ body: |
10698
; CHECK-NEXT: {{ $}}
10799
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
108100
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
109-
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
110-
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
111-
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[TRUNC1]]
112-
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s16)
113-
; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
101+
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[COPY1]]
102+
; CHECK-NEXT: $w0 = COPY [[XOR]](s32)
114103
%0:_(s32) = COPY $w0
115104
%1:_(s32) = COPY $w1
116105
%2:_(s16) = G_TRUNC %0
@@ -129,10 +118,9 @@ body: |
129118
; CHECK-NEXT: {{ $}}
130119
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
131120
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
132-
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY]](<4 x s32>)
133-
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY1]](<4 x s32>)
134-
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[TRUNC]], [[TRUNC1]]
135-
; CHECK-NEXT: $x0 = COPY [[XOR]](<4 x s16>)
121+
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s32>) = G_XOR [[COPY]], [[COPY1]]
122+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[XOR]](<4 x s32>)
123+
; CHECK-NEXT: $x0 = COPY [[TRUNC]](<4 x s16>)
136124
%0:_(<4 x s32>) = COPY $q0
137125
%1:_(<4 x s32>) = COPY $q1
138126
%2:_(<4 x s16>) = G_TRUNC %0

llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown- --aarch64postlegalizercombiner-only-enable-rule="select_to_logical" %s -o - | FileCheck %s
2+
# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
33
# RUN: llc -debugify-and-strip-all-safe -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
44
# REQUIRES: asserts
5+
56
---
67
# select (c, x, x) -> x
78
name: test_combine_select_same_res

llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-narrow-binop-feeding-add.mir

+6-8
Original file line numberDiff line numberDiff line change
@@ -84,10 +84,9 @@ body: |
8484
; CHECK: liveins: $x0, $x1
8585
; CHECK: %binop_lhs:_(s64) = COPY $x0
8686
; CHECK: %binop_rhs:_(s64) = COPY $x1
87-
; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %binop_lhs(s64)
88-
; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC %binop_rhs(s64)
89-
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[TRUNC1]]
90-
; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[AND]](s32)
87+
; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND %binop_lhs, %binop_rhs
88+
; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64)
89+
; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s32)
9190
; CHECK: $x0 = COPY [[ZEXT]](s64)
9291
; CHECK: RET_ReallyLR implicit $x0
9392
%binop_lhs:_(s64) = COPY $x0
@@ -131,10 +130,9 @@ body: |
131130
; CHECK: liveins: $x0, $x1
132131
; CHECK: %binop_lhs:_(s64) = COPY $x0
133132
; CHECK: %binop_rhs:_(s64) = COPY $x1
134-
; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %binop_lhs(s64)
135-
; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC %binop_rhs(s64)
136-
; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[TRUNC]], [[TRUNC1]]
137-
; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[XOR]](s32)
133+
; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR %binop_lhs, %binop_rhs
134+
; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[XOR]](s64)
135+
; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s32)
138136
; CHECK: $x0 = COPY [[ZEXT]](s64)
139137
; CHECK: RET_ReallyLR implicit $x0
140138
%binop_lhs:_(s64) = COPY $x0

llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-hoist-same-hands.mir

+3-4
Original file line numberDiff line numberDiff line change
@@ -268,10 +268,9 @@ body: |
268268
; CHECK: liveins: $w0, $w1
269269
; CHECK: %x_wide:_(s32) = COPY $w0
270270
; CHECK: %y_wide:_(s32) = COPY $w1
271-
; CHECK: %x:_(s1) = G_TRUNC %x_wide(s32)
272-
; CHECK: %y:_(s1) = G_TRUNC %y_wide(s32)
273-
; CHECK: [[OR:%[0-9]+]]:_(s1) = G_OR %x, %y
274-
; CHECK: %logic_op:_(s64) = G_SEXT [[OR]](s1)
271+
; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR %x_wide, %y_wide
272+
; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[OR]](s32)
273+
; CHECK: %logic_op:_(s64) = G_SEXT [[TRUNC]](s1)
275274
; CHECK: $x0 = COPY %logic_op(s64)
276275
; CHECK: RET_ReallyLR implicit $x0
277276
%x_wide:_(s32) = COPY $w0

llvm/test/CodeGen/AArch64/pr58431.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
define i32 @f(i64 %0) {
55
; CHECK-LABEL: f:
66
; CHECK: // %bb.0:
7-
; CHECK-NEXT: mov w8, #10
7+
; CHECK-NEXT: mov w8, #10 // =0xa
88
; CHECK-NEXT: mov w9, w0
99
; CHECK-NEXT: udiv x10, x9, x8
1010
; CHECK-NEXT: msub x0, x10, x8, x9

llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll

+94-97
Original file line numberDiff line numberDiff line change
@@ -1804,113 +1804,110 @@ define i24 @v_fshl_i24(i24 %lhs, i24 %rhs, i24 %amt) {
18041804
define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 inreg %amt.arg) {
18051805
; GFX6-LABEL: s_fshl_v2i24:
18061806
; GFX6: ; %bb.0:
1807+
; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v2, 24
1808+
; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v2
18071809
; GFX6-NEXT: s_lshr_b32 s6, s0, 16
1808-
; GFX6-NEXT: s_lshr_b32 s7, s0, 24
1809-
; GFX6-NEXT: s_and_b32 s9, s0, 0xff
1810-
; GFX6-NEXT: s_bfe_u32 s0, s0, 0x80008
1811-
; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
1812-
; GFX6-NEXT: s_lshl_b32 s0, s0, 8
1810+
; GFX6-NEXT: s_lshr_b32 s7, s1, 8
1811+
; GFX6-NEXT: s_bfe_u32 s9, s0, 0x80008
1812+
; GFX6-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
1813+
; GFX6-NEXT: v_cvt_u32_f32_e32 v2, v2
1814+
; GFX6-NEXT: s_and_b32 s8, s0, 0xff
1815+
; GFX6-NEXT: s_lshl_b32 s9, s9, 8
18131816
; GFX6-NEXT: s_and_b32 s6, s6, 0xff
1814-
; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0
1815-
; GFX6-NEXT: s_or_b32 s0, s9, s0
1817+
; GFX6-NEXT: s_and_b32 s1, s1, 0xff
1818+
; GFX6-NEXT: v_mov_b32_e32 v0, s0
1819+
; GFX6-NEXT: s_and_b32 s0, s7, 0xff
1820+
; GFX6-NEXT: s_or_b32 s8, s8, s9
18161821
; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
1817-
; GFX6-NEXT: s_lshr_b32 s8, s1, 8
1822+
; GFX6-NEXT: v_alignbit_b32 v0, s1, v0, 24
18181823
; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
1824+
; GFX6-NEXT: s_and_b32 s8, 0xffff, s8
18191825
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
1826+
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
1827+
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
1828+
; GFX6-NEXT: v_mov_b32_e32 v3, 0xffffffe8
1829+
; GFX6-NEXT: s_or_b32 s6, s8, s6
1830+
; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
1831+
; GFX6-NEXT: s_lshr_b32 s0, s2, 16
1832+
; GFX6-NEXT: s_lshr_b32 s1, s3, 8
1833+
; GFX6-NEXT: s_bfe_u32 s8, s2, 0x80008
1834+
; GFX6-NEXT: v_mul_lo_u32 v3, v2, v3
1835+
; GFX6-NEXT: s_and_b32 s7, s2, 0xff
1836+
; GFX6-NEXT: s_lshl_b32 s8, s8, 8
1837+
; GFX6-NEXT: s_and_b32 s0, s0, 0xff
1838+
; GFX6-NEXT: s_and_b32 s3, s3, 0xff
1839+
; GFX6-NEXT: v_mov_b32_e32 v1, s2
18201840
; GFX6-NEXT: s_and_b32 s1, s1, 0xff
1821-
; GFX6-NEXT: s_or_b32 s0, s0, s6
1822-
; GFX6-NEXT: s_lshl_b32 s1, s1, 8
1823-
; GFX6-NEXT: s_and_b32 s6, s8, 0xff
1824-
; GFX6-NEXT: s_or_b32 s1, s7, s1
1825-
; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
1826-
; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
1841+
; GFX6-NEXT: s_or_b32 s7, s7, s8
1842+
; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
1843+
; GFX6-NEXT: v_alignbit_b32 v1, s3, v1, 24
1844+
; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
1845+
; GFX6-NEXT: s_and_b32 s7, 0xffff, s7
1846+
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
1847+
; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
1848+
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
1849+
; GFX6-NEXT: s_or_b32 s0, s7, s0
1850+
; GFX6-NEXT: v_or_b32_e32 v1, s1, v1
1851+
; GFX6-NEXT: s_lshr_b32 s1, s4, 16
1852+
; GFX6-NEXT: s_bfe_u32 s7, s4, 0x80008
1853+
; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3
1854+
; GFX6-NEXT: s_and_b32 s3, s4, 0xff
1855+
; GFX6-NEXT: s_lshl_b32 s7, s7, 8
1856+
; GFX6-NEXT: s_and_b32 s1, s1, 0xff
1857+
; GFX6-NEXT: s_or_b32 s3, s3, s7
18271858
; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
1828-
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
1829-
; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
1830-
; GFX6-NEXT: s_or_b32 s1, s1, s6
1831-
; GFX6-NEXT: s_lshr_b32 s6, s2, 16
1832-
; GFX6-NEXT: s_lshr_b32 s7, s2, 24
1833-
; GFX6-NEXT: s_and_b32 s9, s2, 0xff
1834-
; GFX6-NEXT: s_bfe_u32 s2, s2, 0x80008
1835-
; GFX6-NEXT: s_lshl_b32 s2, s2, 8
1836-
; GFX6-NEXT: s_and_b32 s6, s6, 0xff
1837-
; GFX6-NEXT: s_or_b32 s2, s9, s2
1838-
; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
1839-
; GFX6-NEXT: v_mov_b32_e32 v1, 0xffffffe8
1840-
; GFX6-NEXT: s_lshr_b32 s8, s3, 8
1841-
; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
1842-
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
1843-
; GFX6-NEXT: s_and_b32 s3, s3, 0xff
1844-
; GFX6-NEXT: v_mul_lo_u32 v1, v0, v1
1845-
; GFX6-NEXT: s_or_b32 s2, s2, s6
1846-
; GFX6-NEXT: s_lshl_b32 s3, s3, 8
1847-
; GFX6-NEXT: s_and_b32 s6, s8, 0xff
1848-
; GFX6-NEXT: s_or_b32 s3, s7, s3
1849-
; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
18501859
; GFX6-NEXT: s_and_b32 s3, 0xffff, s3
1851-
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
1852-
; GFX6-NEXT: s_or_b32 s3, s3, s6
1853-
; GFX6-NEXT: s_lshr_b32 s6, s4, 16
1854-
; GFX6-NEXT: s_lshr_b32 s7, s4, 24
1855-
; GFX6-NEXT: s_and_b32 s9, s4, 0xff
1856-
; GFX6-NEXT: s_bfe_u32 s4, s4, 0x80008
1857-
; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1
1858-
; GFX6-NEXT: s_lshl_b32 s4, s4, 8
1859-
; GFX6-NEXT: s_and_b32 s6, s6, 0xff
1860-
; GFX6-NEXT: s_or_b32 s4, s9, s4
1861-
; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
1862-
; GFX6-NEXT: s_and_b32 s4, 0xffff, s4
1863-
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
1864-
; GFX6-NEXT: s_or_b32 s4, s4, s6
1865-
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
1866-
; GFX6-NEXT: v_mul_hi_u32 v1, s4, v0
1867-
; GFX6-NEXT: s_lshr_b32 s8, s5, 8
1868-
; GFX6-NEXT: s_and_b32 s5, s5, 0xff
1869-
; GFX6-NEXT: s_lshl_b32 s5, s5, 8
1870-
; GFX6-NEXT: v_mul_lo_u32 v1, v1, 24
1871-
; GFX6-NEXT: s_and_b32 s6, s8, 0xff
1872-
; GFX6-NEXT: s_or_b32 s5, s7, s5
1873-
; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
1874-
; GFX6-NEXT: s_and_b32 s5, 0xffff, s5
1875-
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
1876-
; GFX6-NEXT: s_or_b32 s5, s5, s6
1877-
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s4, v1
1878-
; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1
1879-
; GFX6-NEXT: v_mul_hi_u32 v0, s5, v0
1880-
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
1881-
; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
1882-
; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1
1883-
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
1884-
; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24
1885-
; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
1886-
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 23, v1
1887-
; GFX6-NEXT: v_and_b32_e32 v1, 0xffffff, v1
1888-
; GFX6-NEXT: v_lshl_b32_e32 v1, s0, v1
1889-
; GFX6-NEXT: s_lshr_b32 s0, s2, 1
1860+
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
1861+
; GFX6-NEXT: s_or_b32 s1, s3, s1
1862+
; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v3
1863+
; GFX6-NEXT: v_mul_hi_u32 v3, s1, v2
1864+
; GFX6-NEXT: s_lshr_b32 s2, s5, 8
1865+
; GFX6-NEXT: s_and_b32 s3, s5, 0xff
1866+
; GFX6-NEXT: v_mov_b32_e32 v4, s4
1867+
; GFX6-NEXT: s_and_b32 s2, s2, 0xff
1868+
; GFX6-NEXT: v_alignbit_b32 v4, s3, v4, 24
1869+
; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
1870+
; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
1871+
; GFX6-NEXT: v_mul_lo_u32 v3, v3, 24
1872+
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
1873+
; GFX6-NEXT: v_or_b32_e32 v4, s2, v4
1874+
; GFX6-NEXT: v_mul_hi_u32 v2, v4, v2
1875+
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s1, v3
1876+
; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 24, v3
1877+
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v3
1878+
; GFX6-NEXT: v_mul_lo_u32 v2, v2, 24
1879+
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
1880+
; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 24, v3
1881+
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v3
1882+
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
1883+
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v4, v2
1884+
; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 23, v3
1885+
; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2
1886+
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
1887+
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
1888+
; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2
1889+
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
1890+
; GFX6-NEXT: v_and_b32_e32 v3, 0xffffff, v3
1891+
; GFX6-NEXT: s_lshr_b32 s0, s0, 1
1892+
; GFX6-NEXT: v_and_b32_e32 v5, 0xffffff, v5
1893+
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
1894+
; GFX6-NEXT: v_lshl_b32_e32 v3, s6, v3
1895+
; GFX6-NEXT: v_lshr_b32_e32 v5, s0, v5
1896+
; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 23, v2
18901897
; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1891-
; GFX6-NEXT: v_lshr_b32_e32 v2, s0, v2
1892-
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s5, v0
1898+
; GFX6-NEXT: v_or_b32_e32 v3, v3, v5
1899+
; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0
1900+
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 1, v1
1901+
; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v4
1902+
; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1
1903+
; GFX6-NEXT: v_bfe_u32 v2, v3, 8, 8
1904+
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
1905+
; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v3
1906+
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 8, v2
1907+
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
1908+
; GFX6-NEXT: v_bfe_u32 v2, v3, 16, 8
1909+
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
18931910
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
1894-
; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v0
1895-
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1896-
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1897-
; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v0
1898-
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1899-
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1900-
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 23, v0
1901-
; GFX6-NEXT: v_and_b32_e32 v0, 0xffffff, v0
1902-
; GFX6-NEXT: s_lshr_b32 s0, s3, 1
1903-
; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1904-
; GFX6-NEXT: v_lshl_b32_e32 v0, s1, v0
1905-
; GFX6-NEXT: v_lshr_b32_e32 v2, s0, v2
1906-
; GFX6-NEXT: v_bfe_u32 v3, v1, 8, 8
1907-
; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
1908-
; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v1
1909-
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 8, v3
1910-
; GFX6-NEXT: v_bfe_u32 v1, v1, 16, 8
1911-
; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
1912-
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
1913-
; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
19141911
; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v0
19151912
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v2
19161913
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2

0 commit comments

Comments
 (0)