1
+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
1
2
; RUN: llc < %s | FileCheck %s
2
3
3
4
; Check that the SCEVs produced from the multiple loops don't attempt to get
@@ -9,7 +10,44 @@ target triple = "x86_64-unknown-linux-gnu"
9
10
10
11
define void @in4dob_ (ptr nocapture writeonly %0 , ptr nocapture readonly %1 , ptr nocapture readonly %2 , i64 %3 , i1 %min.iters.check840 ) "target-cpu" ="icelake-server" {
11
12
; CHECK-LABEL: in4dob_:
12
- ; CHECK: .LBB0_6: # %vector.body807
13
+ ; CHECK: # %bb.0: # %.preheader263
14
+ ; CHECK-NEXT: leaq (,%rcx,4), %r9
15
+ ; CHECK-NEXT: movl $1, %r10d
16
+ ; CHECK-NEXT: xorl %eax, %eax
17
+ ; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
18
+ ; CHECK-NEXT: jmp .LBB0_1
19
+ ; CHECK-NEXT: .p2align 4, 0x90
20
+ ; CHECK-NEXT: .LBB0_20: # in Loop: Header=BB0_1 Depth=1
21
+ ; CHECK-NEXT: incq %r10
22
+ ; CHECK-NEXT: addq %r9, %rax
23
+ ; CHECK-NEXT: cmpq %r10, %rcx
24
+ ; CHECK-NEXT: je .LBB0_18
25
+ ; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
26
+ ; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
27
+ ; CHECK-NEXT: vucomiss %xmm0, %xmm1
28
+ ; CHECK-NEXT: jne .LBB0_20
29
+ ; CHECK-NEXT: jp .LBB0_20
30
+ ; CHECK-NEXT: # %bb.2: # in Loop: Header=BB0_1 Depth=1
31
+ ; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
32
+ ; CHECK-NEXT: vucomiss %xmm0, %xmm1
33
+ ; CHECK-NEXT: jne .LBB0_20
34
+ ; CHECK-NEXT: jp .LBB0_20
35
+ ; CHECK-NEXT: # %bb.3: # %vector.body807.preheader
36
+ ; CHECK-NEXT: leaq 1(%rcx), %rdx
37
+ ; CHECK-NEXT: movl %edx, %esi
38
+ ; CHECK-NEXT: andl $7, %esi
39
+ ; CHECK-NEXT: cmpq $7, %rcx
40
+ ; CHECK-NEXT: jae .LBB0_5
41
+ ; CHECK-NEXT: # %bb.4:
42
+ ; CHECK-NEXT: xorl %r9d, %r9d
43
+ ; CHECK-NEXT: jmp .LBB0_7
44
+ ; CHECK-NEXT: .LBB0_5: # %vector.body807.preheader.new
45
+ ; CHECK-NEXT: movq %rdx, %r10
46
+ ; CHECK-NEXT: andq $-8, %r10
47
+ ; CHECK-NEXT: xorl %r9d, %r9d
48
+ ; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
49
+ ; CHECK-NEXT: .p2align 4, 0x90
50
+ ; CHECK-NEXT: .LBB0_6: # %vector.body807
13
51
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
14
52
; CHECK-NEXT: leaq (%rdi,%r9), %r11
15
53
; CHECK-NEXT: vmovups %ymm0, (%rax,%r11)
@@ -23,7 +61,42 @@ define void @in4dob_(ptr nocapture writeonly %0, ptr nocapture readonly %1, ptr
23
61
; CHECK-NEXT: addq $8, %r9
24
62
; CHECK-NEXT: cmpq %r9, %r10
25
63
; CHECK-NEXT: jne .LBB0_6
26
- ; CHECK: .LBB0_14: # %vector.body847
64
+ ; CHECK-NEXT: .LBB0_7: # %.lr.ph373.unr-lcssa
65
+ ; CHECK-NEXT: testq %rsi, %rsi
66
+ ; CHECK-NEXT: je .LBB0_10
67
+ ; CHECK-NEXT: # %bb.8: # %vector.body807.epil.preheader
68
+ ; CHECK-NEXT: addq %rdi, %r9
69
+ ; CHECK-NEXT: xorl %r10d, %r10d
70
+ ; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
71
+ ; CHECK-NEXT: .p2align 4, 0x90
72
+ ; CHECK-NEXT: .LBB0_9: # %vector.body807.epil
73
+ ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
74
+ ; CHECK-NEXT: leaq (%r9,%r10), %r11
75
+ ; CHECK-NEXT: vmovups %ymm0, (%rax,%r11)
76
+ ; CHECK-NEXT: incq %r10
77
+ ; CHECK-NEXT: cmpq %r10, %rsi
78
+ ; CHECK-NEXT: jne .LBB0_9
79
+ ; CHECK-NEXT: .LBB0_10: # %.lr.ph373
80
+ ; CHECK-NEXT: testb $1, %r8b
81
+ ; CHECK-NEXT: je .LBB0_11
82
+ ; CHECK-NEXT: # %bb.19: # %scalar.ph839.preheader
83
+ ; CHECK-NEXT: movl $0, (%rdi)
84
+ ; CHECK-NEXT: vzeroupper
85
+ ; CHECK-NEXT: retq
86
+ ; CHECK-NEXT: .LBB0_11: # %vector.body847.preheader
87
+ ; CHECK-NEXT: movl %edx, %esi
88
+ ; CHECK-NEXT: andl $7, %esi
89
+ ; CHECK-NEXT: cmpq $7, %rcx
90
+ ; CHECK-NEXT: jae .LBB0_13
91
+ ; CHECK-NEXT: # %bb.12:
92
+ ; CHECK-NEXT: xorl %ecx, %ecx
93
+ ; CHECK-NEXT: jmp .LBB0_15
94
+ ; CHECK-NEXT: .LBB0_13: # %vector.body847.preheader.new
95
+ ; CHECK-NEXT: andq $-8, %rdx
96
+ ; CHECK-NEXT: xorl %ecx, %ecx
97
+ ; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
98
+ ; CHECK-NEXT: .p2align 4, 0x90
99
+ ; CHECK-NEXT: .LBB0_14: # %vector.body847
27
100
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
28
101
; CHECK-NEXT: leaq (%rdi,%rcx), %r8
29
102
; CHECK-NEXT: vmovups %ymm0, 96(%rax,%r8)
@@ -37,6 +110,24 @@ define void @in4dob_(ptr nocapture writeonly %0, ptr nocapture readonly %1, ptr
37
110
; CHECK-NEXT: addq $8, %rcx
38
111
; CHECK-NEXT: cmpq %rcx, %rdx
39
112
; CHECK-NEXT: jne .LBB0_14
113
+ ; CHECK-NEXT: .LBB0_15: # %common.ret.loopexit.unr-lcssa
114
+ ; CHECK-NEXT: testq %rsi, %rsi
115
+ ; CHECK-NEXT: je .LBB0_18
116
+ ; CHECK-NEXT: # %bb.16: # %vector.body847.epil.preheader
117
+ ; CHECK-NEXT: leaq 96(%rcx,%rdi), %rcx
118
+ ; CHECK-NEXT: xorl %edx, %edx
119
+ ; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
120
+ ; CHECK-NEXT: .p2align 4, 0x90
121
+ ; CHECK-NEXT: .LBB0_17: # %vector.body847.epil
122
+ ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
123
+ ; CHECK-NEXT: leaq (%rcx,%rdx), %rdi
124
+ ; CHECK-NEXT: vmovups %ymm0, (%rax,%rdi)
125
+ ; CHECK-NEXT: incq %rdx
126
+ ; CHECK-NEXT: cmpq %rdx, %rsi
127
+ ; CHECK-NEXT: jne .LBB0_17
128
+ ; CHECK-NEXT: .LBB0_18: # %common.ret
129
+ ; CHECK-NEXT: vzeroupper
130
+ ; CHECK-NEXT: retq
40
131
.preheader263:
41
132
%4 = shl i64 %3 , 2
42
133
br label %5
0 commit comments