1
1
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2
- ; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X32
2
+ ; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=X32,X32-NOSSE
3
3
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64
4
4
; RUN: llc < %s -mtriple=i686-unknown -mattr=+popcnt | FileCheck %s --check-prefix=X32-POPCNT
5
5
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+popcnt | FileCheck %s --check-prefix=X64-POPCNT
6
+ ; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefixes=X32,X32-SSE2
7
+ ; RUN: llc < %s -mtriple=i686-unknown -mattr=ssse3 | FileCheck %s --check-prefixes=X32,X32-SSSE3
6
8
7
9
define i8 @cnt8(i8 %x) nounwind readnone {
8
10
; X32-LABEL: cnt8:
@@ -172,7 +174,127 @@ define i32 @cnt32(i32 %x) nounwind readnone {
172
174
}
173
175
174
176
define i64 @cnt64(i64 %x) nounwind readnone {
175
- ; X32-LABEL: cnt64:
177
+ ; X32-NOSSE-LABEL: cnt64:
178
+ ; X32-NOSSE: # %bb.0:
179
+ ; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
180
+ ; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
181
+ ; X32-NOSSE-NEXT: movl %ecx, %edx
182
+ ; X32-NOSSE-NEXT: shrl %edx
183
+ ; X32-NOSSE-NEXT: andl $1431655765, %edx # imm = 0x55555555
184
+ ; X32-NOSSE-NEXT: subl %edx, %ecx
185
+ ; X32-NOSSE-NEXT: movl %ecx, %edx
186
+ ; X32-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333
187
+ ; X32-NOSSE-NEXT: shrl $2, %ecx
188
+ ; X32-NOSSE-NEXT: andl $858993459, %ecx # imm = 0x33333333
189
+ ; X32-NOSSE-NEXT: addl %edx, %ecx
190
+ ; X32-NOSSE-NEXT: movl %ecx, %edx
191
+ ; X32-NOSSE-NEXT: shrl $4, %edx
192
+ ; X32-NOSSE-NEXT: addl %ecx, %edx
193
+ ; X32-NOSSE-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
194
+ ; X32-NOSSE-NEXT: imull $16843009, %edx, %ecx # imm = 0x1010101
195
+ ; X32-NOSSE-NEXT: shrl $24, %ecx
196
+ ; X32-NOSSE-NEXT: movl %eax, %edx
197
+ ; X32-NOSSE-NEXT: shrl %edx
198
+ ; X32-NOSSE-NEXT: andl $1431655765, %edx # imm = 0x55555555
199
+ ; X32-NOSSE-NEXT: subl %edx, %eax
200
+ ; X32-NOSSE-NEXT: movl %eax, %edx
201
+ ; X32-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333
202
+ ; X32-NOSSE-NEXT: shrl $2, %eax
203
+ ; X32-NOSSE-NEXT: andl $858993459, %eax # imm = 0x33333333
204
+ ; X32-NOSSE-NEXT: addl %edx, %eax
205
+ ; X32-NOSSE-NEXT: movl %eax, %edx
206
+ ; X32-NOSSE-NEXT: shrl $4, %edx
207
+ ; X32-NOSSE-NEXT: addl %eax, %edx
208
+ ; X32-NOSSE-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
209
+ ; X32-NOSSE-NEXT: imull $16843009, %edx, %eax # imm = 0x1010101
210
+ ; X32-NOSSE-NEXT: shrl $24, %eax
211
+ ; X32-NOSSE-NEXT: addl %ecx, %eax
212
+ ; X32-NOSSE-NEXT: xorl %edx, %edx
213
+ ; X32-NOSSE-NEXT: retl
214
+ ;
215
+ ; X64-LABEL: cnt64:
216
+ ; X64: # %bb.0:
217
+ ; X64-NEXT: movq %rdi, %rax
218
+ ; X64-NEXT: shrq %rax
219
+ ; X64-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
220
+ ; X64-NEXT: andq %rax, %rcx
221
+ ; X64-NEXT: subq %rcx, %rdi
222
+ ; X64-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
223
+ ; X64-NEXT: movq %rdi, %rcx
224
+ ; X64-NEXT: andq %rax, %rcx
225
+ ; X64-NEXT: shrq $2, %rdi
226
+ ; X64-NEXT: andq %rax, %rdi
227
+ ; X64-NEXT: addq %rcx, %rdi
228
+ ; X64-NEXT: movq %rdi, %rax
229
+ ; X64-NEXT: shrq $4, %rax
230
+ ; X64-NEXT: leaq (%rax,%rdi), %rax
231
+ ; X64-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
232
+ ; X64-NEXT: andq %rax, %rcx
233
+ ; X64-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101
234
+ ; X64-NEXT: imulq %rcx, %rax
235
+ ; X64-NEXT: shrq $56, %rax
236
+ ; X64-NEXT: retq
237
+ ;
238
+ ; X32-POPCNT-LABEL: cnt64:
239
+ ; X32-POPCNT: # %bb.0:
240
+ ; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
241
+ ; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax
242
+ ; X32-POPCNT-NEXT: addl %ecx, %eax
243
+ ; X32-POPCNT-NEXT: xorl %edx, %edx
244
+ ; X32-POPCNT-NEXT: retl
245
+ ;
246
+ ; X64-POPCNT-LABEL: cnt64:
247
+ ; X64-POPCNT: # %bb.0:
248
+ ; X64-POPCNT-NEXT: popcntq %rdi, %rax
249
+ ; X64-POPCNT-NEXT: retq
250
+ ;
251
+ ; X32-SSE2-LABEL: cnt64:
252
+ ; X32-SSE2: # %bb.0:
253
+ ; X32-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
254
+ ; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
255
+ ; X32-SSE2-NEXT: psrlw $1, %xmm1
256
+ ; X32-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm1
257
+ ; X32-SSE2-NEXT: psubb %xmm1, %xmm0
258
+ ; X32-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
259
+ ; X32-SSE2-NEXT: movdqa %xmm0, %xmm2
260
+ ; X32-SSE2-NEXT: pand %xmm1, %xmm2
261
+ ; X32-SSE2-NEXT: psrlw $2, %xmm0
262
+ ; X32-SSE2-NEXT: pand %xmm1, %xmm0
263
+ ; X32-SSE2-NEXT: paddb %xmm2, %xmm0
264
+ ; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
265
+ ; X32-SSE2-NEXT: psrlw $4, %xmm1
266
+ ; X32-SSE2-NEXT: paddb %xmm0, %xmm1
267
+ ; X32-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm1
268
+ ; X32-SSE2-NEXT: pxor %xmm0, %xmm0
269
+ ; X32-SSE2-NEXT: psadbw %xmm1, %xmm0
270
+ ; X32-SSE2-NEXT: movd %xmm0, %eax
271
+ ; X32-SSE2-NEXT: xorl %edx, %edx
272
+ ; X32-SSE2-NEXT: retl
273
+ ;
274
+ ; X32-SSSE3-LABEL: cnt64:
275
+ ; X32-SSSE3: # %bb.0:
276
+ ; X32-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
277
+ ; X32-SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
278
+ ; X32-SSSE3-NEXT: movdqa %xmm1, %xmm2
279
+ ; X32-SSSE3-NEXT: pand %xmm0, %xmm2
280
+ ; X32-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
281
+ ; X32-SSSE3-NEXT: movdqa %xmm3, %xmm4
282
+ ; X32-SSSE3-NEXT: pshufb %xmm2, %xmm4
283
+ ; X32-SSSE3-NEXT: psrlw $4, %xmm1
284
+ ; X32-SSSE3-NEXT: pand %xmm0, %xmm1
285
+ ; X32-SSSE3-NEXT: pshufb %xmm1, %xmm3
286
+ ; X32-SSSE3-NEXT: paddb %xmm4, %xmm3
287
+ ; X32-SSSE3-NEXT: pxor %xmm0, %xmm0
288
+ ; X32-SSSE3-NEXT: psadbw %xmm3, %xmm0
289
+ ; X32-SSSE3-NEXT: movd %xmm0, %eax
290
+ ; X32-SSSE3-NEXT: xorl %edx, %edx
291
+ ; X32-SSSE3-NEXT: retl
292
+ %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
293
+ ret i64 %cnt
294
+ }
295
+
296
+ define i64 @cnt64_noimplicitfloat(i64 %x) nounwind readnone noimplicitfloat {
297
+ ; X32-LABEL: cnt64_noimplicitfloat:
176
298
; X32: # %bb.0:
177
299
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
178
300
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
@@ -210,7 +332,7 @@ define i64 @cnt64(i64 %x) nounwind readnone {
210
332
; X32-NEXT: xorl %edx, %edx
211
333
; X32-NEXT: retl
212
334
;
213
- ; X64-LABEL: cnt64:
335
+ ; X64-LABEL: cnt64_noimplicitfloat:
214
336
; X64: # %bb.0:
215
337
; X64-NEXT: movq %rdi, %rax
216
338
; X64-NEXT: shrq %rax
@@ -233,15 +355,15 @@ define i64 @cnt64(i64 %x) nounwind readnone {
233
355
; X64-NEXT: shrq $56, %rax
234
356
; X64-NEXT: retq
235
357
;
236
- ; X32-POPCNT-LABEL: cnt64:
358
+ ; X32-POPCNT-LABEL: cnt64_noimplicitfloat:
237
359
; X32-POPCNT: # %bb.0:
238
360
; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
239
361
; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax
240
362
; X32-POPCNT-NEXT: addl %ecx, %eax
241
363
; X32-POPCNT-NEXT: xorl %edx, %edx
242
364
; X32-POPCNT-NEXT: retl
243
365
;
244
- ; X64-POPCNT-LABEL: cnt64:
366
+ ; X64-POPCNT-LABEL: cnt64_noimplicitfloat:
245
367
; X64-POPCNT: # %bb.0:
246
368
; X64-POPCNT-NEXT: popcntq %rdi, %rax
247
369
; X64-POPCNT-NEXT: retq
0 commit comments