@@ -312,11 +312,46 @@ define <4 x float> @ulto4f32(<4 x i64> %a) {
312
312
ret <4 x float > %b
313
313
}
314
314
315
+ define <4 x float > @ulto4f32_nneg (<4 x i64 > %a ) { ; uitofp-with-nneg case, <4 x i64> to <4 x float>; the expected code under all three check prefixes below uses vcvtsi2ss or vcvtqq2ps (the signed-integer convert forms)
316
+ ; NODQ-LABEL: ulto4f32_nneg:
317
+ ; NODQ: # %bb.0:
318
+ ; NODQ-NEXT: vpextrq $1, %xmm0, %rax
319
+ ; NODQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
320
+ ; NODQ-NEXT: vmovq %xmm0, %rax
321
+ ; NODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
322
+ ; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
323
+ ; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm0
324
+ ; NODQ-NEXT: vmovq %xmm0, %rax
325
+ ; NODQ-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
326
+ ; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
327
+ ; NODQ-NEXT: vpextrq $1, %xmm0, %rax
328
+ ; NODQ-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
329
+ ; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
330
+ ; NODQ-NEXT: vzeroupper
331
+ ; NODQ-NEXT: retq
332
+ ;
333
+ ; VLDQ-LABEL: ulto4f32_nneg:
334
+ ; VLDQ: # %bb.0:
335
+ ; VLDQ-NEXT: vcvtqq2ps %ymm0, %xmm0
336
+ ; VLDQ-NEXT: vzeroupper
337
+ ; VLDQ-NEXT: retq
338
+ ;
339
+ ; DQNOVL-LABEL: ulto4f32_nneg:
340
+ ; DQNOVL: # %bb.0:
341
+ ; DQNOVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
342
+ ; DQNOVL-NEXT: vcvtqq2ps %zmm0, %ymm0
343
+ ; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
344
+ ; DQNOVL-NEXT: vzeroupper
345
+ ; DQNOVL-NEXT: retq
346
+ %b = uitofp nneg <4 x i64 > %a to <4 x float > ; nneg marks %a as non-negative (per LangRef the result is poison otherwise); presumably this is what allows the signed-convert lowering expected above - confirm against the backend change
347
+ ret <4 x float > %b
348
+ }
349
+
315
350
define <8 x double > @ulto8f64 (<8 x i64 > %a ) {
316
351
; NODQ-LABEL: ulto8f64:
317
352
; NODQ: # %bb.0:
318
353
; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm1 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200]
319
- ; NODQ-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm1
354
+ ; NODQ-NEXT: vpternlogq {{.*#+}} zmm1 = zmm1 | (zmm0 & mem)
320
355
; NODQ-NEXT: vpsrlq $32, %zmm0, %zmm0
321
356
; NODQ-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
322
357
; NODQ-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
@@ -342,14 +377,14 @@ define <16 x double> @ulto16f64(<16 x i64> %a) {
342
377
; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm2 = [4294967295,4294967295,4294967295,4294967295,4294967295,4294967295,4294967295,4294967295]
343
378
; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm3 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200]
344
379
; NODQ-NEXT: vmovdqa64 %zmm3, %zmm4
345
- ; NODQ-NEXT: vpternlogq $248, %zmm2, %zmm0, %zmm4
380
+ ; NODQ-NEXT: vpternlogq {{.*#+}} zmm4 = zmm4 | (zmm0 & zmm2)
346
381
; NODQ-NEXT: vpsrlq $32, %zmm0, %zmm0
347
382
; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm5 = [4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072]
348
383
; NODQ-NEXT: vporq %zmm5, %zmm0, %zmm0
349
384
; NODQ-NEXT: vbroadcastsd {{.*#+}} zmm6 = [1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25]
350
385
; NODQ-NEXT: vsubpd %zmm6, %zmm0, %zmm0
351
386
; NODQ-NEXT: vaddpd %zmm0, %zmm4, %zmm0
352
- ; NODQ-NEXT: vpternlogq $248, %zmm2, %zmm1, %zmm3
387
+ ; NODQ-NEXT: vpternlogq {{.*#+}} zmm3 = zmm3 | (zmm1 & zmm2)
353
388
; NODQ-NEXT: vpsrlq $32, %zmm1, %zmm1
354
389
; NODQ-NEXT: vporq %zmm5, %zmm1, %zmm1
355
390
; NODQ-NEXT: vsubpd %zmm6, %zmm1, %zmm1
@@ -1483,7 +1518,7 @@ define <16 x float> @sbto16f32(<16 x i32> %a) {
1483
1518
; NODQ: # %bb.0:
1484
1519
; NODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
1485
1520
; NODQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
1486
- ; NODQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1521
+ ; NODQ-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
1487
1522
; NODQ-NEXT: vcvtdq2ps %zmm0, %zmm0
1488
1523
; NODQ-NEXT: retq
1489
1524
;
@@ -1564,7 +1599,7 @@ define <16 x double> @sbto16f64(<16 x double> %a) {
1564
1599
; NODQ-NEXT: vcmpltpd %zmm0, %zmm2, %k0
1565
1600
; NODQ-NEXT: vcmpltpd %zmm1, %zmm2, %k1
1566
1601
; NODQ-NEXT: kunpckbw %k0, %k1, %k1
1567
- ; NODQ-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
1602
+ ; NODQ-NEXT: vpternlogd {{.*#+}} zmm1 {%k1} {z} = -1
1568
1603
; NODQ-NEXT: vcvtdq2pd %ymm1, %zmm0
1569
1604
; NODQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1
1570
1605
; NODQ-NEXT: vcvtdq2pd %ymm1, %zmm1
@@ -1603,7 +1638,7 @@ define <8 x double> @sbto8f64(<8 x double> %a) {
1603
1638
; NOVLDQ: # %bb.0:
1604
1639
; NOVLDQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1
1605
1640
; NOVLDQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1
1606
- ; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1641
+ ; NOVLDQ-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
1607
1642
; NOVLDQ-NEXT: vcvtdq2pd %ymm0, %zmm0
1608
1643
; NOVLDQ-NEXT: retq
1609
1644
;
@@ -1864,7 +1899,7 @@ define <16 x float> @ubto16f32(<16 x i32> %a) {
1864
1899
; NODQ: # %bb.0:
1865
1900
; NODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
1866
1901
; NODQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
1867
- ; NODQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1902
+ ; NODQ-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
1868
1903
; NODQ-NEXT: vpsrld $31, %zmm0, %zmm0
1869
1904
; NODQ-NEXT: vcvtdq2ps %zmm0, %zmm0
1870
1905
; NODQ-NEXT: retq
@@ -1894,7 +1929,7 @@ define <16 x double> @ubto16f64(<16 x i32> %a) {
1894
1929
; NODQ: # %bb.0:
1895
1930
; NODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
1896
1931
; NODQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
1897
- ; NODQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1932
+ ; NODQ-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
1898
1933
; NODQ-NEXT: vpsrld $31, %zmm0, %zmm1
1899
1934
; NODQ-NEXT: vcvtdq2pd %ymm1, %zmm0
1900
1935
; NODQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1
0 commit comments