@@ -310,29 +310,43 @@ define <4 x bfloat> @sitofp_i32(<4 x i32> %a) #0 {
310
310
define <4 x bfloat> @sitofp_i64 (<4 x i64 > %a ) #0 {
311
311
; CHECK-CVT-LABEL: sitofp_i64:
312
312
; CHECK-CVT: // %bb.0:
313
- ; CHECK-CVT-NEXT: scvtf v0.2d, v0.2d
314
- ; CHECK-CVT-NEXT: scvtf v1.2d, v1.2d
315
- ; CHECK-CVT-NEXT: movi v2.4s, #127, msl #8
316
- ; CHECK-CVT-NEXT: fcvtn v0.2s, v0.2d
317
- ; CHECK-CVT-NEXT: fcvtn2 v0.4s, v1.2d
318
- ; CHECK-CVT-NEXT: movi v1.4s, #1
319
- ; CHECK-CVT-NEXT: ushr v3.4s, v0.4s, #16
320
- ; CHECK-CVT-NEXT: add v2.4s, v0.4s, v2.4s
321
- ; CHECK-CVT-NEXT: and v1.16b, v3.16b, v1.16b
322
- ; CHECK-CVT-NEXT: fcmeq v3.4s, v0.4s, v0.4s
323
- ; CHECK-CVT-NEXT: orr v0.4s, #64, lsl #16
324
- ; CHECK-CVT-NEXT: add v1.4s, v1.4s, v2.4s
325
- ; CHECK-CVT-NEXT: bit v0.16b, v1.16b, v3.16b
313
+ ; CHECK-CVT-NEXT: mov x8, v0.d[1]
314
+ ; CHECK-CVT-NEXT: fmov x9, d0
315
+ ; CHECK-CVT-NEXT: scvtf s2, x9
316
+ ; CHECK-CVT-NEXT: mov x9, v1.d[1]
317
+ ; CHECK-CVT-NEXT: scvtf s0, x8
318
+ ; CHECK-CVT-NEXT: fmov x8, d1
319
+ ; CHECK-CVT-NEXT: scvtf s1, x8
320
+ ; CHECK-CVT-NEXT: mov v2.s[1], v0.s[0]
321
+ ; CHECK-CVT-NEXT: scvtf s0, x9
322
+ ; CHECK-CVT-NEXT: mov v2.s[2], v1.s[0]
323
+ ; CHECK-CVT-NEXT: movi v1.4s, #127, msl #8
324
+ ; CHECK-CVT-NEXT: mov v2.s[3], v0.s[0]
325
+ ; CHECK-CVT-NEXT: movi v0.4s, #1
326
+ ; CHECK-CVT-NEXT: ushr v3.4s, v2.4s, #16
327
+ ; CHECK-CVT-NEXT: add v1.4s, v2.4s, v1.4s
328
+ ; CHECK-CVT-NEXT: and v0.16b, v3.16b, v0.16b
329
+ ; CHECK-CVT-NEXT: fcmeq v3.4s, v2.4s, v2.4s
330
+ ; CHECK-CVT-NEXT: orr v2.4s, #64, lsl #16
331
+ ; CHECK-CVT-NEXT: add v0.4s, v0.4s, v1.4s
332
+ ; CHECK-CVT-NEXT: bif v0.16b, v2.16b, v3.16b
326
333
; CHECK-CVT-NEXT: shrn v0.4h, v0.4s, #16
327
334
; CHECK-CVT-NEXT: ret
328
335
;
329
336
; CHECK-BF16-LABEL: sitofp_i64:
330
337
; CHECK-BF16: // %bb.0:
331
- ; CHECK-BF16-NEXT: scvtf v0.2d, v0.2d
332
- ; CHECK-BF16-NEXT: scvtf v1.2d, v1.2d
333
- ; CHECK-BF16-NEXT: fcvtn v0.2s, v0.2d
334
- ; CHECK-BF16-NEXT: fcvtn2 v0.4s, v1.2d
335
- ; CHECK-BF16-NEXT: bfcvtn v0.4h, v0.4s
338
+ ; CHECK-BF16-NEXT: mov x8, v0.d[1]
339
+ ; CHECK-BF16-NEXT: fmov x9, d0
340
+ ; CHECK-BF16-NEXT: scvtf s2, x9
341
+ ; CHECK-BF16-NEXT: mov x9, v1.d[1]
342
+ ; CHECK-BF16-NEXT: scvtf s0, x8
343
+ ; CHECK-BF16-NEXT: fmov x8, d1
344
+ ; CHECK-BF16-NEXT: mov v2.s[1], v0.s[0]
345
+ ; CHECK-BF16-NEXT: scvtf s0, x8
346
+ ; CHECK-BF16-NEXT: mov v2.s[2], v0.s[0]
347
+ ; CHECK-BF16-NEXT: scvtf s0, x9
348
+ ; CHECK-BF16-NEXT: mov v2.s[3], v0.s[0]
349
+ ; CHECK-BF16-NEXT: bfcvtn v0.4h, v2.4s
336
350
; CHECK-BF16-NEXT: ret
337
351
%1 = sitofp <4 x i64 > %a to <4 x bfloat>
338
352
ret <4 x bfloat> %1
@@ -413,29 +427,43 @@ define <4 x bfloat> @uitofp_i32(<4 x i32> %a) #0 {
413
427
define <4 x bfloat> @uitofp_i64 (<4 x i64 > %a ) #0 {
414
428
; CHECK-CVT-LABEL: uitofp_i64:
415
429
; CHECK-CVT: // %bb.0:
416
- ; CHECK-CVT-NEXT: ucvtf v0.2d, v0.2d
417
- ; CHECK-CVT-NEXT: ucvtf v1.2d, v1.2d
418
- ; CHECK-CVT-NEXT: movi v2.4s, #127, msl #8
419
- ; CHECK-CVT-NEXT: fcvtn v0.2s, v0.2d
420
- ; CHECK-CVT-NEXT: fcvtn2 v0.4s, v1.2d
421
- ; CHECK-CVT-NEXT: movi v1.4s, #1
422
- ; CHECK-CVT-NEXT: ushr v3.4s, v0.4s, #16
423
- ; CHECK-CVT-NEXT: add v2.4s, v0.4s, v2.4s
424
- ; CHECK-CVT-NEXT: and v1.16b, v3.16b, v1.16b
425
- ; CHECK-CVT-NEXT: fcmeq v3.4s, v0.4s, v0.4s
426
- ; CHECK-CVT-NEXT: orr v0.4s, #64, lsl #16
427
- ; CHECK-CVT-NEXT: add v1.4s, v1.4s, v2.4s
428
- ; CHECK-CVT-NEXT: bit v0.16b, v1.16b, v3.16b
430
+ ; CHECK-CVT-NEXT: mov x8, v0.d[1]
431
+ ; CHECK-CVT-NEXT: fmov x9, d0
432
+ ; CHECK-CVT-NEXT: ucvtf s2, x9
433
+ ; CHECK-CVT-NEXT: mov x9, v1.d[1]
434
+ ; CHECK-CVT-NEXT: ucvtf s0, x8
435
+ ; CHECK-CVT-NEXT: fmov x8, d1
436
+ ; CHECK-CVT-NEXT: ucvtf s1, x8
437
+ ; CHECK-CVT-NEXT: mov v2.s[1], v0.s[0]
438
+ ; CHECK-CVT-NEXT: ucvtf s0, x9
439
+ ; CHECK-CVT-NEXT: mov v2.s[2], v1.s[0]
440
+ ; CHECK-CVT-NEXT: movi v1.4s, #127, msl #8
441
+ ; CHECK-CVT-NEXT: mov v2.s[3], v0.s[0]
442
+ ; CHECK-CVT-NEXT: movi v0.4s, #1
443
+ ; CHECK-CVT-NEXT: ushr v3.4s, v2.4s, #16
444
+ ; CHECK-CVT-NEXT: add v1.4s, v2.4s, v1.4s
445
+ ; CHECK-CVT-NEXT: and v0.16b, v3.16b, v0.16b
446
+ ; CHECK-CVT-NEXT: fcmeq v3.4s, v2.4s, v2.4s
447
+ ; CHECK-CVT-NEXT: orr v2.4s, #64, lsl #16
448
+ ; CHECK-CVT-NEXT: add v0.4s, v0.4s, v1.4s
449
+ ; CHECK-CVT-NEXT: bif v0.16b, v2.16b, v3.16b
429
450
; CHECK-CVT-NEXT: shrn v0.4h, v0.4s, #16
430
451
; CHECK-CVT-NEXT: ret
431
452
;
432
453
; CHECK-BF16-LABEL: uitofp_i64:
433
454
; CHECK-BF16: // %bb.0:
434
- ; CHECK-BF16-NEXT: ucvtf v0.2d, v0.2d
435
- ; CHECK-BF16-NEXT: ucvtf v1.2d, v1.2d
436
- ; CHECK-BF16-NEXT: fcvtn v0.2s, v0.2d
437
- ; CHECK-BF16-NEXT: fcvtn2 v0.4s, v1.2d
438
- ; CHECK-BF16-NEXT: bfcvtn v0.4h, v0.4s
455
+ ; CHECK-BF16-NEXT: mov x8, v0.d[1]
456
+ ; CHECK-BF16-NEXT: fmov x9, d0
457
+ ; CHECK-BF16-NEXT: ucvtf s2, x9
458
+ ; CHECK-BF16-NEXT: mov x9, v1.d[1]
459
+ ; CHECK-BF16-NEXT: ucvtf s0, x8
460
+ ; CHECK-BF16-NEXT: fmov x8, d1
461
+ ; CHECK-BF16-NEXT: mov v2.s[1], v0.s[0]
462
+ ; CHECK-BF16-NEXT: ucvtf s0, x8
463
+ ; CHECK-BF16-NEXT: mov v2.s[2], v0.s[0]
464
+ ; CHECK-BF16-NEXT: ucvtf s0, x9
465
+ ; CHECK-BF16-NEXT: mov v2.s[3], v0.s[0]
466
+ ; CHECK-BF16-NEXT: bfcvtn v0.4h, v2.4s
439
467
; CHECK-BF16-NEXT: ret
440
468
%1 = uitofp <4 x i64 > %a to <4 x bfloat>
441
469
ret <4 x bfloat> %1
0 commit comments