@@ -273,11 +273,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
273
273
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
274
274
; CHECK-NEXT: ds_write_b32 v0, v58
275
275
; CHECK-NEXT: s_branch .LBB0_7
276
- ; CHECK-NEXT: .LBB0_16: ; %Flow43
276
+ ; CHECK-NEXT: .LBB0_16: ; %Flow45
277
277
; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1
278
278
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s53
279
279
; CHECK-NEXT: v_mov_b32_e32 v57, v0
280
- ; CHECK-NEXT: .LBB0_17: ; %Flow44
280
+ ; CHECK-NEXT: .LBB0_17: ; %Flow46
281
281
; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1
282
282
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s52
283
283
; CHECK-NEXT: s_mov_b32 s49, exec_lo
@@ -323,11 +323,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
323
323
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
324
324
; CHECK-NEXT: ds_write_b32 v0, v57
325
325
; CHECK-NEXT: s_branch .LBB0_19
326
- ; CHECK-NEXT: .LBB0_22: ; %Flow41
326
+ ; CHECK-NEXT: .LBB0_22: ; %Flow43
327
327
; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1
328
328
; CHECK-NEXT: s_inst_prefetch 0x2
329
329
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s52
330
- ; CHECK-NEXT: .LBB0_23: ; %Flow42
330
+ ; CHECK-NEXT: .LBB0_23: ; %Flow44
331
331
; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1
332
332
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s49
333
333
; CHECK-NEXT: ; %bb.24: ; in Loop: Header=BB0_5 Depth=1
@@ -340,7 +340,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
340
340
; CHECK-NEXT: s_or_b32 s43, s4, s43
341
341
; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s43
342
342
; CHECK-NEXT: s_cbranch_execnz .LBB0_5
343
- ; CHECK-NEXT: .LBB0_25: ; %Flow49
343
+ ; CHECK-NEXT: .LBB0_25: ; %Flow51
344
344
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s42
345
345
; CHECK-NEXT: v_mov_b32_e32 v31, v40
346
346
; CHECK-NEXT: v_mov_b32_e32 v0, 1
@@ -362,12 +362,10 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
362
362
; CHECK-NEXT: v_cmpx_gt_u32_e64 v47, v41
363
363
; CHECK-NEXT: s_cbranch_execz .LBB0_33
364
364
; CHECK-NEXT: ; %bb.26:
365
- ; CHECK-NEXT: s_add_u32 s42, s44, 8
366
- ; CHECK-NEXT: s_addc_u32 s43, s45, 0
367
- ; CHECK-NEXT: s_mov_b32 s44, 0
365
+ ; CHECK-NEXT: s_mov_b32 s42, 0
368
366
; CHECK-NEXT: s_branch .LBB0_28
369
367
; CHECK-NEXT: .LBB0_27: ; in Loop: Header=BB0_28 Depth=1
370
- ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s45
368
+ ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s43
371
369
; CHECK-NEXT: v_mov_b32_e32 v31, v40
372
370
; CHECK-NEXT: v_mov_b32_e32 v0, 0
373
371
; CHECK-NEXT: s_add_u32 s8, s34, 40
@@ -383,12 +381,12 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
383
381
; CHECK-NEXT: s_swappc_b64 s[30:31], s[6:7]
384
382
; CHECK-NEXT: v_add_co_u32 v41, vcc_lo, v0, v41
385
383
; CHECK-NEXT: v_cmp_le_u32_e32 vcc_lo, v47, v41
386
- ; CHECK-NEXT: s_or_b32 s44 , vcc_lo, s44
387
- ; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s44
384
+ ; CHECK-NEXT: s_or_b32 s42 , vcc_lo, s42
385
+ ; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s42
388
386
; CHECK-NEXT: s_cbranch_execz .LBB0_33
389
387
; CHECK-NEXT: .LBB0_28: ; =>This Inner Loop Header: Depth=1
390
388
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v41
391
- ; CHECK-NEXT: s_mov_b32 s45 , exec_lo
389
+ ; CHECK-NEXT: s_mov_b32 s43 , exec_lo
392
390
; CHECK-NEXT: ds_read_b32 v0, v0
393
391
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
394
392
; CHECK-NEXT: v_lshrrev_b32_e32 v63, 10, v0
@@ -397,15 +395,15 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
397
395
; CHECK-NEXT: v_mul_u32_u24_e32 v1, 0x180, v63
398
396
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 5, v62
399
397
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 5, v72
400
- ; CHECK-NEXT: v_add_co_u32 v2, s4, s42 , v1
401
- ; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, s43 , 0, s4
398
+ ; CHECK-NEXT: v_add_co_u32 v2, s4, s44 , v1
399
+ ; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, s45 , 0, s4
402
400
; CHECK-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
403
401
; CHECK-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v3, vcc_lo
404
402
; CHECK-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
405
403
; CHECK-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
406
404
; CHECK-NEXT: s_clause 0x1
407
- ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
408
- ; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
405
+ ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:8
406
+ ; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off offset:8
409
407
; CHECK-NEXT: s_waitcnt vmcnt(0)
410
408
; CHECK-NEXT: v_xor_b32_e32 v46, v9, v5
411
409
; CHECK-NEXT: v_xor_b32_e32 v45, v8, v4
@@ -417,8 +415,8 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
417
415
; CHECK-NEXT: s_cbranch_execz .LBB0_27
418
416
; CHECK-NEXT: ; %bb.29: ; in Loop: Header=BB0_28 Depth=1
419
417
; CHECK-NEXT: s_clause 0x1
420
- ; CHECK-NEXT: global_load_dwordx2 v[58:59], v[2:3], off offset:16
421
- ; CHECK-NEXT: global_load_dwordx2 v[60:61], v[0:1], off offset:16
418
+ ; CHECK-NEXT: global_load_dwordx2 v[58:59], v[2:3], off offset:24
419
+ ; CHECK-NEXT: global_load_dwordx2 v[60:61], v[0:1], off offset:24
422
420
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 4, v45
423
421
; CHECK-NEXT: v_alignbit_b32 v1, v46, v45, 12
424
422
; CHECK-NEXT: v_and_b32_e32 v2, 0xf0000, v45
0 commit comments