AMDGPU: Cleanup immediate selection patterns #100787

arsenm · 2024-07-26T17:59:54Z

Reorder the immediate selection patterns for consistency, so that the VGPR (V_MOV) and SGPR (S_MOV) patterns for the same type are grouped together.

arsenm · 2024-07-26T18:00:06Z

This stack of pull requests is managed by Graphite. Learn more about stacking.

Join @arsenm and the rest of your teammates on Graphite

llvmbot · 2024-07-26T18:02:02Z

@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-llvm-globalisel

Author: Matt Arsenault (arsenm)

Changes

Reorder for consistency, so the same types for v/s are together.

Full diff: https://github.com/llvm/llvm-project/pull/100787.diff

1 File Affected:

(modified) llvm/lib/Target/AMDGPU/SIInstructions.td (+41-38)

diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index d2101654d2acb..bcf778b31d276 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2138,19 +2138,26 @@ def : GCNPat <
 /********** Immediate Patterns **********/
 /********** ================== **********/
 
+// FIXME: Remove VGPRImm. Should be inferrable from register bank.
+
 def : GCNPat <
   (VGPRImm<(i32 imm)>:$imm),
   (V_MOV_B32_e32 imm:$imm)
 >;
 
 def : GCNPat <
-  (VGPRImm<(f32 fpimm)>:$imm),
-  (V_MOV_B32_e32 (f32 (bitcast_fpimm_to_i32 $imm)))
+  (i32 imm:$imm),
+  (S_MOV_B32 imm:$imm)
 >;
 
 def : GCNPat <
-  (i32 imm:$imm),
-  (S_MOV_B32 imm:$imm)
+  (p5 frameindex:$fi),
+  (V_MOV_B32_e32 (p5 (frameindex_to_targetframeindex $fi)))
+>;
+
+def : GCNPat <
+  (p5 frameindex:$fi),
+  (S_MOV_B32 (p5 (frameindex_to_targetframeindex $fi)))
 >;
 
 def : GCNPat <
@@ -2168,40 +2175,34 @@ def : GCNPat <
   (V_MOV_B32_e32 imm:$imm)
 >;
 
-// FIXME: Workaround for ordering issue with peephole optimizer where
-// a register class copy interferes with immediate folding.  Should
-// use s_mov_b32, which can be shrunk to s_movk_i32
 def : GCNPat <
-  (VGPRImm<(f16 fpimm)>:$imm),
-  (V_MOV_B32_e32 (f16 (bitcast_fpimm_to_i32 $imm)))
+  (i16 imm:$imm),
+  (S_MOV_B32 imm:$imm)
 >;
 
 def : GCNPat <
-  (VGPRImm<(bf16 fpimm)>:$imm),
-  (V_MOV_B32_e32 (bf16 (bitcast_fpimm_to_i32 $imm)))
+  (VGPRImm<(f16 fpimm)>:$imm),
+  (V_MOV_B32_e32 (f16 (bitcast_fpimm_to_i32 $imm)))
 >;
 
-// V_MOV_B64_PSEUDO and S_MOV_B64_IMM_PSEUDO can be used with any 64-bit
-// immediate and wil be expanded as needed, but we will only use these patterns
-// for values which can be encoded.
 def : GCNPat <
-  (VGPRImm<(i64 imm)>:$imm),
-  (V_MOV_B64_PSEUDO imm:$imm)
+  (f16 fpimm:$imm),
+  (S_MOV_B32 (i32 (bitcast_fpimm_to_i32 $imm)))
 >;
 
 def : GCNPat <
-  (VGPRImm<(f64 fpimm)>:$imm),
-  (V_MOV_B64_PSEUDO (f64 (bitcast_fpimm_to_i64 $imm)))
+  (VGPRImm<(bf16 fpimm)>:$imm),
+  (V_MOV_B32_e32 (bf16 (bitcast_fpimm_to_i32 $imm)))
 >;
 
 def : GCNPat <
-  (i64 imm:$imm),
-  (S_MOV_B64_IMM_PSEUDO imm:$imm)
+  (bf16 fpimm:$imm),
+  (S_MOV_B32 (i32 (bitcast_fpimm_to_i32 $imm)))
 >;
 
 def : GCNPat <
-  (f64 fpimm:$imm),
-  (S_MOV_B64_IMM_PSEUDO (i64 (bitcast_fpimm_to_i64 fpimm:$imm)))
+  (VGPRImm<(f32 fpimm)>:$imm),
+  (V_MOV_B32_e32 (f32 (bitcast_fpimm_to_i32 $imm)))
 >;
 
 def : GCNPat <
@@ -2210,31 +2211,38 @@ def : GCNPat <
 >;
 
 def : GCNPat <
-  (f16 fpimm:$imm),
-  (S_MOV_B32 (i32 (bitcast_fpimm_to_i32 $imm)))
+  (VGPRImm<(i64 imm)>:$imm),
+  (V_MOV_B64_PSEUDO imm:$imm)
 >;
 
 def : GCNPat <
-  (bf16 fpimm:$imm),
-  (S_MOV_B32 (i32 (bitcast_fpimm_to_i32 $imm)))
+  (i64 InlineImm64:$imm),
+  (S_MOV_B64 InlineImm64:$imm)
 >;
 
 def : GCNPat <
-  (p5 frameindex:$fi),
-  (V_MOV_B32_e32 (p5 (frameindex_to_targetframeindex $fi)))
+  (i64 imm:$imm),
+  (S_MOV_B64_IMM_PSEUDO imm:$imm)
 >;
 
 def : GCNPat <
-  (p5 frameindex:$fi),
-  (S_MOV_B32 (p5 (frameindex_to_targetframeindex $fi)))
+  (VGPRImm<(f64 fpimm)>:$imm),
+  (V_MOV_B64_PSEUDO (f64 (bitcast_fpimm_to_i64 $imm)))
 >;
 
+// V_MOV_B64_PSEUDO and S_MOV_B64_IMM_PSEUDO can be used with any 64-bit
+// immediate and will be expanded as needed, but we will only use these patterns
+// for values which can be encoded.
 def : GCNPat <
-  (i64 InlineImm64:$imm),
-  (S_MOV_B64 InlineImm64:$imm)
+  (f64 InlineImmFP64:$imm),
+  (S_MOV_B64 (i64 (bitcast_fpimm_to_i64 $imm)))
+>;
+
+def : GCNPat <
+  (f64 fpimm:$imm),
+  (S_MOV_B64_IMM_PSEUDO (i64 (bitcast_fpimm_to_i64 fpimm:$imm)))
 >;
 
-// Set to sign-extended 64-bit value (true = -1, false = 0)
 // Set to sign-extended 64-bit value (true = -1, false = 0)
 def : GCNPat <(i1 imm:$imm),
               (S_MOV_B64 imm:$imm)> {
@@ -2246,11 +2254,6 @@ def : GCNPat <(i1 imm:$imm),
   let WaveSizePredicate = isWave32;
 }
 
-def : GCNPat <
-  (f64 InlineImmFP64:$imm),
-  (S_MOV_B64 (f64 (bitcast_fpimm_to_i64 InlineImmFP64:$imm)))
->;
-
 /********** ================== **********/
 /********** Intrinsic Patterns **********/
 /********** ================== **********/

arsenm · 2024-07-30T14:11:23Z

Merge activity

Jul 30, 10:11 AM EDT: @arsenm started a stack merge that includes this pull request via Graphite.
Jul 30, 10:19 AM EDT: Graphite rebased this pull request as part of a merge.
Jul 30, 10:20 AM EDT: @arsenm merged this pull request with Graphite.

Reorder for consistency, so the same types for v/s are together.

This was referenced Jul 26, 2024

AMDGPU/GlobalISel: Partially move constant selection to patterns #100786

Merged

AMDGPU/GlobalISel: Select all constants in tablegen #100788

Merged

arsenm added backend:AMDGPU llvm:globalisel labels Jul 26, 2024 — with Graphite App

arsenm requested review from gandhi56, jayfoad, mbrkusanin, perlfu, petar-avramovic, Pierre-vh, rovka and Sisyph July 26, 2024 18:01

arsenm marked this pull request as ready for review July 26, 2024 18:02

Sisyph approved these changes Jul 29, 2024

View reviewed changes

arsenm force-pushed the users/arsenm/amdgpu-globalisel-partially-move-constant-selection-to-patterns branch from 4a7ffeb to 1e492ae Compare July 30, 2024 07:26

arsenm force-pushed the users/arsenm/amdgpu-tablegen-immediate-pattern-cleanup branch from 794f20e to 51e2ac2 Compare July 30, 2024 07:26

arsenm force-pushed the users/arsenm/amdgpu-globalisel-partially-move-constant-selection-to-patterns branch from 1e492ae to 8104287 Compare July 30, 2024 14:12

Base automatically changed from users/arsenm/amdgpu-globalisel-partially-move-constant-selection-to-patterns to main July 30, 2024 14:18

AMDGPU: Cleanup immediate selection patterns

35d35e4

Reorder for consistency, so the same types for v/s are together.

arsenm force-pushed the users/arsenm/amdgpu-tablegen-immediate-pattern-cleanup branch from 51e2ac2 to 35d35e4 Compare July 30, 2024 14:18

arsenm merged commit 2033767 into main Jul 30, 2024
4 of 6 checks passed

arsenm deleted the users/arsenm/amdgpu-tablegen-immediate-pattern-cleanup branch July 30, 2024 14:20

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

AMDGPU: Cleanup immediate selection patterns #100787

AMDGPU: Cleanup immediate selection patterns #100787

Uh oh!

arsenm commented Jul 26, 2024

Uh oh!

arsenm commented Jul 26, 2024 •

edited

Loading

Uh oh!

llvmbot commented Jul 26, 2024 •

edited

Loading

Uh oh!

arsenm commented Jul 30, 2024 •

edited

Loading

Uh oh!

Uh oh!

Uh oh!

AMDGPU: Cleanup immediate selection patterns #100787

AMDGPU: Cleanup immediate selection patterns #100787

Uh oh!

Conversation

arsenm commented Jul 26, 2024

Uh oh!

arsenm commented Jul 26, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Jul 26, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

arsenm commented Jul 30, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Merge activity

Uh oh!

Uh oh!

Uh oh!

arsenm commented Jul 26, 2024 •

edited

Loading

llvmbot commented Jul 26, 2024 •

edited

Loading

arsenm commented Jul 30, 2024 •

edited

Loading