Skip to content

riscv64: Add Zba extension instructions #6087

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Mar 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion cranelift/codegen/src/isa/riscv64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1121,9 +1121,15 @@
(let ((val Reg (value_regs_get val 0)))
(alu_rr_imm12 (AluOPRRI.Zexth) val (imm12_const 0))))

;; With `Zba` we have a `zext.w` instruction
(rule 2 (extend val (ExtendOp.Zero) $I32 $I64)
(if-let $true (has_zba))
(let ((val Reg (value_regs_get val 0)))
(alu_rrr (AluOPRRR.Adduw) val (zero_reg))))

;;; Signed rules extending to I128
;; Extend the bottom part, and extract the sign bit from the bottom as the top
(rule 2 (extend val (ExtendOp.Signed) (fits_in_64 from_ty) $I128)
(rule 3 (extend val (ExtendOp.Signed) (fits_in_64 from_ty) $I128)
(let ((val Reg (value_regs_get val 0))
(low Reg (extend val (ExtendOp.Signed) from_ty $I64))
(high Reg (alu_rr_imm12 (AluOPRRI.Srai) low (imm12_const 63))))
Expand Down
14 changes: 12 additions & 2 deletions cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -248,18 +248,28 @@ fn test_riscv64_binemit() {
0x28755593,
));

//
insns.push(TestUnit::new(
Inst::AluRRR {
alu_op: AluOPRRR::Adduw,
rd: writable_a1(),
rs1: a0(),
rs2: zero_reg(),
},
"add.uw a1,a0,zero",
"zext.w a1,a0",
0x80505bb,
));

insns.push(TestUnit::new(
Inst::AluRRR {
alu_op: AluOPRRR::Adduw,
rd: writable_a1(),
rs1: a0(),
rs2: a1(),
},
"add.uw a1,a0,a1",
0x08b505bb,
));

insns.push(TestUnit::new(
Inst::AluRRR {
alu_op: AluOPRRR::Andn,
Expand Down
15 changes: 11 additions & 4 deletions cranelift/codegen/src/isa/riscv64/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1220,10 +1220,17 @@ impl Inst {
rs1,
rs2,
} => {
let rs1 = format_reg(rs1, allocs);
let rs2 = format_reg(rs2, allocs);
let rd = format_reg(rd.to_reg(), allocs);
format!("{} {},{},{}", alu_op.op_name(), rd, rs1, rs2,)
let rs1_s = format_reg(rs1, allocs);
let rs2_s = format_reg(rs2, allocs);
let rd_s = format_reg(rd.to_reg(), allocs);
match alu_op {
AluOPRRR::Adduw if rs2 == zero_reg() => {
format!("zext.w {},{}", rd_s, rs1_s)
}
_ => {
format!("{} {},{},{}", alu_op.op_name(), rd_s, rs1_s, rs2_s)
}
}
}
&Inst::FpuRR {
frm,
Expand Down
110 changes: 96 additions & 14 deletions cranelift/codegen/src/isa/riscv64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,9 @@


;;;; Rules for `iadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule -1 (lower (has_type (fits_in_32 ty) (iadd x y)))
(alu_rrr (AluOPRRR.Addw) x y))

;; Base case, simply adding things in registers.
(rule -2 (lower (has_type (fits_in_64 ty) (iadd x y)))
(rule 0 (lower (has_type (fits_in_64 ty) (iadd x y)))
(alu_add x y))

;; Special cases for when one operand is an immediate that fits in 12 bits.
Expand All @@ -40,17 +38,63 @@
(rule 2 (lower (has_type (fits_in_64 ty) (iadd (imm12_from_value x) y)))
(alu_rr_imm12 (select_addi ty) y x))

(rule
(lower (has_type $I128 (iadd x y)))
(let
( ;; low part.
(low Reg (alu_add (value_regs_get x 0) (value_regs_get y 0)))
;; compute carry.
(carry Reg (alu_rrr (AluOPRRR.SltU) low (value_regs_get y 0)))
;;
(high_tmp Reg (alu_add (value_regs_get x 1) (value_regs_get y 1)))
;; add carry.
(high Reg (alu_add high_tmp carry)))
;; Special case when one of the operands is uextended
;; Needs `Zba`
(rule 3 (lower (has_type $I64 (iadd x (uextend y @ (value_type $I32)))))
(if-let $true (has_zba))
(alu_rrr (AluOPRRR.Adduw) y x))

(rule 4 (lower (has_type $I64 (iadd (uextend x @ (value_type $I32)) y)))
(if-let $true (has_zba))
(alu_rrr (AluOPRRR.Adduw) x y))

;; Add with const shift. We have a few of these instructions with `Zba`.
(decl pure partial match_shnadd (Imm64) AluOPRRR)
(rule (match_shnadd (u64_from_imm64 1)) (AluOPRRR.Sh1add))
(rule (match_shnadd (u64_from_imm64 2)) (AluOPRRR.Sh2add))
(rule (match_shnadd (u64_from_imm64 3)) (AluOPRRR.Sh3add))

(rule 3 (lower (has_type $I64 (iadd x (ishl y (maybe_uextend (iconst n))))))
(if-let $true (has_zba))
(if-let shnadd (match_shnadd n))
(alu_rrr shnadd y x))

(rule 4 (lower (has_type $I64 (iadd (ishl x (maybe_uextend (iconst n))) y)))
(if-let $true (has_zba))
(if-let shnadd (match_shnadd n))
(alu_rrr shnadd x y))


;; Add with uextended const shift. We have a few of these instructions with `Zba`.
;;
;; !!! Important !!!
;; These rules only work for (ishl (uextend _) _) and not for (uextend (ishl _ _))!
;; Getting this wrong means a potential miscalculation of the shift amount.
;; Additionally we can only ensure that this is correct if the uextend is 32 to 64 bits.
(decl pure partial match_shnadd_uw (Imm64) AluOPRRR)
(rule (match_shnadd_uw (u64_from_imm64 1)) (AluOPRRR.Sh1adduw))
(rule (match_shnadd_uw (u64_from_imm64 2)) (AluOPRRR.Sh2adduw))
(rule (match_shnadd_uw (u64_from_imm64 3)) (AluOPRRR.Sh3adduw))

(rule 5 (lower (has_type $I64 (iadd x (ishl (uextend y @ (value_type $I32)) (maybe_uextend (iconst n))))))
(if-let $true (has_zba))
(if-let shnadd_uw (match_shnadd_uw n))
(alu_rrr shnadd_uw y x))

(rule 6 (lower (has_type $I64 (iadd (ishl (uextend x @ (value_type $I32)) (maybe_uextend (iconst n))) y)))
(if-let $true (has_zba))
(if-let shnadd_uw (match_shnadd_uw n))
(alu_rrr shnadd_uw x y))

;; I128 cases
(rule 7 (lower (has_type $I128 (iadd x y)))
(let ((low Reg (alu_add (value_regs_get x 0) (value_regs_get y 0)))
;; compute carry.
(carry Reg (alu_rrr (AluOPRRR.SltU) low (value_regs_get y 0)))
;;
(high_tmp Reg (alu_add (value_regs_get x 1) (value_regs_get y 1)))
;; add carry.
(high Reg (alu_add high_tmp carry)))
(value_regs low high)))

;;; Rules for `uadd_overflow_trap` ;;;;;;;;;;;;;
Expand Down Expand Up @@ -355,6 +399,38 @@
(rule (lower (has_type out_ty (sextend val @ (value_type in_ty))))
(sext val in_ty out_ty))

;; The instructions below are present in RV64I and sign-extend the result to 64 bits.

(rule 1 (lower (has_type $I64 (sextend (has_type $I32 (iadd x y)))))
(alu_rrr (AluOPRRR.Addw) x y))

(rule 1 (lower (has_type $I64 (sextend (has_type $I32 (isub x y)))))
(alu_rrr (AluOPRRR.Subw) x y))

(rule 1 (lower (has_type $I64 (sextend (has_type $I32 (ishl x y)))))
(alu_rrr (AluOPRRR.Sllw) x (value_regs_get y 0)))

(rule 1 (lower (has_type $I64 (sextend (has_type $I32 (ushr x y)))))
(alu_rrr (AluOPRRR.Srlw) x (value_regs_get y 0)))

(rule 1 (lower (has_type $I64 (sextend (has_type $I32 (sshr x y)))))
(alu_rrr (AluOPRRR.Sraw) x (value_regs_get y 0)))


(rule 2 (lower (has_type $I64 (sextend (has_type $I32 (iadd x (imm12_from_value y))))))
(alu_rr_imm12 (AluOPRRI.Addiw) x y))

(rule 3 (lower (has_type $I64 (sextend (has_type $I32 (iadd (imm12_from_value x) y)))))
(alu_rr_imm12 (AluOPRRI.Addiw) y x))

(rule 2 (lower (has_type $I64 (sextend (has_type $I32 (ishl x (imm12_from_value y))))))
(alu_rr_imm12 (AluOPRRI.Slliw) x y))

(rule 2 (lower (has_type $I64 (sextend (has_type $I32 (ushr x (imm12_from_value y))))))
(alu_rr_imm12 (AluOPRRI.SrliW) x y))

(rule 2 (lower (has_type $I64 (sextend (has_type $I32 (sshr x (imm12_from_value y))))))
(alu_rr_imm12 (AluOPRRI.Sraiw) x y))

;;;; Rules for `popcnt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (fits_in_64 ty) (popcnt x)))
Expand Down Expand Up @@ -385,6 +461,12 @@
(rule 1 (lower (has_type $I64 (ishl x y)))
(alu_rrr (AluOPRRR.Sll) x (value_regs_get y 0)))

;; With `Zba` we have a shift that zero extends the LHS argument.
(rule 3 (lower (has_type $I64 (ishl (uextend x @ (value_type $I32)) (maybe_uextend (imm12_from_value y)))))
(if-let $true (has_zba))
(alu_rr_imm12 (AluOPRRI.SlliUw) x y))

;; I128 cases
(rule 0 (lower (has_type $I128 (ishl x y)))
(lower_i128_ishl x y))

Expand Down
179 changes: 179 additions & 0 deletions cranelift/filetests/filetests/isa/riscv64/arithmetic-extends.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
test compile precise-output
set unwind_info=false
target riscv64

function %sext_add_i32(i32, i32) -> i64 {
block0(v0: i32, v1: i32):
v2 = iadd.i32 v0, v1
v3 = sextend.i64 v2
return v3
}

; VCode:
; block0:
; addw a0,a0,a1
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addw a0, a0, a1
; ret

function %sext_sub_i32(i32, i32) -> i64 {
block0(v0: i32, v1: i32):
v2 = isub.i32 v0, v1
v3 = sextend.i64 v2
return v3
}

; VCode:
; block0:
; subw a0,a0,a1
; ret
;
; Disassembled:
; block0: ; offset 0x0
; subw a0, a0, a1
; ret

function %sext_ishl_i32(i32, i32) -> i64 {
block0(v0: i32, v1: i32):
v2 = ishl.i32 v0, v1
v3 = sextend.i64 v2
return v3
}

; VCode:
; block0:
; sllw a0,a0,a1
; ret
;
; Disassembled:
; block0: ; offset 0x0
; sllw a0, a0, a1
; ret

function %sext_ushr_i32(i32, i32) -> i64 {
block0(v0: i32, v1: i32):
v2 = ushr.i32 v0, v1
v3 = sextend.i64 v2
return v3
}

; VCode:
; block0:
; srlw a0,a0,a1
; ret
;
; Disassembled:
; block0: ; offset 0x0
; srlw a0, a0, a1
; ret

function %sext_sshr_i32(i32, i32) -> i64 {
block0(v0: i32, v1: i32):
v2 = sshr.i32 v0, v1
v3 = sextend.i64 v2
return v3
}

; VCode:
; block0:
; sraw a0,a0,a1
; ret
;
; Disassembled:
; block0: ; offset 0x0
; sraw a0, a0, a1
; ret

function %sext_add_const_i32(i32) -> i64 {
block0(v0: i32):
v1 = iconst.i32 -1
v2 = iadd.i32 v0, v1
v3 = sextend.i64 v2
return v3
}

; VCode:
; block0:
; addiw a0,a0,-1
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addiw a0, a0, -1
; ret

function %sext_ishl_const_i32(i32) -> i64 {
block0(v0: i32):
v1 = iconst.i32 31
v2 = ishl.i32 v0, v1
v3 = sextend.i64 v2
return v3
}

; VCode:
; block0:
; slliw a0,a0,31
; ret
;
; Disassembled:
; block0: ; offset 0x0
; slliw a0, a0, 0x1f
; ret

function %sext_ushr_const_i32(i32) -> i64 {
block0(v0: i32):
v1 = iconst.i32 31
v2 = ushr.i32 v0, v1
v3 = sextend.i64 v2
return v3
}

; VCode:
; block0:
; srliw a0,a0,31
; ret
;
; Disassembled:
; block0: ; offset 0x0
; srliw a0, a0, 0x1f
; ret

function %sext_sshr_const_i32(i32) -> i64 {
block0(v0: i32):
v1 = iconst.i32 31
v2 = sshr.i32 v0, v1
v3 = sextend.i64 v2
return v3
}

; VCode:
; block0:
; sraiw a0,a0,31
; ret
;
; Disassembled:
; block0: ; offset 0x0
; sraiw a0, a0, 0x1f
; ret


function %sext_sshr_i32_i128(i32, i128) -> i64 {
block0(v0: i32, v1: i128):
v2 = sshr.i32 v0, v1
v3 = sextend.i64 v2
return v3
}

; VCode:
; block0:
; sraw a0,a0,a1
; ret
;
; Disassembled:
; block0: ; offset 0x0
; sraw a0, a0, a1
; ret

Loading