@@ -12913,10 +12913,112 @@ fn airShlShrBinOp(self: *CodeGen, inst: Air.Inst.Index) !void {
12913
12913
}
12914
12914
12915
12915
/// Lowers the AIR `shl_sat` instruction (saturating shift left, `lhs <<| rhs`).
///
/// Only integer `lhs` types are handled; every other type tag is reported through
/// `self.fail` as a TODO.
///
/// Overflow is detected by a round trip: `lhs` is shifted left, the truncated result is
/// shifted back right by the same amount, and that value is compared with the original
/// `lhs`. When they differ, the destination is overwritten with the saturation bound:
/// the maximum of `lhs_ty` for unsigned types, and the minimum or maximum of `lhs_ty`
/// for signed types depending on the sign of `lhs`.
fn airShlSat(self: *CodeGen, inst: Air.Inst.Index) !void {
    const zcu = self.pt.zcu;
    const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
    const lhs_ty = self.typeOf(bin_op.lhs);
    const rhs_ty = self.typeOf(bin_op.rhs);

    const result: MCValue = result: {
        switch (lhs_ty.zigTypeTag(zcu)) {
            .int => {
                // x86 shift instructions mask the count to 5 bits (6 bits for 64-bit
                // operands). If the shift amount can reach that mask, a count such as 32
                // behaves like 0, the round trip below reproduces `lhs` exactly, and the
                // overflow goes unnoticed. Refuse those operand widths instead of
                // silently producing an unsaturated result.
                // NOTE(review): this assumes genShiftBinOp lowers to plain hardware
                // shifts without clamping the amount itself - confirm.
                const lhs_bit_count = lhs_ty.bitSize(zcu);
                const rhs_bit_count = rhs_ty.bitSize(zcu);
                const amount_below_hw_mask = if (lhs_bit_count <= 32)
                    rhs_bit_count <= 5
                else if (lhs_bit_count <= 64)
                    rhs_bit_count <= 6
                else
                    false;
                // `63 - @clz(x)` is floor(log2(x)) for a non-zero 64-bit `x`.
                const amount_below_lhs_width = lhs_bit_count != 0 and
                    rhs_bit_count <= 63 - @as(u64, @clz(lhs_bit_count));
                if (!amount_below_hw_mask and !amount_below_lhs_width) {
                    return self.fail(
                        "TODO implement shl_sat for {} with lhs bits {}, rhs bits {}",
                        .{ self.target.cpu.arch, lhs_bit_count, rhs_bit_count },
                    );
                }

                // Reserve rcx up front.
                // NOTE(review): presumably because variable shifts take their count in
                // cl and genShiftBinOp clobbers it - confirm.
                try self.spillRegisters(&.{.rcx});
                try self.register_manager.getKnownReg(.rcx, null);
                const lhs_mcv = try self.resolveInst(bin_op.lhs);
                const rhs_mcv = try self.resolveInst(bin_op.rhs);

                // `lhs` is read again for the overflow comparison and the sign check,
                // so keep its register locked for the rest of this branch.
                const lhs_lock = switch (lhs_mcv) {
                    .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
                    else => null,
                };
                defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);

                // shift left
                const dst_mcv = try self.genShiftBinOp(.shl, null, lhs_mcv, rhs_mcv, lhs_ty, rhs_ty);

                // Truncate the part of the result that holds the highest bits of
                // `lhs_ty`, so that bits shifted out of the type are really lost.
                switch (dst_mcv) {
                    .register => |dst_reg| try self.truncateRegister(lhs_ty, dst_reg),
                    .register_pair => |dst_regs| try self.truncateRegister(lhs_ty, dst_regs[1]),
                    .load_frame => |frame_addr| {
                        const limb_reg =
                            try self.register_manager.allocReg(null, abi.RegisterClass.gp);
                        const limb_lock = self.register_manager.lockRegAssumeUnused(limb_reg);
                        defer self.register_manager.unlockReg(limb_lock);

                        const lhs_bits: u31 = @intCast(lhs_bit_count);
                        const tmp_ty: Type = if (lhs_bits > 64) .usize else lhs_ty;
                        // Byte offset of the 8-byte limb that contains the highest bit.
                        const off = frame_addr.off + (lhs_bits - 1) / 64 * 8;
                        // Load that limb, truncate it, and store it back in place.
                        try self.genSetReg(
                            limb_reg,
                            tmp_ty,
                            .{ .load_frame = .{ .index = frame_addr.index, .off = off } },
                            .{},
                        );
                        try self.truncateRegister(lhs_ty, limb_reg);
                        try self.genSetMem(
                            .{ .frame = frame_addr.index },
                            off,
                            tmp_ty,
                            .{ .register = limb_reg },
                            .{},
                        );
                    },
                    else => {},
                }
                const dst_lock = switch (dst_mcv) {
                    .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
                    else => null,
                };
                defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);

                // shift right
                const tmp_mcv = try self.genShiftBinOp(.shr, null, dst_mcv, rhs_mcv, lhs_ty, rhs_ty);
                const tmp_lock = switch (tmp_mcv) {
                    .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
                    else => null,
                };
                defer if (tmp_lock) |lock| self.register_manager.unlockReg(lock);

                // check if overflow happens: the round-tripped value differs from `lhs`.
                // The returned reloc is resolved after the saturation code below.
                try self.genBinOpMir(.{ ._, .cmp }, lhs_ty, tmp_mcv, lhs_mcv);
                const overflow_reloc = try self.genCondBrMir(lhs_ty, .{ .eflags = .ne });

                // if overflow,
                // for unsigned integers, the saturating result is just its max
                // for signed integers,
                //   if lhs is positive, the result is its max
                //   if lhs is negative, it is min
                const lhs_info = lhs_ty.intInfo(zcu);
                switch (lhs_info.signedness) {
                    .unsigned => {
                        const bound_mcv = try self.genTypedValue(try lhs_ty.maxIntScalar(self.pt, lhs_ty));
                        try self.genCopy(lhs_ty, dst_mcv, bound_mcv, .{});
                    },
                    .signed => {
                        // check the sign of lhs by shifting its sign bit down to bit 0
                        const sign_shift = lhs_info.bits - 1;
                        const sign_shift_mcv =
                            try self.genTypedValue(try self.pt.intValue_u64(lhs_ty, sign_shift));
                        const sign_mcv =
                            try self.genShiftBinOp(.shr, null, lhs_mcv, sign_shift_mcv, lhs_ty, lhs_ty);
                        const sign_reloc_condbr = try self.genCondBrMir(lhs_ty, sign_mcv);

                        // if it is negative
                        const min_mcv = try self.genTypedValue(try lhs_ty.minIntScalar(self.pt, lhs_ty));
                        try self.genCopy(lhs_ty, dst_mcv, min_mcv, .{});
                        const sign_reloc_br = try self.asmJmpReloc(undefined);
                        self.performReloc(sign_reloc_condbr);

                        // if it is positive
                        const max_mcv = try self.genTypedValue(try lhs_ty.maxIntScalar(self.pt, lhs_ty));
                        try self.genCopy(lhs_ty, dst_mcv, max_mcv, .{});
                        self.performReloc(sign_reloc_br);
                    },
                }

                self.performReloc(overflow_reloc);
                break :result dst_mcv;
            },
            else => {
                return self.fail("TODO implement shl_sat for {} op type {}", .{ self.target.cpu.arch, lhs_ty.zigTypeTag(zcu) });
            },
        }
    };
    return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
}
12921
13023
12922
13024
fn airOptionalPayload(self: *CodeGen, inst: Air.Inst.Index) !void {
0 commit comments