Skip to content

Commit 4051039

Browse files
committed
x86_64: implement integer saturating left shifting codegen
Similarly to shl_with_overflow, we first SHL/SAL the integer, then SHR/SAR it back and compare the result with the original value to detect overflow. If overflow happened, set the result to the saturation bound (the maximum, or the minimum for a negative signed value) to make it saturating. Bug: ziglang#17645 Signed-off-by: Bingwu Zhang <[email protected]>
1 parent 6c3cbb0 commit 4051039

File tree

2 files changed

+134
-6
lines changed

2 files changed

+134
-6
lines changed

src/arch/x86_64/CodeGen.zig

+134-5
Original file line numberDiff line numberDiff line change
@@ -85049,10 +85049,129 @@ fn airShlShrBinOp(self: *CodeGen, inst: Air.Inst.Index) !void {
8504985049
}
8505085050

/// Lowers the AIR `shl_sat` (saturating shift left) instruction for integer operands.
///
/// Approach, as implemented below: shift lhs left by rhs, shift that result back
/// right by the same amount, and compare it with the original lhs. If the two
/// differ, the shift overflowed and the result is overwritten with a bound of
/// `lhs_ty`: the maximum for unsigned types and for non-negative signed lhs, the
/// minimum for negative signed lhs.
///
/// Fails with a "TODO implement shl_sat" error when lhs is not an integer type or
/// when the lhs/rhs bit widths are not one of the supported combinations.
8505185051
fn airShlSat(self: *CodeGen, inst: Air.Inst.Index) !void {
85052+
const zcu = self.pt.zcu;
8505285053
const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
85053-
_ = bin_op;
85054-
return self.fail("TODO implement shl_sat for {}", .{self.target.cpu.arch});
85055-
//return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
85054+
const lhs_ty = self.typeOf(bin_op.lhs);
85055+
const rhs_ty = self.typeOf(bin_op.rhs);
85056+
85057+
const result: MCValue = result: {
85058+
switch (lhs_ty.zigTypeTag(zcu)) {
85059+
.int => {
85060+
const lhs_bits = lhs_ty.bitSize(zcu);
85061+
const rhs_bits = rhs_ty.bitSize(zcu);
// Supported only when the shift-amount type is narrow enough: at most 5 bits
// for lhs up to 32 bits, at most 6 bits for lhs of 33..64 bits, or at most
// log2(lhs_bits) bits in general. Anything else is reported as a TODO.
85062+
if (!(lhs_bits <= 32 and rhs_bits <= 5) and !(lhs_bits > 32 and lhs_bits <= 64 and rhs_bits <= 6) and !(rhs_bits <= std.math.log2(lhs_bits))) {
85063+
return self.fail("TODO implement shl_sat for {} with lhs bits {}, rhs bits {}", .{ self.target.cpu.arch, lhs_bits, rhs_bits });
85064+
}
85065+
// NOTE(review): rcx is spilled up front, presumably because the variable
// shift count is passed in cl — confirm against genShiftBinOp.
85066+
try self.spillRegisters(&.{.rcx});
85067+
const lhs_mcv = try self.resolveInst(bin_op.lhs);
85068+
var lhs_temp = try self.tempInit(lhs_ty, lhs_mcv);
85069+
const rhs_mcv = try self.resolveInst(bin_op.rhs);
85070+
85071+
const lhs_lock = switch (lhs_mcv) {
85072+
.register => |reg| self.register_manager.lockRegAssumeUnused(reg),
85073+
else => null,
85074+
};
85075+
defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
85076+
85077+
// shift left
85078+
const dst_mcv = try self.genShiftBinOp(.shl, null, lhs_mcv, rhs_mcv, lhs_ty, rhs_ty);
85079+
switch (dst_mcv) {
85080+
.register => |dst_reg| try self.truncateRegister(lhs_ty, dst_reg),
85081+
.register_pair => |dst_regs| try self.truncateRegister(lhs_ty, dst_regs[1]),
// Result lives in a frame slot: load the 8-byte limb at offset
// (lhs_bits - 1) / 64 * 8 (the limb holding the top bits), truncate it to
// lhs_ty, and store it back to the same location.
85082+
.load_frame => |frame_addr| {
85083+
const tmp_reg =
85084+
try self.register_manager.allocReg(null, abi.RegisterClass.gp);
85085+
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
85086+
defer self.register_manager.unlockReg(tmp_lock);
85087+
85088+
const lhs_bits_u31: u31 = @intCast(lhs_bits);
85089+
const tmp_ty: Type = if (lhs_bits_u31 > 64) .usize else lhs_ty;
85090+
const off = frame_addr.off + (lhs_bits_u31 - 1) / 64 * 8;
85091+
try self.genSetReg(
85092+
tmp_reg,
85093+
tmp_ty,
85094+
.{ .load_frame = .{ .index = frame_addr.index, .off = off } },
85095+
.{},
85096+
);
85097+
try self.truncateRegister(lhs_ty, tmp_reg);
85098+
try self.genSetMem(
85099+
.{ .frame = frame_addr.index },
85100+
off,
85101+
tmp_ty,
85102+
.{ .register = tmp_reg },
85103+
.{},
85104+
);
85105+
},
85106+
else => {},
85107+
}
85108+
const dst_lock = switch (dst_mcv) {
85109+
.register => |reg| self.register_manager.lockRegAssumeUnused(reg),
85110+
else => null,
85111+
};
85112+
defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
85113+
85114+
// shift right
// Shifts the truncated result back by the same amount so it can be compared
// with the original lhs below.
85115+
const tmp_mcv = try self.genShiftBinOp(.shr, null, dst_mcv, rhs_mcv, lhs_ty, rhs_ty);
85116+
var tmp_temp = try self.tempInit(lhs_ty, tmp_mcv);
85117+
85118+
// check if overflow happens
85119+
const cc_temp = lhs_temp.cmpInts(.neq, &tmp_temp, self) catch |err| switch (err) {
85120+
error.SelectFailed => unreachable,
85121+
else => |e| return e,
85122+
};
85123+
try tmp_temp.die(self);
// NOTE(review): the code after this conditional branch is the overflow path,
// so the branch is presumably taken when the values compare equal — confirm
// genCondBrMir's polarity. The reloc is resolved after the saturation code.
85124+
const overflow_reloc = try self.genCondBrMir(lhs_ty, cc_temp.tracking(self).short);
85125+
try cc_temp.die(self);
85126+
85127+
// if overflow,
85128+
// for unsigned integers, the saturating result is just its max
85129+
// for signed integers,
85130+
// if lhs is positive, the result is its max
85131+
// if lhs is negative, it is min
85132+
switch (lhs_ty.intInfo(zcu).signedness) {
85133+
.unsigned => {
85134+
try lhs_temp.die(self);
85135+
const bound_mcv = try self.genTypedValue(try lhs_ty.maxIntScalar(self.pt, lhs_ty));
85136+
try self.genCopy(lhs_ty, dst_mcv, bound_mcv, .{});
85137+
},
85138+
.signed => {
85139+
// check the sign of lhs
85140+
// TODO: optimize this.
85141+
// we only need the highest bit so shifting the highest part of lhs_mcv
85142+
// is enough to check the signedness. other parts can be skipped here.
85143+
var zero_temp = try self.tempInit(lhs_ty, try self.genTypedValue(try self.pt.intValue(lhs_ty, 0)));
85144+
const sign_cc_temp = lhs_temp.cmpInts(.lt, &zero_temp, self) catch |err| switch (err) {
85145+
error.SelectFailed => unreachable,
85146+
else => |e| return e,
85147+
};
85148+
try zero_temp.die(self);
85149+
try lhs_temp.die(self);
85150+
const sign_reloc_condbr = try self.genCondBrMir(lhs_ty, sign_cc_temp.tracking(self).short);
85151+
try sign_cc_temp.die(self);
85152+
85153+
// if it is negative
85154+
const min_mcv = try self.genTypedValue(try lhs_ty.minIntScalar(self.pt, lhs_ty));
85155+
try self.genCopy(lhs_ty, dst_mcv, min_mcv, .{});
// Jump over the max-value store below once min has been written; the target
// is patched by performReloc(sign_reloc_br) after that store.
85156+
const sign_reloc_br = try self.asmJmpReloc(undefined);
85157+
self.performReloc(sign_reloc_condbr);
85158+
85159+
// if it is positive
85160+
const max_mcv = try self.genTypedValue(try lhs_ty.maxIntScalar(self.pt, lhs_ty));
85161+
try self.genCopy(lhs_ty, dst_mcv, max_mcv, .{});
85162+
self.performReloc(sign_reloc_br);
85163+
},
85164+
}
85165+
85166+
self.performReloc(overflow_reloc);
85167+
break :result dst_mcv;
85168+
},
85169+
else => {
85170+
return self.fail("TODO implement shl_sat for {} op type {}", .{ self.target.cpu.arch, lhs_ty.zigTypeTag(zcu) });
85171+
},
85172+
}
85173+
};
85174+
return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
8505685175
}
8505785176

8505885177
fn airOptionalPayload(self: *CodeGen, inst: Air.Inst.Index) !void {
@@ -88437,7 +88556,7 @@ fn genShiftBinOpMir(
8843788556
) !void {
8843888557
const pt = self.pt;
8843988558
const zcu = pt.zcu;
88440-
const abi_size: u32 = @intCast(lhs_ty.abiSize(zcu));
88559+
const abi_size: u31 = @intCast(lhs_ty.abiSize(zcu));
8844188560
const shift_abi_size: u32 = @intCast(rhs_ty.abiSize(zcu));
8844288561
try self.spillEflagsIfOccupied();
8844388562

@@ -88621,7 +88740,17 @@ fn genShiftBinOpMir(
8862188740
.immediate => {},
8862288741
else => self.performReloc(skip),
8862388742
}
88624-
}
88743+
} else try self.asmRegisterMemory(.{ ._, .mov }, temp_regs[2].to64(), .{
88744+
.base = .{ .frame = lhs_mcv.load_frame.index },
88745+
.mod = .{ .rm = .{
88746+
.size = .qword,
88747+
.disp = switch (tag[0]) {
88748+
._l => lhs_mcv.load_frame.off,
88749+
._r => lhs_mcv.load_frame.off + abi_size - 8,
88750+
else => unreachable,
88751+
},
88752+
} },
88753+
});
8862588754
switch (rhs_mcv) {
8862688755
.immediate => |shift_imm| try self.asmRegisterImmediate(
8862788756
tag,

test/behavior/bit_shifting.zig

-1
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,6 @@ test "comptime shift safety check" {
111111
}
112112

113113
test "Saturating Shift Left where lhs is of a computed type" {
114-
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
115114
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
116115
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
117116
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO

0 commit comments

Comments
 (0)