@@ -85049,10 +85049,117 @@ fn airShlShrBinOp(self: *CodeGen, inst: Air.Inst.Index) !void {
 }
 
 fn airShlSat(self: *CodeGen, inst: Air.Inst.Index) !void {
+    const zcu = self.pt.zcu;
     const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
-    _ = bin_op;
-    return self.fail("TODO implement shl_sat for {}", .{self.target.cpu.arch});
-    //return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
+    const lhs_ty = self.typeOf(bin_op.lhs);
+    const rhs_ty = self.typeOf(bin_op.rhs);
+
+    const result: MCValue = result: {
+        switch (lhs_ty.zigTypeTag(zcu)) {
+            .int => {
+                try self.spillRegisters(&.{.rcx});
+                try self.register_manager.getKnownReg(.rcx, null);
+                const lhs_mcv = try self.resolveInst(bin_op.lhs);
+                var lhs_temp = try self.tempInit(lhs_ty, lhs_mcv);
+                const rhs_mcv = try self.resolveInst(bin_op.rhs);
+
+                const lhs_lock = switch (lhs_mcv) {
+                    .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
+                    else => null,
+                };
+                defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
+
+                // shift left
+                const dst_mcv = try self.genShiftBinOp(.shl, null, lhs_mcv, rhs_mcv, lhs_ty, rhs_ty);
+                switch (dst_mcv) {
+                    .register => |dst_reg| try self.truncateRegister(lhs_ty, dst_reg),
+                    .register_pair => |dst_regs| try self.truncateRegister(lhs_ty, dst_regs[1]),
+                    .load_frame => |frame_addr| {
+                        const tmp_reg =
+                            try self.register_manager.allocReg(null, abi.RegisterClass.gp);
+                        const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
+                        defer self.register_manager.unlockReg(tmp_lock);
+
+                        const lhs_bits: u31 = @intCast(lhs_ty.bitSize(zcu));
+                        const tmp_ty: Type = if (lhs_bits > 64) .usize else lhs_ty;
+                        const off = frame_addr.off + (lhs_bits - 1) / 64 * 8;
+                        try self.genSetReg(
+                            tmp_reg,
+                            tmp_ty,
+                            .{ .load_frame = .{ .index = frame_addr.index, .off = off } },
+                            .{},
+                        );
+                        try self.truncateRegister(lhs_ty, tmp_reg);
+                        try self.genSetMem(
+                            .{ .frame = frame_addr.index },
+                            off,
+                            tmp_ty,
+                            .{ .register = tmp_reg },
+                            .{},
+                        );
+                    },
+                    else => {},
+                }
+                const dst_lock = switch (dst_mcv) {
+                    .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
+                    else => null,
+                };
+                defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
+
+                // shift right
+                const tmp_mcv = try self.genShiftBinOp(.shr, null, dst_mcv, rhs_mcv, lhs_ty, rhs_ty);
+                var tmp_temp = try self.tempInit(lhs_ty, tmp_mcv);
+
+                // check whether overflow happened
+                const cc_temp = lhs_temp.cmpInts(.neq, &tmp_temp, self) catch |err| switch (err) {
+                    error.SelectFailed => unreachable,
+                    else => |e| return e,
+                };
+                try lhs_temp.die(self);
+                try tmp_temp.die(self);
+                const overflow_reloc = try self.genCondBrMir(lhs_ty, cc_temp.tracking(self).short);
+                try cc_temp.die(self);
+
+                // on overflow:
+                //   for unsigned integers, the saturated result is the type's max
+                //   for signed integers:
+                //     if lhs is positive, the result is the type's max
+                //     if lhs is negative, it is the type's min
+                switch (lhs_ty.intInfo(zcu).signedness) {
+                    .unsigned => {
+                        const bound_mcv = try self.genTypedValue(try lhs_ty.maxIntScalar(self.pt, lhs_ty));
+                        try self.genCopy(lhs_ty, dst_mcv, bound_mcv, .{});
+                    },
+                    .signed => {
+                        // check the sign of lhs
+                        const sign_shift = lhs_ty.intInfo(zcu).bits - 1;
+                        // TODO: optimize this shr. we only need the highest bit, so shifting the highest part of lhs_mcv
+                        // is enough; the other parts can be skipped here.
+                        const sign_mcv = try self.genShiftBinOp(.shr, null, lhs_mcv, try self.genTypedValue(try self.pt.intValue(Type.u8, @as(u8, @intCast(sign_shift)))), lhs_ty, Type.u8);
+                        const sign_reloc_condbr = try self.genCondBrMir(lhs_ty, sign_mcv);
+
+                        // if it is negative
+                        const min_mcv = try self.genTypedValue(try lhs_ty.minIntScalar(self.pt, lhs_ty));
+                        try self.genCopy(lhs_ty, dst_mcv, min_mcv, .{});
+                        const sign_reloc_br = try self.asmJmpReloc(undefined);
+                        self.performReloc(sign_reloc_condbr);
+
+                        // if it is positive
+                        const max_mcv = try self.genTypedValue(try lhs_ty.maxIntScalar(self.pt, lhs_ty));
+                        try self.genCopy(lhs_ty, dst_mcv, max_mcv, .{});
+                        self.performReloc(sign_reloc_br);
+                    },
+                }
+
+                self.performReloc(overflow_reloc);
+                break :result dst_mcv;
+            },
+            else => {
+                return self.fail("TODO implement shl_sat for {} op type {}", .{ self.target.cpu.arch, lhs_ty.zigTypeTag(zcu) });
+            },
+        }
+    };
+    return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
 }
 
 fn airOptionalPayload(self: *CodeGen, inst: Air.Inst.Index) !void {
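
Note on the lowering strategy: the added airShlSat implements Zig's saturating left shift (`<<|`) by shifting left, shifting the result back right, and comparing against the original lhs; if the round trip differs, bits were lost, so the result is clamped to the type's maximum (or, for a negative signed lhs, the minimum). Below is a minimal language-level sketch of that behavior, not part of this commit; the test name and constants are illustrative only.

const std = @import("std");

test "saturating left shift clamps on overflow" {
    // no overflow: behaves like an ordinary shift
    try std.testing.expectEqual(@as(u8, 0b0001_1000), @as(u8, 0b0000_0011) <<| 3);
    // unsigned overflow saturates to the type's max
    try std.testing.expectEqual(@as(u8, 0xff), @as(u8, 0b0110_0000) <<| 3);
    // signed overflow saturates to max for a positive lhs...
    try std.testing.expectEqual(@as(i8, 127), @as(i8, 33) <<| 3);
    // ...and to min for a negative lhs
    try std.testing.expectEqual(@as(i8, -128), @as(i8, -33) <<| 3);
}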