Skip to content

Commit 63831d8

Browse files
spirv: saturating arithmetic implementation (only add/sub)
1 parent ee6d194 commit 63831d8

File tree

1 file changed

+63
-28
lines changed

1 file changed

+63
-28
lines changed

src/codegen/spirv.zig

Lines changed: 63 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -3249,10 +3249,12 @@ const NavGen = struct {
32493249
.rem, .rem_optimized => try self.airArithOp(inst, .f_rem, .s_rem, .u_mod),
32503250
.mod, .mod_optimized => try self.airArithOp(inst, .f_mod, .s_mod, .u_mod),
32513251

3252-
.add_with_overflow => try self.airAddSubOverflow(inst, .i_add, .u_lt, .s_lt),
3253-
.sub_with_overflow => try self.airAddSubOverflow(inst, .i_sub, .u_gt, .s_gt),
3252+
.add_with_overflow => try self.airAddSubWithOverflow(inst, .i_add),
3253+
.sub_with_overflow => try self.airAddSubWithOverflow(inst, .i_sub),
32543254
.mul_with_overflow => try self.airMulOverflow(inst),
32553255
.shl_with_overflow => try self.airShlOverflow(inst),
3256+
.add_sat => try self.airAddSubSaturating(inst, .i_add),
3257+
.sub_sat => try self.airAddSubSaturating(inst, .i_sub),
32563258

32573259
.mul_add => try self.airMulAdd(inst),
32583260

@@ -3654,42 +3656,32 @@ const NavGen = struct {
36543656
}
36553657
}
36563658

3657-
fn airAddSubOverflow(
3659+
/// Shared lowering for integer add/sub with overflow detection.
/// Builds `op` on `lhs` and `rhs` via `buildBinary`, normalizes the result, and
/// computes a boolean `overflowed` condition. Depending on `mode` it then either
/// returns a composite of `{ result, overflow flag }` typed as `result_ty`
/// (`.WithOverflow`), or selects a saturation constant in place of the result
/// when `overflowed` is set (`.Saturating`).
/// Only `.i_add` and `.i_sub` are supported: any other `op` hits `unreachable`
/// in the unsigned comparison. Composite integers are not implemented yet and
/// floats/bools are rejected with `unreachable`.
fn buildAddSub(
36583660
self: *NavGen,
3659-
inst: Air.Inst.Index,
3660-
comptime add: BinaryOp,
3661-
comptime ucmp: CmpPredicate,
3662-
comptime scmp: CmpPredicate,
3661+
lhs: Temporary,
3662+
rhs: Temporary,
3663+
result_ty: Type,
3664+
comptime op: BinaryOp,
3665+
comptime mode: enum { WithOverflow, Saturating },
36633666
) !?IdRef {
3664-
_ = scmp;
3665-
// Note: OpIAddCarry and OpISubBorrow are not really useful here: For unsigned numbers,
3666-
// there is in both cases only one extra operation required. For signed operations,
3667-
// the overflow bit is set then going from 0x80.. to 0x00.., but this doesn't actually
3668-
// normally set a carry bit. So the SPIR-V overflow operations are not particularly
3669-
// useful here.
3670-
3671-
const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
3672-
const extra = self.air.extraData(Air.Bin, ty_pl.payload).data;
3673-
3674-
const lhs = try self.temporary(extra.lhs);
3675-
const rhs = try self.temporary(extra.rhs);
3676-
3677-
const result_ty = self.typeOfIndex(inst);
3678-
36793667
const info = self.arithmeticTypeInfo(lhs.ty);
36803668
switch (info.class) {
36813669
.composite_integer => unreachable, // TODO
36823670
.strange_integer, .integer => {},
36833671
.float, .bool => unreachable,
36843672
}
36853673

3686-
const sum = try self.buildBinary(add, lhs, rhs);
3674+
const sum = try self.buildBinary(op, lhs, rhs);
36873675
const result = try self.normalize(sum, info);
36883676

36893677
const overflowed = switch (info.signedness) {
36903678
// Overflow happened if the result is smaller than either of the operands. It doesn't matter which.
36913679
// For subtraction the conditions need to be swapped.
3692-
.unsigned => try self.buildCmp(ucmp, result, lhs),
3680+
.unsigned => switch (op) {
3681+
.i_add => try self.buildCmp(.u_lt, result, lhs),
3682+
.i_sub => try self.buildCmp(.u_gt, result, lhs),
3683+
else => unreachable,
3684+
},
36933685
// For signed operations, we check the signs of the operands and the result.
36943686
.signed => blk: {
36953687
// Signed overflow detection using the sign bits of the operands and the result.
@@ -3708,7 +3700,7 @@ const NavGen = struct {
37083700
const signs_match = try self.buildCmp(.l_eq, lhs_is_neg, rhs_is_neg);
37093701
const result_sign_differs = try self.buildCmp(.l_ne, lhs_is_neg, result_is_neg);
37103702

3711-
const overflow_condition = if (add == .i_add)
3703+
const overflow_condition = if (op == .i_add)
37123704
signs_match
37133705
else // .i_sub
37143706
try self.buildUnary(.l_not, signs_match);
@@ -3717,10 +3709,53 @@ const NavGen = struct {
37173709
},
37183710
};
37193711

3720-
const ov = try self.intFromBool(overflowed);
3712+
switch (mode) {
3713+
.WithOverflow => {
// Convert the boolean overflow condition to an integer and pack it
// together with the normalized result into the `result_ty` composite.
3714+
const ov = try self.intFromBool(overflowed);
3715+
const struct_ty_id = try self.resolveType(result_ty, .direct);
3716+
return try self.constructComposite(struct_ty_id, &.{ try result.materialize(self), try ov.materialize(self) });
3717+
},
3718+
.Saturating => {
// NOTE(review): the saturation bound below is chosen from `info.signedness`
// and `op` only, never from the direction of the overflow:
//   - unsigned `.i_sub` falls into the `else` branch and selects the maximum
//     value, whereas unsigned saturating subtraction should clamp to 0;
//   - signed `.i_add` always selects the maximum and signed `.i_sub` always
//     selects the minimum, although negative + negative overflows toward the
//     minimum and positive - negative overflows toward the maximum.
// Confirm, and if so derive the bound from the sign of an operand.
// NOTE(review): the constant is created with the scalar type of `result_ty`;
// presumably `buildSelect` accepts that for vector results — TODO confirm.
3719+
const sat_val_tmp = blk: {
3720+
const scalar_ty = result_ty.scalarType(self.pt.zcu);
3721+
if (info.signedness == .signed and op == .i_sub) {
3722+
const min_val: i64 = if (info.bits == 0) 0 else -(@as(i64, 1) << @as(u6, @intCast(info.bits - 1)));
3723+
const min_id = try self.constInt(scalar_ty, min_val);
3724+
break :blk Temporary.init(scalar_ty, min_id);
3725+
} else {
// 64-bit unsigned is special-cased because `1 << 64` does not fit in a u64.
3726+
const max_val: u64 = if (info.bits == 0) 0 else switch (info.signedness) {
3727+
.unsigned => if (info.bits == 64) std.math.maxInt(u64) else (@as(u64, 1) << @as(u6, @intCast(info.bits))) - 1,
3728+
.signed => (@as(u64, 1) << @as(u6, @intCast(info.bits - 1))) - 1,
3729+
};
3730+
const max_id = try self.constInt(scalar_ty, max_val);
3731+
break :blk Temporary.init(scalar_ty, max_id);
3732+
}
3733+
};
// Pick the saturation constant where overflow was detected, the plain result otherwise.
3734+
const final_result = try self.buildSelect(overflowed, sat_val_tmp, result);
3735+
return try final_result.materialize(self);
3736+
},
3737+
}
3738+
}
37213739

3722-
const result_ty_id = try self.resolveType(result_ty, .direct);
3723-
return try self.constructComposite(result_ty_id, &.{ try result.materialize(self), try ov.materialize(self) });
3740+
/// Lowers an AIR `*_with_overflow` add/sub instruction.
/// Decodes the two operands from the instruction's `ty_pl` payload (an `Air.Bin`),
/// turns them into temporaries and delegates to `buildAddSub` in `.WithOverflow`
/// mode, using the instruction's own type as the result type.
fn airAddSubWithOverflow(self: *NavGen, inst: Air.Inst.Index, comptime op: BinaryOp) !?IdRef {
// The operands live in the extra-data array, referenced through `ty_pl.payload`.
3741+
const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
3742+
const extra = self.air.extraData(Air.Bin, ty_pl.payload).data;
3743+
3744+
const lhs = try self.temporary(extra.lhs);
3745+
const rhs = try self.temporary(extra.rhs);
3746+
3747+
const result_ty = self.typeOfIndex(inst);
3748+
return self.buildAddSub(lhs, rhs, result_ty, op, .WithOverflow);
3749+
}
3750+
3751+
/// Lowers an AIR saturating add/sub instruction (`add_sat` / `sub_sat`).
/// Reads the operands directly from the instruction's `bin_op` data, turns them
/// into temporaries and delegates to `buildAddSub` in `.Saturating` mode, using
/// the instruction's own type as the result type.
fn airAddSubSaturating(self: *NavGen, inst: Air.Inst.Index, comptime op: BinaryOp) !?IdRef {
// Unlike the `*_with_overflow` variants, the operands are stored inline as `bin_op`.
3752+
const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
3753+
3754+
const lhs = try self.temporary(bin_op.lhs);
3755+
const rhs = try self.temporary(bin_op.rhs);
3756+
3757+
const result_ty = self.typeOfIndex(inst);
3758+
return self.buildAddSub(lhs, rhs, result_ty, op, .Saturating);
37243759
}
37253760

37263761
fn airMulOverflow(self: *NavGen, inst: Air.Inst.Index) !?IdRef {

0 commit comments

Comments
 (0)