Skip to content

Commit 54ed627

Browse files
committed
x86_64: implement write register splitting
1 parent f8f2a3e commit 54ed627

File tree

3 files changed

+90
-27
lines changed

3 files changed

+90
-27
lines changed

src/arch/x86_64/CodeGen.zig

Lines changed: 83 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -95074,7 +95074,7 @@ fn moveStrategy(cg: *CodeGen, ty: Type, class: Register.Class, aligned: bool) !M
9507495074
.mmx => {},
9507595075
.sse => switch (ty.zigTypeTag(zcu)) {
9507695076
else => {
95077-
const classes = std.mem.sliceTo(&abi.classifySystemV(ty, zcu, cg.target.*, .other), .none);
95077+
const classes = std.mem.sliceTo(&abi.classifySystemV(ty, zcu, cg.target, .other), .none);
9507895078
assert(std.mem.indexOfNone(abi.Class, classes, &.{
9507995079
.integer, .sse, .sseup, .memory, .float, .float_combine,
9508095080
}) == null);
@@ -99706,7 +99706,7 @@ fn airVaArg(self: *CodeGen, inst: Air.Inst.Index) !void {
9970699706
const overflow_arg_area: MCValue = .{ .indirect = .{ .reg = ptr_arg_list_reg, .off = 8 } };
9970799707
const reg_save_area: MCValue = .{ .indirect = .{ .reg = ptr_arg_list_reg, .off = 16 } };
9970899708

99709-
const classes = std.mem.sliceTo(&abi.classifySystemV(promote_ty, zcu, self.target.*, .arg), .none);
99709+
const classes = std.mem.sliceTo(&abi.classifySystemV(promote_ty, zcu, self.target, .arg), .none);
9971099710
switch (classes[0]) {
9971199711
.integer => {
9971299712
assert(classes.len == 1);
@@ -100051,7 +100051,7 @@ fn resolveCallingConventionValues(
100051100051
var ret_tracking_i: usize = 0;
100052100052

100053100053
const classes = switch (cc) {
100054-
.x86_64_sysv => std.mem.sliceTo(&abi.classifySystemV(ret_ty, zcu, self.target.*, .ret), .none),
100054+
.x86_64_sysv => std.mem.sliceTo(&abi.classifySystemV(ret_ty, zcu, self.target, .ret), .none),
100055100055
.x86_64_win => &.{abi.classifyWindows(ret_ty, zcu)},
100056100056
else => unreachable,
100057100057
};
@@ -100140,7 +100140,7 @@ fn resolveCallingConventionValues(
100140100140
var arg_mcv_i: usize = 0;
100141100141

100142100142
const classes = switch (cc) {
100143-
.x86_64_sysv => std.mem.sliceTo(&abi.classifySystemV(ty, zcu, self.target.*, .arg), .none),
100143+
.x86_64_sysv => std.mem.sliceTo(&abi.classifySystemV(ty, zcu, self.target, .arg), .none),
100144100144
.x86_64_win => &.{abi.classifyWindows(ty, zcu)},
100145100145
else => unreachable,
100146100146
};
@@ -100444,7 +100444,7 @@ fn splitType(self: *CodeGen, comptime parts_len: usize, ty: Type) ![parts_len]Ty
100444100444
error.DivisionByZero => unreachable,
100445100445
error.UnexpectedRemainder => {},
100446100446
};
100447-
const classes = std.mem.sliceTo(&abi.classifySystemV(ty, zcu, self.target.*, .other), .none);
100447+
const classes = std.mem.sliceTo(&abi.classifySystemV(ty, zcu, self.target, .other), .none);
100448100448
if (classes.len == parts_len) for (&parts, classes, 0..) |*part, class, part_i| {
100449100449
part.* = switch (class) {
100450100450
.integer => if (part_i < parts_len - 1)
@@ -101443,19 +101443,19 @@ const Temp = struct {
101443101443
.disp = opts.disp,
101444101444
}),
101445101445
),
101446-
.register => |val_reg| try dst.writeRegs(opts.disp, val_ty, &.{registerAlias(
101446+
.register => |val_reg| try dst.writeReg(opts.disp, val_ty, registerAlias(
101447101447
val_reg,
101448101448
@intCast(val_ty.abiSize(cg.pt.zcu)),
101449-
)}, cg),
101449+
), cg),
101450101450
inline .register_pair,
101451101451
.register_triple,
101452101452
.register_quadruple,
101453101453
=> |val_regs| try dst.writeRegs(opts.disp, val_ty, &val_regs, cg),
101454101454
.register_offset => |val_reg_off| switch (val_reg_off.off) {
101455-
0 => try dst.writeRegs(opts.disp, val_ty, &.{registerAlias(
101455+
0 => try dst.writeReg(opts.disp, val_ty, registerAlias(
101456101456
val_reg_off.reg,
101457101457
@intCast(val_ty.abiSize(cg.pt.zcu)),
101458-
)}, cg),
101458+
), cg),
101459101459
else => continue :val_to_gpr,
101460101460
},
101461101461
.register_overflow => |val_reg_ov| {
@@ -101473,7 +101473,7 @@ const Temp = struct {
101473101473
else => std.debug.panic("{s}: {}\n", .{ @src().fn_name, val_ty.fmt(cg.pt) }),
101474101474
});
101475101475
const first_size: u31 = @intCast(first_ty.abiSize(cg.pt.zcu));
101476-
try dst.writeRegs(opts.disp, first_ty, &.{registerAlias(val_reg_ov.reg, first_size)}, cg);
101476+
try dst.writeReg(opts.disp, first_ty, registerAlias(val_reg_ov.reg, first_size), cg);
101477101477
try cg.asmSetccMemory(
101478101478
val_reg_ov.eflags,
101479101479
try dst.tracking(cg).short.mem(cg, .{
@@ -101564,17 +101564,79 @@ const Temp = struct {
101564101564
}));
101565101565
}
101566101566

101567+
/// Stores the value of type `src_ty` held in the single register `src_reg`
/// into the memory tracked by `dst`, at byte displacement `disp`.
///
/// Two paths are taken depending on the register class and the ABI size of
/// `src_ty`:
///   * x87 registers, or power-of-two ABI sizes: one store straight into `dst`
///     using the move strategy requested with `aligned = false`.
///   * any other size: the register is first stored to a freshly allocated
///     frame slot rounded up to the next power of two, and then
///     `src_abi_size` bytes are copied from that slot to `dst + disp`.
///
/// Errors from the move strategy, frame allocation, temporaries and the copy
/// are propagated as `InnerError`.
fn writeReg(dst: Temp, disp: i32, src_ty: Type, src_reg: Register, cg: *CodeGen) InnerError!void {
101568+
const src_abi_size: u31 = @intCast(src_ty.abiSize(cg.pt.zcu));
101569+
const src_rc = src_reg.class();
101570+
// Direct path: the value can be written with a single store.
if (src_rc == .x87 or std.math.isPowerOfTwo(src_abi_size)) {
101571+
const strat = try cg.moveStrategy(src_ty, src_rc, false);
101572+
try strat.write(cg, try dst.tracking(cg).short.mem(cg, .{
101573+
// The memory operand is never wider than the source register itself.
.size = .fromBitSize(@min(8 * src_abi_size, src_reg.bitSize())),
101574+
.disp = disp,
101575+
}), registerAlias(src_reg, src_abi_size));
101576+
} else {
101577+
// Bounce path: the size is not a power of two, so store into a scratch
// frame slot whose size and alignment are the next power of two.
const frame_size = std.math.ceilPowerOfTwoAssert(u32, src_abi_size);
101578+
const frame_index = try cg.allocFrameIndex(.init(.{
101579+
.size = frame_size,
101580+
.alignment = .fromNonzeroByteUnits(frame_size),
101581+
}));
101582+
// The scratch slot is aligned to its own size, so `aligned = true` is requested.
const strat = try cg.moveStrategy(src_ty, src_rc, true);
101583+
try strat.write(cg, .{
101584+
.base = .{ .frame = frame_index },
101585+
.mod = .{ .rm = .{ .size = .fromSize(frame_size) } },
101586+
}, registerAlias(src_reg, frame_size));
101587+
// Copy only the real `src_abi_size` bytes from the scratch slot to
// `dst + disp`, leaving the padding bytes of the slot behind.
var dst_ptr = try cg.tempInit(.usize, dst.tracking(cg).short.address());
101588+
try dst_ptr.toOffset(disp, cg);
101589+
var src_ptr = try cg.tempInit(.usize, .{ .lea_frame = .{ .index = frame_index } });
101590+
var len = try cg.tempInit(.usize, .{ .immediate = src_abi_size });
101591+
try dst_ptr.memcpy(&src_ptr, &len, cg);
101592+
// NOTE(review): `die` presumably releases each temporary; confirm against Temp.die.
try dst_ptr.die(cg);
101593+
try src_ptr.die(cg);
101594+
try len.die(cg);
101595+
}
101596+
}
101597+
101567101598
fn writeRegs(dst: Temp, disp: i32, src_ty: Type, src_regs: []const Register, cg: *CodeGen) InnerError!void {
101599+
const zcu = cg.pt.zcu;
101600+
const classes = std.mem.sliceTo(&abi.classifySystemV(src_ty, zcu, cg.target, .other), .none);
101601+
var next_class_index: u4 = 0;
101568101602
var part_disp = disp;
101569-
var src_abi_size: u32 = @intCast(src_ty.abiSize(cg.pt.zcu));
101603+
var remaining_abi_size = src_ty.abiSize(zcu);
101570101604
for (src_regs) |src_reg| {
101571-
const src_rc = src_reg.class();
101572-
const part_bit_size = @min(8 * src_abi_size, src_reg.bitSize());
101573-
const part_size = @divExact(part_bit_size, 8);
101574-
if (src_rc == .x87 or std.math.isPowerOfTwo(part_size)) {
101575-
const strat = try cg.moveStrategy(src_ty, src_rc, false);
101605+
const class_index = next_class_index;
101606+
const class = classes[class_index];
101607+
next_class_index = @intCast(switch (class) {
101608+
.integer, .memory, .float, .float_combine => class_index + 1,
101609+
.sse => std.mem.indexOfNonePos(abi.Class, classes, class_index + 1, &.{.sseup}) orelse classes.len,
101610+
.x87 => std.mem.indexOfNonePos(abi.Class, classes, class_index + 1, &.{.x87up}) orelse classes.len,
101611+
.sseup, .x87up, .complex_x87, .none, .win_i128, .integer_per_element => unreachable,
101612+
});
101613+
const part_size = switch (class) {
101614+
.integer, .sse, .memory => @min(8 * @as(u7, next_class_index - class_index), remaining_abi_size),
101615+
.x87 => 16,
101616+
.float => 4,
101617+
.float_combine => 8,
101618+
.sseup, .x87up, .complex_x87, .none, .win_i128, .integer_per_element => unreachable,
101619+
};
101620+
const part_ty: Type = switch (class) {
101621+
.integer => .u64,
101622+
.sse => switch (part_size) {
101623+
else => unreachable,
101624+
8 => .f64,
101625+
16 => .vector_2_f64,
101626+
32 => .vector_4_f64,
101627+
},
101628+
.x87 => .f80,
101629+
.float => .f32,
101630+
.float_combine => .vector_2_f32,
101631+
.sseup, .x87up, .complex_x87, .memory, .none, .win_i128, .integer_per_element => unreachable,
101632+
};
101633+
if (class == .x87 or std.math.isPowerOfTwo(part_size)) {
101634+
const strat = try cg.moveStrategy(part_ty, src_reg.class(), false);
101576101635
try strat.write(cg, try dst.tracking(cg).short.mem(cg, .{
101577-
.size = .fromBitSize(part_bit_size),
101636+
.size = switch (class) {
101637+
else => .fromSize(part_size),
101638+
.x87 => .tbyte,
101639+
},
101578101640
.disp = part_disp,
101579101641
}), registerAlias(src_reg, part_size));
101580101642
} else {
@@ -101583,23 +101645,24 @@ const Temp = struct {
101583101645
.size = frame_size,
101584101646
.alignment = .fromNonzeroByteUnits(frame_size),
101585101647
}));
101586-
const strat = try cg.moveStrategy(src_ty, src_rc, true);
101648+
const strat = try cg.moveStrategy(part_ty, src_reg.class(), true);
101587101649
try strat.write(cg, .{
101588101650
.base = .{ .frame = frame_index },
101589101651
.mod = .{ .rm = .{ .size = .fromSize(frame_size) } },
101590101652
}, registerAlias(src_reg, frame_size));
101591101653
var dst_ptr = try cg.tempInit(.usize, dst.tracking(cg).short.address());
101592101654
try dst_ptr.toOffset(part_disp, cg);
101593101655
var src_ptr = try cg.tempInit(.usize, .{ .lea_frame = .{ .index = frame_index } });
101594-
var len = try cg.tempInit(.usize, .{ .immediate = src_abi_size });
101656+
var len = try cg.tempInit(.usize, .{ .immediate = part_size });
101595101657
try dst_ptr.memcpy(&src_ptr, &len, cg);
101596101658
try dst_ptr.die(cg);
101597101659
try src_ptr.die(cg);
101598101660
try len.die(cg);
101599101661
}
101600101662
part_disp += part_size;
101601-
src_abi_size -= part_size;
101663+
remaining_abi_size -= part_size;
101602101664
}
101665+
assert(next_class_index == classes.len);
101603101666
}
101604101667

101605101668
fn memcpy(dst: *Temp, src: *Temp, len: *Temp, cg: *CodeGen) InnerError!void {

src/arch/x86_64/abi.zig

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,7 @@ pub const Context = enum { ret, arg, field, other };
100100

101101
/// There are a maximum of 8 possible return slots. Returned values are in
102102
/// the beginning of the array; unused slots are filled with .none.
103-
pub fn classifySystemV(ty: Type, zcu: *Zcu, target: std.Target, ctx: Context) [8]Class {
103+
pub fn classifySystemV(ty: Type, zcu: *Zcu, target: *const std.Target, ctx: Context) [8]Class {
104104
const memory_class = [_]Class{
105105
.memory, .none, .none, .none,
106106
.none, .none, .none, .none,
@@ -148,7 +148,7 @@ pub fn classifySystemV(ty: Type, zcu: *Zcu, target: std.Target, ctx: Context) [8
148148
result[0] = .integer;
149149
return result;
150150
},
151-
.float => switch (ty.floatBits(target)) {
151+
.float => switch (ty.floatBits(target.*)) {
152152
16 => {
153153
if (ctx == .field) {
154154
result[0] = .memory;
@@ -330,7 +330,7 @@ fn classifySystemVStruct(
330330
starting_byte_offset: u64,
331331
loaded_struct: InternPool.LoadedStructType,
332332
zcu: *Zcu,
333-
target: std.Target,
333+
target: *const std.Target,
334334
) u64 {
335335
const ip = &zcu.intern_pool;
336336
var byte_offset = starting_byte_offset;
@@ -379,7 +379,7 @@ fn classifySystemVUnion(
379379
starting_byte_offset: u64,
380380
loaded_union: InternPool.LoadedUnionType,
381381
zcu: *Zcu,
382-
target: std.Target,
382+
target: *const std.Target,
383383
) u64 {
384384
const ip = &zcu.intern_pool;
385385
for (0..loaded_union.field_types.len) |field_index| {

src/codegen/llvm.zig

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -12071,7 +12071,7 @@ fn firstParamSRet(fn_info: InternPool.Key.FuncType, zcu: *Zcu, target: std.Targe
1207112071
}
1207212072

1207312073
fn firstParamSRetSystemV(ty: Type, zcu: *Zcu, target: std.Target) bool {
12074-
const class = x86_64_abi.classifySystemV(ty, zcu, target, .ret);
12074+
const class = x86_64_abi.classifySystemV(ty, zcu, &target, .ret);
1207512075
if (class[0] == .memory) return true;
1207612076
if (class[0] == .x87 and class[2] != .none) return true;
1207712077
return false;
@@ -12181,7 +12181,7 @@ fn lowerSystemVFnRetTy(o: *Object, fn_info: InternPool.Key.FuncType) Allocator.E
1218112181
return o.lowerType(return_type);
1218212182
}
1218312183
const target = zcu.getTarget();
12184-
const classes = x86_64_abi.classifySystemV(return_type, zcu, target, .ret);
12184+
const classes = x86_64_abi.classifySystemV(return_type, zcu, &target, .ret);
1218512185
if (classes[0] == .memory) return .void;
1218612186
var types_index: u32 = 0;
1218712187
var types_buffer: [8]Builder.Type = undefined;
@@ -12459,7 +12459,7 @@ const ParamTypeIterator = struct {
1245912459
const zcu = it.object.pt.zcu;
1246012460
const ip = &zcu.intern_pool;
1246112461
const target = zcu.getTarget();
12462-
const classes = x86_64_abi.classifySystemV(ty, zcu, target, .arg);
12462+
const classes = x86_64_abi.classifySystemV(ty, zcu, &target, .arg);
1246312463
if (classes[0] == .memory) {
1246412464
it.zig_index += 1;
1246512465
it.llvm_index += 1;

0 commit comments

Comments
 (0)