Skip to content

Commit 3ad2529

Browse files
committed
WIP: implement sret_union ABI for pointer-ful types
This is a combination of the existing:
- `sret` ABI (which can stack-allocate a _single_ pointerful type)
- `union` ABI (which can stack-allocate many _pointer-free_ types)

This provides some nice speed-ups for temporary "wrappers":
```julia
const v = Any[]
@noinline maybe_wrapped(i) = (i % 32 != 0) ? Some(v) : nothing
function foo()
    count = 0
    for i = 1:1_000_000
        count += (maybe_wrapped(i) !== nothing) ? 1 : 0
    end
    return count
end
```

On this PR this gives:
```julia
julia> @btime foo()
  1.675 ms (0 allocations: 0 bytes)
968750
```

compared to current master:
```julia
julia> @btime foo()
  6.877 ms (968750 allocations: 14.78 MiB)
968750
```

The most outstanding TODO here is what to do about PHI nodes. Right now, if the incoming `Union{...}` type has a pointer-containing type, then the object is forced to be boxed, even if the object at run-time is actually pointer-free. But that's just a band-aid - it means we introduce new boxes where we didn't have them before, which is a regression that almost certainly needs to be fixed before landing this.
1 parent 4b4468a commit 3ad2529

File tree

3 files changed

+209
-81
lines changed

3 files changed

+209
-81
lines changed

src/cgutils.cpp

Lines changed: 95 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -948,7 +948,7 @@ static bool for_each_uniontype_small(
948948
allunbox &= for_each_uniontype_small(f, ((jl_uniontype_t*)ty)->b, counter);
949949
return allunbox;
950950
}
951-
else if (jl_is_pointerfree(ty)) {
951+
else if (!deserves_unionbox(ty)) {
952952
f(++counter, (jl_datatype_t*)ty);
953953
return true;
954954
}
@@ -1105,6 +1105,22 @@ static void emit_memcpy(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const &dst
11051105
emit_memcpy_llvm(ctx, dst, dst_ai, data_pointer(ctx, src), src_ai, sz, align_dst, align_src, is_volatile);
11061106
}
11071107

1108+
// Record the GC-tracked pointers of a value into a "shadow" root buffer and
// zero whatever part of that buffer the value does not use.
//
//   ctx     - codegen context; its IR builder is used for all emitted instructions
//   isptr   - forwarded to TrackWithShadow; call sites in this diff pass true when
//             Src is a pointer to the data and false when Src is the unboxed value
//   Src     - the value (or pointer to it) whose roots are being tracked
//   T       - LLVM type describing Src's layout; must be non-null
//   Shadow  - destination buffer for the roots, indexed here as T_prjlvalue slots
//   ShadowT - type of the shadow buffer, forwarded to TrackWithShadow
//   count   - total number of root slots available in Shadow
//
// NOTE(review): TrackWithShadow is not defined in this view; its return value is
// treated here as the number of slots it filled — confirm against its definition.
static void emit_sret_roots(jl_codectx_t &ctx, bool isptr, Value *Src, Type *T, Value *Shadow, Type *ShadowT, unsigned count)
1109+
{
1110+
assert(T != NULL);
1111+
unsigned emitted = TrackWithShadow(Src, T, isptr, Shadow, ShadowT, ctx.builder); // NOTE(review): original author suspected this helper comes from late GC lowering — confirm
1112+
// Fewer slots were written than the buffer holds: zero-fill the tail so the
// unused slots do not keep stale contents.
if (emitted < count) {
1113+
// Address of the first unused slot, i.e. Shadow[emitted].
Value *ToZero = ctx.builder.CreateConstInBoundsGEP1_32(
1114+
ctx.types().T_prjlvalue, Shadow, emitted);
1115+
ctx.builder.CreateMemSet(ToZero,
1116+
/* Val */ ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0),
1117+
/* Size */ ConstantInt::get(ctx.types().T_size, (count - emitted) * sizeof(jl_value_t *)),
1118+
/* Align */ Align(sizeof(jl_value_t *)));
1119+
// All remaining slots are now accounted for (emitted becomes equal to count).
emitted += count - emitted;
1120+
}
1121+
// Sanity check that no more than `count` slots were written; the void casts
// keep both names "used" when assert is compiled out.
assert(emitted <= count); (void)emitted; (void)count;
1122+
}
1123+
11081124
static Value *emit_tagfrom(jl_codectx_t &ctx, jl_datatype_t *dt)
11091125
{
11101126
if (dt->smalltag)
@@ -3317,44 +3333,55 @@ static Value *compute_tindex_unboxed(jl_codectx_t &ctx, const jl_cgval_t &val, j
33173333

33183334

33193335
static void union_alloca_type(jl_uniontype_t *ut,
3320-
bool &allunbox, size_t &nbytes, size_t &align, size_t &min_align)
3336+
bool &allunbox, size_t &nbytes, size_t &align, size_t &min_align, size_t &return_roots)
33213337
{
33223338
nbytes = 0;
33233339
align = 0;
33243340
min_align = MAX_ALIGN;
3341+
return_roots = 0;
33253342
// compute the size of the union alloca that could hold this type
33263343
unsigned counter = 0;
33273344
allunbox = for_each_uniontype_small(
33283345
[&](unsigned idx, jl_datatype_t *jt) {
33293346
if (!jl_is_datatype_singleton(jt)) {
33303347
size_t nb1 = jl_datatype_size(jt);
33313348
size_t align1 = jl_datatype_align(jt);
3349+
size_t n_roots = jl_datatype_layout(jt)->npointers;
33323350
if (nb1 > nbytes)
33333351
nbytes = nb1;
33343352
if (align1 > align)
33353353
align = align1;
33363354
if (align1 < min_align)
33373355
min_align = align1;
3356+
if (n_roots > return_roots)
3357+
return_roots = n_roots;
33383358
}
33393359
},
33403360
(jl_value_t*)ut,
33413361
counter);
33423362
}
33433363

3344-
static AllocaInst *try_emit_union_alloca(jl_codectx_t &ctx, jl_uniontype_t *ut, bool &allunbox, size_t &min_align, size_t &nbytes)
3364+
static AllocaInst *try_emit_union_alloca(jl_codectx_t &ctx, jl_uniontype_t *ut, bool &allunbox, size_t &min_align, size_t &nbytes, /* TODO: remove */bool require_pointerfree, const char *name)
33453365
{
3346-
size_t align;
3347-
union_alloca_type(ut, allunbox, nbytes, align, min_align);
3348-
if (nbytes > 0) {
3366+
size_t align, return_roots;
3367+
union_alloca_type(ut, allunbox, nbytes, align, min_align, return_roots);
3368+
if (nbytes > 0 && !(require_pointerfree && return_roots != 0)) {
33493369
// at least some of the values can live on the stack
33503370
// try to pick an Integer type size such that SROA will emit reasonable code
33513371
Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * min_align), (nbytes + min_align - 1) / min_align);
33523372
AllocaInst *lv = emit_static_alloca(ctx, AT);
3353-
setName(ctx.emission_context, lv, "unionalloca");
3373+
setName(ctx.emission_context, lv, name);
33543374
if (align > 1)
33553375
lv->setAlignment(Align(align));
3376+
if (return_roots > 0) {
3377+
StoreInst *SI = new StoreInst(Constant::getNullValue(AT), lv, false, Align(sizeof(void*)));
3378+
SI->insertAfter(ctx.topalloca);
3379+
}
33563380
return lv;
33573381
}
3382+
allunbox = false;
3383+
nbytes = 0;
3384+
min_align = 0;
33583385
return NULL;
33593386
}
33603387

@@ -3566,18 +3593,50 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotab
35663593
}
35673594

35683595
// copy src to dest, if src is justbits. if skip is true, the value of dest is undefined
3569-
static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, const jl_cgval_t &src, Value *skip, bool isVolatile=false)
3596+
static void emit_unionmove(jl_codectx_t &ctx, Value *dest, jl_cgval_shadow_t *shadow,
3597+
MDNode *tbaa_dst, const jl_cgval_t &src, Value *skip, bool isVolatile=false)
35703598
{
35713599
if (AllocaInst *ai = dyn_cast<AllocaInst>(dest))
35723600
// TODO: make this a lifetime_end & dereferenceable annotation?
35733601
ctx.builder.CreateAlignedStore(UndefValue::get(ai->getAllocatedType()), ai, ai->getAlign());
3602+
35743603
if (jl_is_concrete_type(src.typ) || src.constant) {
35753604
jl_value_t *typ = src.constant ? jl_typeof(src.constant) : src.typ;
3576-
assert(skip || jl_is_pointerfree(typ));
3577-
if (jl_is_pointerfree(typ)) {
3605+
// FIXME: This assumes that the `skip` is used to skip any boxed union types, which are any !pointerfree types
3606+
//
3607+
// This should really be skipped by the caller instead (and/or emit a trap)
3608+
assert(skip || !deserves_unionbox(typ)); //jl_is_pointerfree(typ));
3609+
if (!deserves_unionbox(typ)) {
35783610
unsigned alignment = julia_alignment(typ);
35793611
if (!src.ispointer() || src.constant) {
3580-
emit_unbox_store(ctx, src, dest, tbaa_dst, alignment, isVolatile);
3612+
if (!src.isghost) {
3613+
// inlined version of emit_unbox_store
3614+
if (!src.ispointer()) {
3615+
Value *unboxed = zext_struct(ctx, src.V);
3616+
3617+
if (shadow != nullptr && !jl_is_pointerfree(typ)) {
3618+
assert(jl_is_concrete_type(typ));
3619+
Type *store_ty = julia_type_to_llvm(ctx, typ);
3620+
emit_sret_roots(ctx, /* isptr */ false, unboxed, store_ty, shadow->value, shadow->typ, shadow->n_roots);
3621+
}
3622+
Type *dest_ty = unboxed->getType()->getPointerTo();
3623+
if (dest->getType() != dest_ty)
3624+
dest = emit_bitcast(ctx, dest, dest_ty);
3625+
StoreInst *store = ctx.builder.CreateAlignedStore(unboxed, dest, Align(alignment));
3626+
store->setVolatile(isVolatile);
3627+
jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa_dst);
3628+
ai.decorateInst(store);
3629+
} else {
3630+
Value *src_ptr = data_pointer(ctx, src);
3631+
if (shadow != nullptr && !jl_is_pointerfree(typ)) {
3632+
Type *store_ty = julia_type_to_llvm(ctx, typ);
3633+
emit_sret_roots(ctx, /* isptr */ true, src_ptr, store_ty, shadow->value, shadow->typ, shadow->n_roots);
3634+
}
3635+
emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dst), src_ptr,
3636+
jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), jl_datatype_size(src.typ), alignment, julia_alignment(src.typ), isVolatile);
3637+
}
3638+
}
3639+
// emit_unbox_store(ctx, src, dest, tbaa_dst, alignment, isVolatile);
35813640
}
35823641
else {
35833642
Value *src_ptr = data_pointer(ctx, src);
@@ -3586,6 +3645,11 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con
35863645
// select copy dest -> dest to simulate an undef value / conditional copy
35873646
// if (skip) src_ptr = ctx.builder.CreateSelect(skip, dest, src_ptr);
35883647
auto f = [&] {
3648+
if (shadow != nullptr && !jl_is_pointerfree(typ)) {
3649+
assert(jl_is_concrete_type(typ));
3650+
Type *store_ty = julia_type_to_llvm(ctx, typ);
3651+
emit_sret_roots(ctx, /* isptr */ true, src_ptr, store_ty, shadow->value, shadow->typ, shadow->n_roots);
3652+
}
35893653
(void)emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dst), src_ptr,
35903654
jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), nb, alignment, alignment, isVolatile);
35913655
return nullptr;
@@ -3621,6 +3685,11 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con
36213685
ctx.builder.CreateUnreachable();
36223686
return;
36233687
} else {
3688+
// Maybe assert here (TBD)
3689+
if (shadow != nullptr && !jl_is_pointerfree((jl_value_t *)jt)) {
3690+
Type *store_ty = julia_type_to_llvm(ctx, (jl_value_t *)jt);
3691+
emit_sret_roots(ctx, /* isptr */ true, src_ptr, store_ty, shadow->value, shadow->typ, shadow->n_roots);
3692+
}
36243693
emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dst), src_ptr,
36253694
jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), nb, alignment, alignment, isVolatile);
36263695
}
@@ -3645,10 +3714,18 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con
36453714
else {
36463715
assert(src.isboxed && "expected boxed value for sizeof/alignment computation");
36473716
auto f = [&] {
3648-
Value *datatype = emit_typeof(ctx, src, false, false);
3649-
Value *copy_bytes = emit_datatype_size(ctx, datatype);
3650-
(void)emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dst), data_pointer(ctx, src),
3651-
jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), copy_bytes, 1, 1, isVolatile);
3717+
if (shadow == nullptr) {
3718+
Value *datatype = emit_typeof(ctx, src, false, false);
3719+
Value *copy_bytes = emit_datatype_size(ctx, datatype);
3720+
(void)emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dst), data_pointer(ctx, src),
3721+
jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), copy_bytes, 1, 1, isVolatile);
3722+
} else {
3723+
Function *trap_func = Intrinsic::getDeclaration(
3724+
ctx.f->getParent(),
3725+
Intrinsic::trap);
3726+
ctx.builder.CreateCall(trap_func);
3727+
ctx.builder.CreateUnreachable();
3728+
}
36523729
return nullptr;
36533730
};
36543731
if (skip)
@@ -3807,7 +3884,7 @@ static jl_cgval_t union_store(jl_codectx_t &ctx,
38073884
ai.decorateInst(ctx.builder.CreateAlignedStore(tindex, ptindex, Align(1)));
38083885
// copy data
38093886
if (!rhs.isghost) {
3810-
emit_unionmove(ctx, ptr, tbaa, rhs, nullptr);
3887+
emit_unionmove(ctx, ptr, /*shadow*/nullptr, tbaa, rhs, /*skip*/nullptr);
38113888
}
38123889
if (isreplacefield || ismodifyfield) {
38133890
ctx.builder.CreateBr(DoneBB);
@@ -4001,7 +4078,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
40014078
AllocaInst *lv = emit_static_alloca(ctx, ET);
40024079
setName(ctx.emission_context, lv, "unioninit");
40034080
lv->setOperand(0, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), (fsz1 + al - 1) / al));
4004-
emit_unionmove(ctx, lv, ctx.tbaa().tbaa_stack, fval_info, nullptr);
4081+
emit_unionmove(ctx, lv, /* shadow */ nullptr, ctx.tbaa().tbaa_stack, fval_info, /* skip */ nullptr);
40054082
// emit all of the align-sized words
40064083
unsigned i = 0;
40074084
for (; i < fsz1 / al; i++) {
@@ -4031,7 +4108,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
40314108
jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_unionselbyte);
40324109
ai.decorateInst(ctx.builder.CreateAlignedStore(tindex, ptindex, Align(1)));
40334110
if (!rhs_union.isghost)
4034-
emit_unionmove(ctx, dest, ctx.tbaa().tbaa_stack, fval_info, nullptr);
4111+
emit_unionmove(ctx, dest, /*shadow*/nullptr, ctx.tbaa().tbaa_stack, fval_info, /*skip*/nullptr);
40354112
}
40364113
}
40374114
else {

0 commit comments

Comments
 (0)