Skip to content

Commit 940928e

Browse files
authored
codegen: cleanup gcstack call frames somewhat earlier (#57410)
Slightly reduces the amount of work these optimization passes need to do later, in most typical cases, and avoids putting an unknown call at the top of all of our functions, which can inhibit some optimizations of otherwise trivial functions. Fixes #57400
1 parent 7e69710 commit 940928e

8 files changed

+110
-105
lines changed

src/ccall.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2162,12 +2162,12 @@ jl_cgval_t function_sig_t::emit_a_ccall(
21622162
}
21632163
}
21642164

2165-
// Potentially we could drop `jl_roots(gc_uses)` in the presence of `gc-transition(gc_uses)`
2165+
// Potentially we could add gc_uses to `gc-transition`, instead of emitting them separately as jl_roots
21662166
SmallVector<OperandBundleDef, 2> bundles;
21672167
if (!gc_uses.empty())
21682168
bundles.push_back(OperandBundleDef("jl_roots", gc_uses));
21692169
if (gc_safe)
2170-
bundles.push_back(OperandBundleDef("gc-transition", ArrayRef<Value*> {}));
2170+
bundles.push_back(OperandBundleDef("gc-transition", get_current_ptls(ctx)));
21712171
// the actual call
21722172
CallInst *ret = ctx.builder.CreateCall(functype, llvmf,
21732173
argvals,

src/codegen.cpp

Lines changed: 61 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -7002,8 +7002,9 @@ static void allocate_gc_frame(jl_codectx_t &ctx, BasicBlock *b0, bool or_new=fal
70027002
// allocate a placeholder gc instruction
70037003
// this will require the runtime, but it gets deleted later if unused
70047004
ctx.topalloca = ctx.builder.CreateCall(prepare_call(or_new ? jladoptthread_func : jlpgcstack_func));
7005-
ctx.pgcstack = ctx.topalloca;
7006-
ctx.pgcstack->setName("pgcstack");
7005+
ctx.topalloca->setName("pgcstack");
7006+
if (ctx.pgcstack == nullptr)
7007+
ctx.pgcstack = ctx.topalloca;
70077008
}
70087009

70097010
static Value *get_current_task(jl_codectx_t &ctx)
@@ -7150,16 +7151,17 @@ static void emit_specsig_to_specsig(
71507151
ctx.builder.SetInsertPoint(b0);
71517152
DebugLoc noDbg;
71527153
ctx.builder.SetCurrentDebugLocation(noDbg);
7153-
allocate_gc_frame(ctx, b0);
71547154
Function::arg_iterator AI = gf_thunk->arg_begin();
71557155
SmallVector<jl_cgval_t, 0> myargs(nargs);
71567156
if (cc == jl_returninfo_t::SRet || cc == jl_returninfo_t::Union)
71577157
++AI;
71587158
if (return_roots)
71597159
++AI;
71607160
if (JL_FEAT_TEST(ctx,gcstack_arg)) {
7161+
ctx.pgcstack = AI;
71617162
++AI; // gcstack_arg
71627163
}
7164+
allocate_gc_frame(ctx, b0);
71637165
for (size_t i = 0; i < nargs; i++) {
71647166
if (i == 0 && is_for_opaque_closure) {
71657167
// `jt` would be wrong here (it is the captures type), so is not used used for
@@ -7266,6 +7268,10 @@ static void emit_specsig_to_specsig(
72667268
break;
72677269
}
72687270
}
7271+
if (ctx.topalloca != ctx.pgcstack && ctx.topalloca->use_empty()) {
7272+
ctx.topalloca->eraseFromParent();
7273+
ctx.topalloca = nullptr;
7274+
}
72697275
}
72707276

72717277
void emit_specsig_to_fptr1(
@@ -8122,6 +8128,10 @@ static void gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *abi, jl_va
81228128
CreateTrap(ctx.builder, false);
81238129
else
81248130
ctx.builder.CreateRet(boxed(ctx, retval));
8131+
if (ctx.topalloca != ctx.pgcstack && ctx.topalloca->use_empty()) {
8132+
ctx.topalloca->eraseFromParent();
8133+
ctx.topalloca = nullptr;
8134+
}
81258135
}
81268136

81278137
static jl_returninfo_t get_specsig_function(jl_codegen_params_t &params, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure,
@@ -8778,7 +8788,53 @@ static jl_llvm_functions_t
87788788
ctx.spvals_ptr = &*AI++;
87798789
}
87808790
}
8781-
// step 6. set up GC frame
8791+
// step 6. set up GC frame and special arguments
8792+
Function::arg_iterator AI = f->arg_begin();
8793+
SmallVector<AttributeSet, 0> attrs(f->arg_size()); // function declaration attributes
8794+
8795+
if (has_sret) {
8796+
Argument *Arg = &*AI;
8797+
++AI;
8798+
AttrBuilder param(ctx.builder.getContext(), f->getAttributes().getParamAttrs(Arg->getArgNo()));
8799+
if (returninfo.cc == jl_returninfo_t::Union) {
8800+
param.addAttribute(Attribute::NonNull);
8801+
// The `dereferenceable` below does not imply `nonnull` for non addrspace(0) pointers.
8802+
param.addDereferenceableAttr(returninfo.union_bytes);
8803+
param.addAlignmentAttr(returninfo.union_align);
8804+
}
8805+
else {
8806+
const DataLayout &DL = jl_Module->getDataLayout();
8807+
Type *RT = Arg->getParamStructRetType();
8808+
TypeSize sz = DL.getTypeAllocSize(RT);
8809+
Align al = DL.getPrefTypeAlign(RT);
8810+
if (al > MAX_ALIGN)
8811+
al = Align(MAX_ALIGN);
8812+
param.addAttribute(Attribute::NonNull);
8813+
// The `dereferenceable` below does not imply `nonnull` for non addrspace(0) pointers.
8814+
param.addDereferenceableAttr(sz);
8815+
param.addAlignmentAttr(al);
8816+
}
8817+
attrs[Arg->getArgNo()] = AttributeSet::get(Arg->getContext(), param); // function declaration attributes
8818+
}
8819+
if (returninfo.return_roots) {
8820+
Argument *Arg = &*AI;
8821+
++AI;
8822+
AttrBuilder param(ctx.builder.getContext(), f->getAttributes().getParamAttrs(Arg->getArgNo()));
8823+
param.addAttribute(Attribute::NonNull);
8824+
// The `dereferenceable` below does not imply `nonnull` for non addrspace(0) pointers.
8825+
size_t size = returninfo.return_roots * sizeof(jl_value_t*);
8826+
param.addDereferenceableAttr(size);
8827+
param.addAlignmentAttr(Align(sizeof(jl_value_t*)));
8828+
attrs[Arg->getArgNo()] = AttributeSet::get(Arg->getContext(), param); // function declaration attributes
8829+
}
8830+
if (specsig && JL_FEAT_TEST(ctx, gcstack_arg)) {
8831+
Argument *Arg = &*AI;
8832+
ctx.pgcstack = Arg;
8833+
++AI;
8834+
AttrBuilder param(ctx.builder.getContext());
8835+
attrs[Arg->getArgNo()] = AttributeSet::get(Arg->getContext(), param);
8836+
}
8837+
87828838
allocate_gc_frame(ctx, b0);
87838839
Value *last_age = NULL;
87848840
Value *world_age_field = NULL;
@@ -8921,9 +8977,6 @@ static jl_llvm_functions_t
89218977
}
89228978

89238979
// step 8. move args into local variables
8924-
Function::arg_iterator AI = f->arg_begin();
8925-
SmallVector<AttributeSet, 0> attrs(f->arg_size()); // function declaration attributes
8926-
89278980
auto get_specsig_arg = [&](jl_value_t *argType, Type *llvmArgType, bool isboxed) {
89288981
if (type_is_ghost(llvmArgType)) { // this argument is not actually passed
89298982
return ghostValue(ctx, argType);
@@ -8956,47 +9009,6 @@ static jl_llvm_functions_t
89569009
return theArg;
89579010
};
89589011

8959-
if (has_sret) {
8960-
Argument *Arg = &*AI;
8961-
++AI;
8962-
AttrBuilder param(ctx.builder.getContext(), f->getAttributes().getParamAttrs(Arg->getArgNo()));
8963-
if (returninfo.cc == jl_returninfo_t::Union) {
8964-
param.addAttribute(Attribute::NonNull);
8965-
// The `dereferenceable` below does not imply `nonnull` for non addrspace(0) pointers.
8966-
param.addDereferenceableAttr(returninfo.union_bytes);
8967-
param.addAlignmentAttr(returninfo.union_align);
8968-
}
8969-
else {
8970-
const DataLayout &DL = jl_Module->getDataLayout();
8971-
Type *RT = Arg->getParamStructRetType();
8972-
TypeSize sz = DL.getTypeAllocSize(RT);
8973-
Align al = DL.getPrefTypeAlign(RT);
8974-
if (al > MAX_ALIGN)
8975-
al = Align(MAX_ALIGN);
8976-
param.addAttribute(Attribute::NonNull);
8977-
// The `dereferenceable` below does not imply `nonnull` for non addrspace(0) pointers.
8978-
param.addDereferenceableAttr(sz);
8979-
param.addAlignmentAttr(al);
8980-
}
8981-
attrs[Arg->getArgNo()] = AttributeSet::get(Arg->getContext(), param); // function declaration attributes
8982-
}
8983-
if (returninfo.return_roots) {
8984-
Argument *Arg = &*AI;
8985-
++AI;
8986-
AttrBuilder param(ctx.builder.getContext(), f->getAttributes().getParamAttrs(Arg->getArgNo()));
8987-
param.addAttribute(Attribute::NonNull);
8988-
// The `dereferenceable` below does not imply `nonnull` for non addrspace(0) pointers.
8989-
size_t size = returninfo.return_roots * sizeof(jl_value_t*);
8990-
param.addDereferenceableAttr(size);
8991-
param.addAlignmentAttr(Align(sizeof(jl_value_t*)));
8992-
attrs[Arg->getArgNo()] = AttributeSet::get(Arg->getContext(), param); // function declaration attributes
8993-
}
8994-
if (specsig && JL_FEAT_TEST(ctx, gcstack_arg)){
8995-
Argument *Arg = &*AI;
8996-
++AI;
8997-
AttrBuilder param(ctx.builder.getContext());
8998-
attrs[Arg->getArgNo()] = AttributeSet::get(Arg->getContext(), param);
8999-
}
90009012
for (i = 0; i < nreq && i < vinfoslen; i++) {
90019013
jl_sym_t *s = slot_symbol(ctx, i);
90029014
jl_varinfo_t &vi = ctx.slots[i];
@@ -9964,7 +9976,7 @@ static jl_llvm_functions_t
99649976
}
99659977
}
99669978

9967-
if (ctx.topalloca->use_empty()) {
9979+
if (ctx.topalloca != ctx.pgcstack && ctx.topalloca->use_empty()) {
99689980
ctx.topalloca->eraseFromParent();
99699981
ctx.topalloca = nullptr;
99709982
}

src/llvm-final-gc-lowering.cpp

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -161,24 +161,31 @@ void FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
161161
target->replaceAllUsesWith(newI);
162162
target->eraseFromParent();
163163
}
164-
bool FinalLowerGC::shouldRunFinalGC(Function &F)
164+
165+
static bool hasUse(const JuliaPassContext &ctx, const jl_intrinsics::IntrinsicDescription &v)
166+
{
167+
auto Intr = ctx.getOrNull(v);
168+
return Intr && !Intr->use_empty();
169+
}
170+
171+
bool FinalLowerGC::shouldRunFinalGC()
165172
{
166173
bool should_run = 0;
167-
should_run |= getOrNull(jl_intrinsics ::newGCFrame) != nullptr;
168-
should_run |= getOrNull(jl_intrinsics ::getGCFrameSlot) != nullptr;
169-
should_run |= getOrNull(jl_intrinsics ::pushGCFrame) != nullptr;
170-
should_run |= getOrNull(jl_intrinsics ::popGCFrame) != nullptr;
171-
should_run |= getOrNull(jl_intrinsics ::GCAllocBytes) != nullptr;
172-
should_run |= getOrNull(jl_intrinsics ::queueGCRoot) != nullptr;
173-
should_run |= getOrNull(jl_intrinsics ::safepoint) != nullptr;
174+
should_run |= hasUse(*this, jl_intrinsics::newGCFrame);
175+
should_run |= hasUse(*this, jl_intrinsics::getGCFrameSlot);
176+
should_run |= hasUse(*this, jl_intrinsics::pushGCFrame);
177+
should_run |= hasUse(*this, jl_intrinsics::popGCFrame);
178+
should_run |= hasUse(*this, jl_intrinsics::GCAllocBytes);
179+
should_run |= hasUse(*this, jl_intrinsics::queueGCRoot);
180+
should_run |= hasUse(*this, jl_intrinsics::safepoint);
174181
return should_run;
175182
}
176183

177184
bool FinalLowerGC::runOnFunction(Function &F)
178185
{
179186
initAll(*F.getParent());
180187
pgcstack = getPGCstack(F);
181-
if (!shouldRunFinalGC(F))
188+
if (!pgcstack || !shouldRunFinalGC())
182189
goto verify_skip;
183190

184191
LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Processing function " << F.getName() << "\n");
@@ -232,7 +239,7 @@ bool FinalLowerGC::runOnFunction(Function &F)
232239
auto IS_INTRINSIC = [&](auto intrinsic) {
233240
auto intrinsic2 = getOrNull(intrinsic);
234241
if (intrinsic2 == callee) {
235-
errs() << "Final-GC-lowering didn't eliminate all intrinsics'" << F.getName() << "', dumping entire module!\n\n";
242+
errs() << "Final-GC-lowering didn't eliminate all intrinsics from '" << F.getName() << "', dumping entire module!\n\n";
236243
errs() << *F.getParent() << "\n";
237244
abort();
238245
}

src/llvm-gc-interface-passes.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,7 @@ struct LateLowerGCFrame: private JuliaPassContext {
328328
bool runOnFunction(Function &F, bool *CFGModified = nullptr);
329329

330330
private:
331-
CallInst *pgcstack;
331+
Value *pgcstack;
332332
Function *smallAllocFunc;
333333

334334
void MaybeNoteDef(State &S, BBState &BBS, Value *Def, const ArrayRef<int> &SafepointsSoFar,
@@ -388,7 +388,7 @@ struct FinalLowerGC: private JuliaPassContext {
388388
Function *smallAllocFunc;
389389
Function *bigAllocFunc;
390390
Function *allocTypedFunc;
391-
Instruction *pgcstack;
391+
Value *pgcstack;
392392
Type *T_size;
393393

394394
// Lowers a `julia.new_gc_frame` intrinsic.
@@ -411,8 +411,9 @@ struct FinalLowerGC: private JuliaPassContext {
411411

412412
// Lowers a `julia.safepoint` intrinsic.
413413
void lowerSafepoint(CallInst *target, Function &F);
414+
414415
// Check if the pass should be run
415-
bool shouldRunFinalGC(Function &F);
416+
bool shouldRunFinalGC();
416417
};
417418

418419
#endif // LLVM_GC_PASSES_H

src/llvm-late-gc-lowering.cpp

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2219,20 +2219,21 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
22192219
SmallVector<OperandBundleDef,2> bundles;
22202220
CI->getOperandBundlesAsDefs(bundles);
22212221
bool gc_transition = false;
2222+
Value *ptls;
22222223
for (auto &bundle: bundles)
2223-
if (bundle.getTag() == "gc-transition")
2224+
if (bundle.getTag() == "gc-transition") {
22242225
gc_transition = true;
2226+
ptls = bundle.inputs()[0];
2227+
}
22252228

22262229
// In theory LLVM wants us to lower this using RewriteStatepointsForGC
22272230
if (gc_transition) {
22282231
// Insert the operations to switch to gc_safe if necessary.
22292232
IRBuilder<> builder(CI);
2230-
Value *pgcstack = getOrAddPGCstack(F);
2231-
assert(pgcstack);
2233+
assert(ptls);
22322234
// We dont use emit_state_set here because safepoints are unconditional for any code that reaches this
22332235
// We are basically guaranteed to go from gc_unsafe to gc_safe and back, and both transitions need a safepoint
22342236
// We also can't add any BBs here, so just avoiding the branches is good
2235-
Value *ptls = get_current_ptls_from_task(builder, get_current_task_from_pgcstack(builder, pgcstack), tbaa_gcframe);
22362237
unsigned offset = offsetof(jl_tls_states_t, gc_state);
22372238
Value *gc_state = builder.CreateConstInBoundsGEP1_32(Type::getInt8Ty(builder.getContext()), ptls, offset, "gc_state");
22382239
LoadInst *last_gc_state = builder.CreateAlignedLoad(Type::getInt8Ty(builder.getContext()), gc_state, Align(sizeof(void*)));
@@ -2249,7 +2250,7 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
22492250
++it;
22502251
continue;
22512252
} else {
2252-
// remove operand bundle
2253+
// remove all operand bundles
22532254
CallInst *NewCall = CallInst::Create(CI, None, CI);
22542255
NewCall->takeName(CI);
22552256
NewCall->copyMetadata(*CI);
@@ -2395,7 +2396,10 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(ArrayRef<int> Colors, int PreAss
23952396
auto pushGcframe = CallInst::Create(
23962397
getOrDeclare(jl_intrinsics::pushGCFrame),
23972398
{gcframe, ConstantInt::get(T_int32, 0)});
2398-
pushGcframe->insertAfter(pgcstack);
2399+
if (isa<Argument>(pgcstack))
2400+
pushGcframe->insertAfter(gcframe);
2401+
else
2402+
pushGcframe->insertAfter(cast<Instruction>(pgcstack));
23992403

24002404
// we don't run memsetopt after this, so run a basic approximation of it
24012405
// that removes any redundant memset calls in the prologue since getGCFrameSlot already includes the null store
@@ -2513,8 +2517,6 @@ bool LateLowerGCFrame::runOnFunction(Function &F, bool *CFGModified) {
25132517
initAll(*F.getParent());
25142518
smallAllocFunc = getOrDeclare(jl_well_known::GCSmallAlloc);
25152519
LLVM_DEBUG(dbgs() << "GC ROOT PLACEMENT: Processing function " << F.getName() << "\n");
2516-
if (!pgcstack_getter && !adoptthread_func)
2517-
return CleanupIR(F, nullptr, CFGModified);
25182520

25192521
pgcstack = getPGCstack(F);
25202522
if (!pgcstack)

src/llvm-pass-helpers.cpp

Lines changed: 10 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -72,25 +72,9 @@ void JuliaPassContext::initAll(Module &M)
7272
T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx);
7373
}
7474

75-
llvm::CallInst *JuliaPassContext::getPGCstack(llvm::Function &F) const
75+
llvm::Value *JuliaPassContext::getPGCstack(llvm::Function &F) const
7676
{
77-
if (!pgcstack_getter && !adoptthread_func)
78-
return nullptr;
79-
for (auto &I : F.getEntryBlock()) {
80-
if (CallInst *callInst = dyn_cast<CallInst>(&I)) {
81-
Value *callee = callInst->getCalledOperand();
82-
if ((pgcstack_getter && callee == pgcstack_getter) ||
83-
(adoptthread_func && callee == adoptthread_func)) {
84-
return callInst;
85-
}
86-
}
87-
}
88-
return nullptr;
89-
}
90-
91-
llvm::CallInst *JuliaPassContext::getOrAddPGCstack(llvm::Function &F)
92-
{
93-
if (pgcstack_getter || adoptthread_func)
77+
if (pgcstack_getter || adoptthread_func) {
9478
for (auto &I : F.getEntryBlock()) {
9579
if (CallInst *callInst = dyn_cast<CallInst>(&I)) {
9680
Value *callee = callInst->getCalledOperand();
@@ -100,13 +84,14 @@ llvm::CallInst *JuliaPassContext::getOrAddPGCstack(llvm::Function &F)
10084
}
10185
}
10286
}
103-
IRBuilder<> builder(&F.getEntryBlock().front());
104-
if (pgcstack_getter)
105-
return builder.CreateCall(pgcstack_getter);
106-
auto FT = FunctionType::get(PointerType::get(F.getContext(), 0), false);
107-
auto F2 = Function::Create(FT, Function::ExternalLinkage, "julia.get_pgcstack", F.getParent());
108-
pgcstack_getter = F2;
109-
return builder.CreateCall( F2);
87+
}
88+
if (F.getCallingConv() == CallingConv::Swift) {
89+
for (auto &arg : F.args()) {
90+
if (arg.hasSwiftSelfAttr())
91+
return &arg;
92+
}
93+
}
94+
return nullptr;
11095
}
11196

11297
llvm::Function *JuliaPassContext::getOrNull(

src/llvm-pass-helpers.h

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -85,12 +85,10 @@ struct JuliaPassContext {
8585

8686
// Gets a call to the `julia.get_pgcstack' intrinsic in the entry
8787
// point of the given function, if there exists such a call.
88+
// Otherwise, gets a swiftself argument, if there exists such an argument.
8889
// Otherwise, `nullptr` is returned.
89-
llvm::CallInst *getPGCstack(llvm::Function &F) const;
90-
// Gets a call to the `julia.get_pgcstack' intrinsic in the entry
91-
// point of the given function, if there exists such a call.
92-
// Otherwise, creates a new call to the intrinsic
93-
llvm::CallInst *getOrAddPGCstack(llvm::Function &F);
90+
llvm::Value *getPGCstack(llvm::Function &F) const;
91+
9492
// Gets the intrinsic or well-known function that conforms to
9593
// the given description if it exists in the module. If not,
9694
// `nullptr` is returned.

0 commit comments

Comments
 (0)