Skip to content

Commit 4c45f29

Browse files
Enable allocation hoisting out of loops (#43057)
1 parent 77d3d30 commit 4c45f29

File tree

7 files changed

+586
-385
lines changed

7 files changed

+586
-385
lines changed

src/Makefile

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ CODEGEN_SRCS := codegen llvm-ptls
6161
RUNTIME_CODEGEN_SRCS := jitlayers aotcompile debuginfo disasm llvm-simdloop llvm-muladd \
6262
llvm-final-gc-lowering llvm-pass-helpers llvm-late-gc-lowering \
6363
llvm-lower-handlers llvm-gc-invariant-verifier llvm-propagate-addrspaces \
64-
llvm-multiversioning llvm-alloc-opt cgmemmgr llvm-remove-addrspaces \
64+
llvm-multiversioning llvm-alloc-opt llvm-alloc-helpers cgmemmgr llvm-remove-addrspaces \
6565
llvm-remove-ni llvm-julia-licm llvm-demote-float16 llvm-cpufeatures
6666
FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --includedir)
6767
CG_LLVM_LIBS := all
@@ -290,9 +290,11 @@ $(BUILDDIR)/interpreter.o $(BUILDDIR)/interpreter.dbg.obj: $(SRCDIR)/builtin_pro
290290
$(BUILDDIR)/jitlayers.o $(BUILDDIR)/jitlayers.dbg.obj: $(SRCDIR)/jitlayers.h $(SRCDIR)/codegen_shared.h
291291
$(BUILDDIR)/jltypes.o $(BUILDDIR)/jltypes.dbg.obj: $(SRCDIR)/builtin_proto.h
292292
$(build_shlibdir)/libllvmcalltest.$(SHLIB_EXT): $(SRCDIR)/codegen_shared.h $(BUILDDIR)/julia_version.h
293-
$(BUILDDIR)/llvm-alloc-opt.o $(BUILDDIR)/llvm-alloc-opt.dbg.obj: $(SRCDIR)/codegen_shared.h $(SRCDIR)/llvm-pass-helpers.h
293+
$(BUILDDIR)/llvm-alloc-helpers.o $(BUILDDIR)/llvm-alloc-helpers.dbg.obj: $(SRCDIR)/codegen_shared.h $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/llvm-alloc-helpers.h
294+
$(BUILDDIR)/llvm-alloc-opt.o $(BUILDDIR)/llvm-alloc-opt.dbg.obj: $(SRCDIR)/codegen_shared.h $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/llvm-alloc-helpers.h
294295
$(BUILDDIR)/llvm-final-gc-lowering.o $(BUILDDIR)/llvm-final-gc-lowering.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h
295296
$(BUILDDIR)/llvm-gc-invariant-verifier.o $(BUILDDIR)/llvm-gc-invariant-verifier.dbg.obj: $(SRCDIR)/codegen_shared.h
297+
$(BUILDDIR)/llvm-julia-licm.o $(BUILDDIR)/llvm-julia-licm.dbg.obj: $(SRCDIR)/codegen_shared.h $(SRCDIR)/llvm-alloc-helpers.h $(SRCDIR)/llvm-pass-helpers.h
296298
$(BUILDDIR)/llvm-late-gc-lowering.o $(BUILDDIR)/llvm-late-gc-lowering.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/codegen_shared.h
297299
$(BUILDDIR)/llvm-lower-handlers.o $(BUILDDIR)/llvm-lower-handlers.dbg.obj: $(SRCDIR)/codegen_shared.h
298300
$(BUILDDIR)/llvm-multiversioning.o $(BUILDDIR)/llvm-multiversioning.dbg.obj: $(SRCDIR)/codegen_shared.h $(SRCDIR)/processor.h

src/codegen.cpp

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -296,23 +296,22 @@ static inline void add_named_global(JuliaVariable *name, void *addr)
296296

297297
struct JuliaFunction {
298298
public:
299-
StringLiteral name;
300-
FunctionType *(*_type)(LLVMContext &C);
301-
AttributeList (*_attrs)(LLVMContext &C);
299+
llvm::StringLiteral name;
300+
llvm::FunctionType *(*_type)(llvm::LLVMContext &C);
301+
llvm::AttributeList (*_attrs)(llvm::LLVMContext &C);
302302

303303
JuliaFunction(const JuliaFunction&) = delete;
304304
JuliaFunction(const JuliaFunction&&) = delete;
305-
Function *realize(Module *m) {
306-
if (GlobalValue *V = m->getNamedValue(name))
307-
return cast<Function>(V);
308-
Function *F = Function::Create(_type(m->getContext()),
309-
Function::ExternalLinkage,
305+
llvm::Function *realize(llvm::Module *m) {
306+
if (llvm::GlobalValue *V = m->getNamedValue(name))
307+
return llvm::cast<llvm::Function>(V);
308+
llvm::Function *F = llvm::Function::Create(_type(m->getContext()),
309+
llvm::Function::ExternalLinkage,
310310
name, m);
311311
if (_attrs)
312312
F->setAttributes(_attrs(m->getContext()));
313313
return F;
314314
}
315-
Function *realize(jl_codectx_t &ctx);
316315
};
317316

318317
template<typename T>

src/llvm-alloc-helpers.cpp

Lines changed: 303 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,303 @@
1+
#include "llvm-alloc-helpers.h"
2+
3+
#include "codegen_shared.h"
4+
5+
#include "julia_assert.h"
6+
7+
using namespace llvm;
8+
using namespace jl_alloc;
9+
10+
// Report whether `ty` is, or recursively contains, a pointer whose address
// space is `AddressSpace::Tracked`.
static bool hasObjref(Type *ty)
{
    // A pointer counts exactly when it lives in the tracked address space.
    if (auto *pointer_ty = dyn_cast<PointerType>(ty))
        return pointer_ty->getAddressSpace() == AddressSpace::Tracked;

    // Arrays and vectors are homogeneous, so looking at element 0 is enough.
    if (isa<ArrayType>(ty) || isa<VectorType>(ty)) {
        Type *element_ty = GetElementPtrInst::getTypeAtIndex(ty, (uint64_t)0);
        return hasObjref(element_ty);
    }

    // Anything that is not a struct at this point cannot hold a reference.
    auto *struct_ty = dyn_cast<StructType>(ty);
    if (!struct_ty)
        return false;

    // A struct holds a reference as soon as any one member does.
    for (Type *member_ty : struct_ty->elements()) {
        if (hasObjref(member_ty))
            return true;
    }
    return false;
}
25+
26+
std::pair<const uint32_t,Field>&
27+
AllocUseInfo::getField(uint32_t offset, uint32_t size, Type *elty)
28+
{
29+
auto it = findLowerField(offset);
30+
auto end = memops.end();
31+
auto lb = end; // first overlap
32+
auto ub = end; // last overlap
33+
if (it != end) {
34+
// The slot found contains the current location
35+
if (it->first + it->second.size >= offset + size) {
36+
if (it->second.elty != elty)
37+
it->second.elty = nullptr;
38+
assert(it->second.elty == nullptr || (it->first == offset && it->second.size == size));
39+
return *it;
40+
}
41+
if (it->first + it->second.size > offset) {
42+
lb = it;
43+
ub = it;
44+
}
45+
}
46+
else {
47+
it = memops.begin();
48+
}
49+
// Now find the last slot that overlaps with the current memory location.
50+
// Also set `lb` if we didn't find any above.
51+
for (; it != end && it->first < offset + size; ++it) {
52+
if (lb == end)
53+
lb = it;
54+
ub = it;
55+
}
56+
// no overlap found just create a new one.
57+
if (lb == end)
58+
return *memops.emplace(offset, Field(size, elty)).first;
59+
// We find overlapping but not containing slot we need to merge slot/create new one
60+
uint32_t new_offset = std::min(offset, lb->first);
61+
uint32_t new_addrub = std::max(offset + uint32_t(size), ub->first + ub->second.size);
62+
uint32_t new_size = new_addrub - new_offset;
63+
Field field(new_size, nullptr);
64+
field.multiloc = true;
65+
++ub;
66+
for (it = lb; it != ub; ++it) {
67+
field.hasobjref |= it->second.hasobjref;
68+
field.hasload |= it->second.hasload;
69+
field.hasaggr |= it->second.hasaggr;
70+
field.accesses.append(it->second.accesses.begin(), it->second.accesses.end());
71+
}
72+
memops.erase(lb, ub);
73+
return *memops.emplace(new_offset, std::move(field)).first;
74+
}
75+
76+
// Record one load or store of type `elty` at byte `offset` of the allocation.
//
// `inst`/`opno` identify the accessing instruction and the operand through
// which the allocation is reached; `isstore` distinguishes stores from loads.
// Returns false, recording nothing, when the store size of `elty` is too
// large for `offset + size` to stay below `UINT32_MAX`; returns true otherwise.
bool AllocUseInfo::addMemOp(Instruction *inst, unsigned opno, uint32_t offset,
                            Type *elty, bool isstore, const DataLayout &DL)
{
    uint64_t store_size = DL.getTypeStoreSize(elty);
    if (store_size >= UINT32_MAX - offset)
        return false;

    MemOp access(inst, opno);
    access.offset = offset;
    access.size = store_size;
    access.isaggr = isa<StructType>(elty) || isa<ArrayType>(elty) || isa<VectorType>(elty);
    access.isobjref = hasObjref(elty);

    auto &slot = getField(offset, store_size, elty).second;

    // Must be evaluated before `hasobjref` is updated below.
    if (slot.hasobjref != access.isobjref)
        slot.multiloc = true; // can't split this field, since it contains a mix of references and bits
    if (!isstore)
        slot.hasload = true;
    if (access.isaggr)
        slot.hasaggr = true;
    if (access.isobjref) {
        // Track reference traffic for the allocation as a whole.
        if (isstore)
            refstore = true;
        else
            refload = true;
        slot.hasobjref = true;
    }
    slot.accesses.push_back(access);
    return true;
}
109+
110+
JL_USED_FUNC void AllocUseInfo::dump()
111+
{
112+
jl_safe_printf("escaped: %d\n", escaped);
113+
jl_safe_printf("addrescaped: %d\n", addrescaped);
114+
jl_safe_printf("returned: %d\n", returned);
115+
jl_safe_printf("haserror: %d\n", haserror);
116+
jl_safe_printf("hasload: %d\n", hasload);
117+
jl_safe_printf("haspreserve: %d\n", haspreserve);
118+
jl_safe_printf("hasunknownmem: %d\n", hasunknownmem);
119+
jl_safe_printf("hastypeof: %d\n", hastypeof);
120+
jl_safe_printf("refload: %d\n", refload);
121+
jl_safe_printf("refstore: %d\n", refstore);
122+
jl_safe_printf("Uses: %d\n", (unsigned)uses.size());
123+
for (auto inst: uses)
124+
llvm_dump(inst);
125+
if (!preserves.empty()) {
126+
jl_safe_printf("Preserves: %d\n", (unsigned)preserves.size());
127+
for (auto inst: preserves) {
128+
llvm_dump(inst);
129+
}
130+
}
131+
if (!memops.empty()) {
132+
jl_safe_printf("Memops: %d\n", (unsigned)memops.size());
133+
for (auto &field: memops) {
134+
jl_safe_printf(" Field %d @ %d\n", field.second.size, field.first);
135+
jl_safe_printf(" Accesses:\n");
136+
for (auto memop: field.second.accesses) {
137+
jl_safe_printf(" ");
138+
llvm_dump(memop.inst);
139+
}
140+
}
141+
}
142+
}
143+
144+
// Classify every (transitive) use of `I` and record the findings in
// `required.use_info`, which is reset first.
//
// The walk is an explicit depth-first traversal: `cur` is the frame currently
// being scanned and `required.check_stack` holds the suspended parent frames.
// Bitcasts, address-space casts and GEPs are followed into their own uses; a
// GEP additionally updates the byte offset tracked in the frame, where
// `UINT32_MAX` stands for "offset unknown".
//
// The walk stops immediately, with `use_info.escaped` set, when a user is not
// an instruction or when `check_inst` meets a use it treats as an escape.
// When `options.valid_set` is set, users whose parent block is not contained in
// it are ignored: they are neither checked nor recorded in `use_info.uses`.
void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArgs required, EscapeAnalysisOptionalArgs options) {
    required.use_info.reset();
    if (I->use_empty())
        return;
    CheckInst::Frame cur{I, 0, I->use_begin(), I->use_end()};
    required.check_stack.clear();

    // Recursion: suspend the current frame (if it still has uses left) and
    // start scanning the uses of `inst`. The offset is inherited from `cur`.
    auto push_inst = [&] (Instruction *inst) {
        if (cur.use_it != cur.use_end)
            required.check_stack.push_back(cur);
        cur.parent = inst;
        cur.use_it = inst->use_begin();
        cur.use_end = inst->use_end();
    };

    // Inspect a single use. Returns false when the analysis must stop because
    // the object escapes, true when the walk may continue.
    auto check_inst = [&] (Instruction *inst, Use *use) {
        if (isa<LoadInst>(inst)) {
            required.use_info.hasload = true;
            // Without a known offset (or a representable size) the access
            // cannot be attributed to a field.
            if (cur.offset == UINT32_MAX || !required.use_info.addMemOp(inst, 0, cur.offset,
                                                                       inst->getType(),
                                                                       false, required.DL))
                required.use_info.hasunknownmem = true;
            return true;
        }
        if (auto call = dyn_cast<CallInst>(inst)) {
            // TODO handle `memcmp`
            // None of the intrinsics should care if the memory is stack or heap allocated.
            auto callee = call->getCalledOperand();
            if (auto II = dyn_cast<IntrinsicInst>(call)) {
                if (auto id = II->getIntrinsicID()) {
                    if (id == Intrinsic::memset) {
                        assert(call->arg_size() == 4);
                        // Only a memset with constant value and constant,
                        // in-range length at a known offset is understood.
                        if (cur.offset == UINT32_MAX ||
                            !isa<ConstantInt>(call->getArgOperand(2)) ||
                            !isa<ConstantInt>(call->getArgOperand(1)) ||
                            (cast<ConstantInt>(call->getArgOperand(2))->getLimitedValue() >=
                             UINT32_MAX - cur.offset))
                            required.use_info.hasunknownmem = true;
                        return true;
                    }
                    if (id == Intrinsic::lifetime_start || id == Intrinsic::lifetime_end ||
                        isa<DbgInfoIntrinsic>(II))
                        return true;
                    required.use_info.addrescaped = true;
                    return true;
                }
                // NOTE(review): this check sits inside the IntrinsicInst branch and
                // is only reached when getIntrinsicID() returns 0 -- confirm that it
                // is reachable for calls to gc_preserve_begin_func.
                if (required.pass.gc_preserve_begin_func == callee) {
                    for (auto user: call->users())
                        required.use_info.uses.insert(cast<Instruction>(user));
                    required.use_info.preserves.insert(call);
                    required.use_info.haspreserve = true;
                    return true;
                }
            }
            if (required.pass.pointer_from_objref_func == callee) {
                required.use_info.addrescaped = true;
                return true;
            }
            if (required.pass.typeof_func == callee) {
                required.use_info.hastypeof = true;
                assert(use->get() == I);
                return true;
            }
            if (required.pass.write_barrier_func == callee)
                return true;
            auto opno = use->getOperandNo();
            // Uses in `jl_roots` operand bundle are not counted as escaping, everything else is.
            if (!call->isBundleOperand(opno) ||
                call->getOperandBundleForOperand(opno).getTagName() != "jl_roots") {
                // A call in a block ending in `unreachable` is recorded as an
                // error use instead of an escape.
                if (isa<UnreachableInst>(call->getParent()->getTerminator())) {
                    required.use_info.haserror = true;
                    return true;
                }
                required.use_info.escaped = true;
                return false;
            }
            required.use_info.haspreserve = true;
            return true;
        }
        if (auto store = dyn_cast<StoreInst>(inst)) {
            // Only store value count
            if (use->getOperandNo() != StoreInst::getPointerOperandIndex()) {
                required.use_info.escaped = true;
                return false;
            }
            auto storev = store->getValueOperand();
            if (cur.offset == UINT32_MAX || !required.use_info.addMemOp(inst, use->getOperandNo(),
                                                                       cur.offset, storev->getType(),
                                                                       true, required.DL))
                required.use_info.hasunknownmem = true;
            return true;
        }
        if (isa<AtomicCmpXchgInst>(inst) || isa<AtomicRMWInst>(inst)) {
            const bool is_cmpxchg = isa<AtomicCmpXchgInst>(inst);
            // Select the pointer operand index first and compare afterwards:
            // written inline as `a != b ? x : y`, the comparison would bind
            // tighter than `?:` and the escape check below could never fire.
            const unsigned ptr_opno = is_cmpxchg ? AtomicCmpXchgInst::getPointerOperandIndex()
                                                 : AtomicRMWInst::getPointerOperandIndex();
            // Only store value count
            if (use->getOperandNo() != ptr_opno) {
                required.use_info.escaped = true;
                return false;
            }
            // An atomic read-modify-write both reads and writes the location.
            required.use_info.hasload = true;
            auto storev = is_cmpxchg ? cast<AtomicCmpXchgInst>(inst)->getNewValOperand() : cast<AtomicRMWInst>(inst)->getValOperand();
            if (cur.offset == UINT32_MAX || !required.use_info.addMemOp(inst, use->getOperandNo(),
                                                                       cur.offset, storev->getType(),
                                                                       true, required.DL))
                required.use_info.hasunknownmem = true;
            required.use_info.refload = true;
            return true;
        }
        if (isa<AddrSpaceCastInst>(inst) || isa<BitCastInst>(inst)) {
            // Casts keep pointing at the same bytes: follow their uses.
            push_inst(inst);
            return true;
        }
        if (auto gep = dyn_cast<GetElementPtrInst>(inst)) {
            uint64_t next_offset = cur.offset;
            if (cur.offset != UINT32_MAX) {
                APInt apoffset(sizeof(void*) * 8, cur.offset, true);
                // Non-constant or negative offsets are treated as unknown.
                if (!gep->accumulateConstantOffset(required.DL, apoffset) || apoffset.isNegative()) {
                    next_offset = UINT32_MAX;
                }
                else {
                    next_offset = apoffset.getLimitedValue();
                    if (next_offset > UINT32_MAX) {
                        next_offset = UINT32_MAX;
                    }
                }
            }
            // The new offset applies to the GEP's frame, so set it after the push.
            push_inst(inst);
            cur.offset = (uint32_t)next_offset;
            return true;
        }
        if (isa<ReturnInst>(inst)) {
            required.use_info.returned = true;
            return true;
        }
        // Any other kind of user is treated as an escape.
        required.use_info.escaped = true;
        return false;
    };

    while (true) {
        assert(cur.use_it != cur.use_end);
        auto use = &*cur.use_it;
        auto inst = dyn_cast<Instruction>(use->getUser());
        // Advance first so that a frame saved by `push_inst` resumes at the next use.
        ++cur.use_it;
        if (!inst) {
            required.use_info.escaped = true;
            return;
        }
        if (!options.valid_set || options.valid_set->contains(inst->getParent())) {
            if (!check_inst(inst, use))
                return;
            required.use_info.uses.insert(inst);
        }
        // Current frame exhausted: resume the most recently suspended one,
        // or finish when none is left.
        if (cur.use_it == cur.use_end) {
            if (required.check_stack.empty())
                return;
            cur = required.check_stack.back();
            required.check_stack.pop_back();
        }
    }
}

0 commit comments

Comments
 (0)