Skip to content

Commit 17857ca

Browse files
make memorynew intrinsic (#56803)
Attempt to split up #55913 into 2 pieces. This piece now only adds the `memorynew` intrinsic without any of the optimizations enabled by #55913. As such, this PR should be ready to merge now. (and will make #55913 smaller and simpler) --------- Co-authored-by: gbaraldi <baraldigabriel@gmail.com>
1 parent 13f446e commit 17857ca

File tree

15 files changed

+264
-92
lines changed

15 files changed

+264
-92
lines changed

Compiler/src/tfuncs.jl

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2017,6 +2017,12 @@ function tuple_tfunc(𝕃::AbstractLattice, argtypes::Vector{Any})
20172017
return anyinfo ? PartialStruct(𝕃, typ, argtypes) : typ
20182018
end
20192019

2020+
# Return-type inference for `Core.memorynew(memtype, m)`.
#
# The result is `Bottom` when the length argument `m` can never be an `Int`.
# Otherwise the instance type described by `memtype` is met with `GenericMemory`.
@nospecs function memorynew_tfunc(𝕃::AbstractLattice, memtype, m)
    if !hasintersect(widenconst(m), Int)
        return Bottom
    end
    memt = instanceof_tfunc(memtype, true)[1]
    return tmeet(𝕃, memt, GenericMemory)
end
2024+
add_tfunc(Core.memorynew, 2, 2, memorynew_tfunc, 10)
2025+
20202026
@nospecs function memoryrefget_tfunc(𝕃::AbstractLattice, mem, order, boundscheck)
20212027
memoryref_builtin_common_errorcheck(mem, order, boundscheck) || return Bottom
20222028
return memoryref_elemtype(mem)
@@ -2238,13 +2244,31 @@ end
22382244
return boundscheck ⊑ Bool && memtype ⊑ GenericMemoryRef && order ⊑ Symbol
22392245
end
22402246

2247+
# Decide whether a `Core.memorynew(MemT, len)` call is guaranteed not to throw.
#
# `argtypes` holds the lattice elements of the call arguments. The answer is
# `true` only when all of the following hold:
#   * exactly two arguments are given and both are `Const`,
#   * the first constant is a concrete subtype of `GenericMemory`,
#   * the second constant is an `Int` with `0 <= len < typemax(Int)`,
#   * `len * elsz` does not overflow, where `elsz` is the layout size of `MemT`.
# Anything else is conservatively reported as possibly throwing (`false`).
#
# NOTE(review): the codegen path (`emit_memorynew`) additionally counts one
# selector byte per element for union layouts and bounds the total byte size;
# this predicate does not mirror those checks — confirm that is intentional.
@nospecs function memorynew_nothrow(argtypes::Vector{Any})
    # The builtin takes exactly two arguments; checking the arity first also
    # keeps the indexing below in bounds.
    length(argtypes) == 2 || return false
    memarg, lenarg = argtypes[1], argtypes[2]
    (memarg isa Const && lenarg isa Const) || return false
    MemT = memarg.val
    (isconcretetype(MemT) && MemT <: GenericMemory) || return false
    len = lenarg.val
    (len isa Int && 0 <= len < typemax(Int)) || return false
    # The requested size in bytes must not overflow either.
    elsz = datatype_layoutsize(MemT)
    overflows = checked_smul_int(len, elsz)[2]
    return !overflows
end
22412263
# Query whether the given builtin is guaranteed not to throw given the `argtypes`.
22422264
# `argtypes` can be assumed not to contain varargs.
22432265
function _builtin_nothrow(𝕃::AbstractLattice, @nospecialize(f::Builtin), argtypes::Vector{Any},
22442266
@nospecialize(rt))
22452267
⊑ = partialorder(𝕃)
22462268
na = length(argtypes)
2247-
if f === memoryrefnew
2269+
if f === Core.memorynew
2270+
return memorynew_nothrow(argtypes)
2271+
elseif f === memoryrefnew
22482272
return memoryref_builtin_common_nothrow(argtypes)
22492273
elseif f === memoryrefoffset
22502274
length(argtypes) == 1 || return false
@@ -2347,6 +2371,7 @@ const _EFFECT_FREE_BUILTINS = [
23472371
isa,
23482372
UnionAll,
23492373
getfield,
2374+
Core.memorynew,
23502375
memoryrefnew,
23512376
memoryrefoffset,
23522377
memoryrefget,
@@ -2381,6 +2406,7 @@ const _INACCESSIBLEMEM_BUILTINS = Any[
23812406
compilerbarrier,
23822407
Core._typevar,
23832408
donotdelete,
2409+
Core.memorynew,
23842410
]
23852411

23862412
const _ARGMEM_BUILTINS = Any[
@@ -2543,7 +2569,7 @@ function builtin_effects(𝕃::AbstractLattice, @nospecialize(f::Builtin), argty
25432569
consistent = ALWAYS_TRUE
25442570
elseif f === memoryrefget || f === memoryrefset! || f === memoryref_isassigned
25452571
consistent = CONSISTENT_IF_INACCESSIBLEMEMONLY
2546-
elseif f === Core._typevar
2572+
elseif f === Core._typevar || f === Core.memorynew
25472573
consistent = CONSISTENT_IF_NOTRETURNED
25482574
else
25492575
consistent = ALWAYS_FALSE

Compiler/test/irpasses.jl

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1083,12 +1083,13 @@ end
10831083
# test `flags_for_effects` and DCE
10841084
# ================================
10851085

1086-
let # effect-freeness computation for array allocation
1086+
@testset "effect-freeness computation for array allocation" begin
10871087

10881088
# should eliminate dead allocations
10891089
good_dims = [1, 2, 3, 4, 10]
10901090
Ns = [1, 2, 3, 4, 10]
1091-
for dim = good_dims, N = Ns
1091+
Ts = Any[Int, Union{Missing,Nothing}, Nothing, Any]
1092+
@testset "$dim, $N" for dim in good_dims, N in Ns
10921093
Int64(dim)^N > typemax(Int) && continue
10931094
dims = ntuple(i->dim, N)
10941095
@test @eval fully_eliminated() do
@@ -1099,7 +1100,7 @@ let # effect-freeness computation for array allocation
10991100

11001101
# shouldn't eliminate erroneous dead allocations
11011102
bad_dims = [-1, typemax(Int)]
1102-
for dim in bad_dims, N in [1, 2, 3, 4, 10], T in Any[Int, Union{Missing,Nothing}, Nothing, Any]
1103+
@testset "$dim, $N, $T" for dim in bad_dims, N in Ns, T in Ts
11031104
dims = ntuple(i->dim, N)
11041105
@test @eval !fully_eliminated() do
11051106
Array{$T,$N}(undef, $(dims...))

base/boot.jl

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -575,12 +575,7 @@ struct UndefInitializer end
575575
const undef = UndefInitializer()
576576

577577
# type and dimensionality specified
578-
(self::Type{GenericMemory{kind,T,addrspace}})(::UndefInitializer, m::Int) where {T,addrspace,kind} =
579-
if isdefined(self, :instance) && m === 0
580-
self.instance
581-
else
582-
ccall(:jl_alloc_genericmemory, Ref{GenericMemory{kind,T,addrspace}}, (Any, Int), self, m)
583-
end
578+
# `undef` constructor taking an explicit length `m`: forwards directly to `memorynew(self, m)`.
(self::Type{GenericMemory{kind,T,addrspace}})(::UndefInitializer, m::Int) where {T,addrspace,kind} = memorynew(self, m)
584579
(self::Type{GenericMemory{kind,T,addrspace}})(::UndefInitializer, d::NTuple{1,Int}) where {T,kind,addrspace} = self(undef, getfield(d,1))
585580
# empty vector constructor
586581
(self::Type{GenericMemory{kind,T,addrspace}})() where {T,kind,addrspace} = self(undef, 0)

doc/src/manual/performance-tips.md

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1058,12 +1058,12 @@ the output. As a trivial example, compare
10581058

10591059
```jldoctest prealloc
10601060
julia> function xinc(x)
1061-
return [x, x+1, x+2]
1061+
return [x + i for i in 1:3000]
10621062
end;
10631063
10641064
julia> function loopinc()
10651065
y = 0
1066-
for i = 1:10^7
1066+
for i = 1:10^5
10671067
ret = xinc(i)
10681068
y += ret[2]
10691069
end
@@ -1075,16 +1075,16 @@ with
10751075

10761076
```jldoctest prealloc
10771077
julia> function xinc!(ret::AbstractVector{T}, x::T) where T
1078-
ret[1] = x
1079-
ret[2] = x+1
1080-
ret[3] = x+2
1078+
for i in 1:3000
1079+
ret[i] = x+i
1080+
end
10811081
nothing
10821082
end;
10831083
10841084
julia> function loopinc_prealloc()
1085-
ret = Vector{Int}(undef, 3)
1085+
ret = Vector{Int}(undef, 3000)
10861086
y = 0
1087-
for i = 1:10^7
1087+
for i = 1:10^5
10881088
xinc!(ret, i)
10891089
y += ret[2]
10901090
end
@@ -1096,12 +1096,12 @@ Timing results:
10961096

10971097
```jldoctest prealloc; filter = r"[0-9\.]+ seconds \(.*?\)"
10981098
julia> @time loopinc()
1099-
0.529894 seconds (40.00 M allocations: 1.490 GiB, 12.14% gc time)
1100-
50000015000000
1099+
0.297454 seconds (200.00 k allocations: 2.239 GiB, 39.80% gc time)
1100+
5000250000
11011101
11021102
julia> @time loopinc_prealloc()
1103-
0.030850 seconds (6 allocations: 288 bytes)
1104-
50000015000000
1103+
0.009410 seconds (2 allocations: 23.477 KiB)
1104+
5000250000
11051105
```
11061106

11071107
Preallocation has other advantages, for example by allowing the caller to control the "output"

src/array.c

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,6 @@
1616
extern "C" {
1717
#endif
1818

19-
#if defined(_P64) && defined(UINT128MAX)
20-
typedef __uint128_t wideint_t;
21-
#else
22-
typedef uint64_t wideint_t;
23-
#endif
24-
2519
#define MAXINTVAL (((size_t)-1)>>1)
2620

2721
JL_DLLEXPORT int jl_array_validate_dims(size_t *nel, uint32_t ndims, size_t *dims)
@@ -30,10 +24,9 @@ JL_DLLEXPORT int jl_array_validate_dims(size_t *nel, uint32_t ndims, size_t *dim
3024
size_t _nel = 1;
3125
for (i = 0; i < ndims; i++) {
3226
size_t di = dims[i];
33-
wideint_t prod = (wideint_t)_nel * (wideint_t)di;
34-
if (prod >= (wideint_t) MAXINTVAL || di >= MAXINTVAL)
27+
int overflow = __builtin_mul_overflow(_nel, di, &_nel);
28+
if (overflow || di >= MAXINTVAL)
3529
return 1;
36-
_nel = prod;
3730
}
3831
*nel = _nel;
3932
return 0;
@@ -204,7 +197,7 @@ JL_DLLEXPORT void jl_array_grow_end(jl_array_t *a, size_t inc)
204197
int isbitsunion = jl_genericmemory_isbitsunion(a->ref.mem);
205198
size_t newnrows = n + inc;
206199
if (!isbitsunion && elsz == 0) {
207-
jl_genericmemory_t *newmem = jl_alloc_genericmemory(mtype, MAXINTVAL - 1);
200+
jl_genericmemory_t *newmem = jl_alloc_genericmemory(mtype, MAXINTVAL - 2);
208201
a->ref.mem = newmem;
209202
jl_gc_wb(a, newmem);
210203
a->dimsize[0] = newnrows;

src/builtin_proto.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ DECLARE_BUILTIN(is);
4646
DECLARE_BUILTIN(isa);
4747
DECLARE_BUILTIN(isdefined);
4848
DECLARE_BUILTIN(issubtype);
49+
DECLARE_BUILTIN(memorynew);
4950
DECLARE_BUILTIN(memoryref);
5051
DECLARE_BUILTIN(memoryref_isassigned);
5152
DECLARE_BUILTIN(memoryrefget);

src/builtins.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1675,6 +1675,15 @@ JL_CALLABLE(jl_f__typevar)
16751675
}
16761676

16771677
// genericmemory ---------------------------------------------------------------------
1678+
JL_CALLABLE(jl_f_memorynew)
1679+
{
1680+
JL_NARGS(memorynew, 2, 2);
1681+
jl_datatype_t *jl_genericmemory_type_type = jl_datatype_type;
1682+
JL_TYPECHK(memorynew, genericmemory_type, args[0]);
1683+
JL_TYPECHK(memorynew, long, args[1]);
1684+
size_t nel = jl_unbox_long(args[1]);
1685+
return (jl_value_t*)jl_alloc_genericmemory(args[0], nel);
1686+
}
16781687

16791688
JL_CALLABLE(jl_f_memoryref)
16801689
{
@@ -2441,6 +2450,7 @@ void jl_init_primitives(void) JL_GC_DISABLED
24412450
jl_builtin_setglobalonce = add_builtin_func("setglobalonce!", jl_f_setglobalonce);
24422451

24432452
// memory primitives
2453+
jl_builtin_memorynew = add_builtin_func("memorynew", jl_f_memorynew);
24442454
jl_builtin_memoryref = add_builtin_func("memoryrefnew", jl_f_memoryref);
24452455
jl_builtin_memoryrefoffset = add_builtin_func("memoryrefoffset", jl_f_memoryrefoffset);
24462456
jl_builtin_memoryrefget = add_builtin_func("memoryrefget", jl_f_memoryrefget);

src/ccall.cpp

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1877,33 +1877,6 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
18771877
JL_GC_POP();
18781878
return mark_julia_type(ctx, obj, true, jl_any_type);
18791879
}
1880-
else if (is_libjulia_func(jl_alloc_genericmemory)) {
1881-
++CCALL_STAT(jl_alloc_genericmemory);
1882-
assert(lrt == ctx.types().T_prjlvalue);
1883-
assert(!isVa && !llvmcall && nccallargs == 2);
1884-
const jl_cgval_t &typ = argv[0];
1885-
const jl_cgval_t &nel = argv[1];
1886-
auto arg_typename = [&] JL_NOTSAFEPOINT {
1887-
auto istyp = argv[0].constant;
1888-
std::string type_str;
1889-
if (istyp && jl_is_datatype(istyp) && jl_is_genericmemory_type(istyp)){
1890-
auto eltype = jl_tparam1(istyp);
1891-
if (jl_is_datatype(eltype))
1892-
type_str = jl_symbol_name(((jl_datatype_t*)eltype)->name->name);
1893-
else if (jl_is_uniontype(eltype))
1894-
type_str = "Union";
1895-
else
1896-
type_str = "<unknown type>";
1897-
}
1898-
else
1899-
type_str = "<unknown type>";
1900-
return "Memory{" + type_str + "}[]";
1901-
};
1902-
auto alloc = ctx.builder.CreateCall(prepare_call(jl_allocgenericmemory), { boxed(ctx,typ), emit_unbox(ctx, ctx.types().T_size, nel, (jl_value_t*)jl_ulong_type)});
1903-
setName(ctx.emission_context, alloc, arg_typename);
1904-
JL_GC_POP();
1905-
return mark_julia_type(ctx, alloc, true, jl_any_type);
1906-
}
19071880
else if (is_libjulia_func(memcpy) && (rt == (jl_value_t*)jl_nothing_type || jl_is_cpointer_type(rt))) {
19081881
++CCALL_STAT(memcpy);
19091882
const jl_cgval_t &dst = argv[0];

src/cgutils.cpp

Lines changed: 108 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1548,19 +1548,24 @@ static void emit_error(jl_codectx_t &ctx, const Twine &txt)
15481548
}
15491549

15501550
// DO NOT PASS IN A CONST CONDITION!
1551-
static void error_unless(jl_codectx_t &ctx, Value *cond, const Twine &msg)
1551+
// Emit a conditional error: branch on `cond`, and on the false edge call the
// error-raising function `F` with `msg` followed by `unreachable`. On return
// the builder is positioned in the "pass" block, so emission continues on the
// success path.
static void error_unless(jl_codectx_t &ctx, Function *F, Value *cond, const Twine &msg)
{
    ++EmittedConditionalErrors;
    auto &llvmctx = ctx.builder.getContext();
    BasicBlock *failBB = BasicBlock::Create(llvmctx, "fail", ctx.f);
    BasicBlock *passBB = BasicBlock::Create(llvmctx, "pass");
    ctx.builder.CreateCondBr(cond, passBB, failBB);

    // failure edge: raise the error, control never continues from here
    ctx.builder.SetInsertPoint(failBB);
    just_emit_error(ctx, F, msg);
    ctx.builder.CreateUnreachable();

    // success edge: attach the block and keep emitting there
    passBB->insertInto(ctx.f);
    ctx.builder.SetInsertPoint(passBB);
}

// Convenience overload that raises through `jlerror_func`.
static void error_unless(jl_codectx_t &ctx, Value *cond, const Twine &msg)
{
    Function *errfn = prepare_call(jlerror_func);
    error_unless(ctx, errfn, cond, msg);
}
1568+
15641569
static void raise_exception(jl_codectx_t &ctx, Value *exc,
15651570
BasicBlock *contBB=nullptr)
15661571
{
@@ -4427,6 +4432,107 @@ static int compare_cgparams(const jl_cgparams_t *a, const jl_cgparams_t *b)
44274432
}
44284433
#endif
44294434

4435+
// Emit inline code for the `memorynew` builtin: produce a GenericMemory of
// type `typ` holding `nel` elements.
//
// A zero length yields `inst` (embedded as a literal pointer). Any other
// length is validated at run time (non-negative, byte size computed without
// overflow and within the size limit) before calling the unchecked allocator;
// a failed check raises an ArgumentError through `jlargumenterror_func`.
// Returns an empty jl_cgval_t when `nel` cannot be an Int.
static jl_cgval_t emit_memorynew(jl_codectx_t &ctx, jl_datatype_t *typ, jl_cgval_t nel, jl_genericmemory_t *inst)
{
    // the length argument has to be an Int
    emit_typecheck(ctx, nel, (jl_value_t*)jl_long_type, "memorynew");
    nel = update_julia_type(ctx, nel, (jl_value_t*)jl_long_type);
    if (nel.typ == jl_bottom_type)
        return jl_cgval_t();

    const jl_datatype_layout_t *layout = typ->layout;
    assert(typ->has_concrete_subtype && layout != NULL);
    const int isboxed = layout->flags.arrayelem_isboxed;
    const int isunion = layout->flags.arrayelem_isunion;
    const int zeroinit = typ->zeroinit;
    // boxed elements occupy one pointer each
    size_t elsz = layout->size;
    if (isboxed)
        elsz = sizeof(void*);

    auto ptls = get_current_ptls(ctx);
    auto &llvmctx = ctx.builder.getContext();
    auto T_size = ctx.types().T_size;
    auto int8t = getInt8Ty(llvmctx);
    BasicBlock *emptymemBB = BasicBlock::Create(llvmctx, "emptymem");
    BasicBlock *nonemptymemBB = BasicBlock::Create(llvmctx, "nonemptymem");
    BasicBlock *retvalBB = BasicBlock::Create(llvmctx, "retval");
    auto nel_unboxed = emit_unbox(ctx, T_size, nel, (jl_value_t*)jl_long_type);
    Value *is_empty = ctx.builder.CreateICmpEQ(nel_unboxed, ConstantInt::get(T_size, 0));
    setName(ctx.emission_context, is_empty, "memorynew_empty");
    ctx.builder.CreateCondBr(is_empty, emptymemBB, nonemptymemBB);

    // nel == 0: return the provided instance without allocating
    emptymemBB->insertInto(ctx.f);
    ctx.builder.SetInsertPoint(emptymemBB);
    auto emptyalloc = track_pjlvalue(ctx, literal_pointer_val(ctx, (jl_value_t*)inst));
    ctx.builder.CreateBr(retvalBB);

    // nel != 0: validate the size, then allocate
    nonemptymemBB->insertInto(ctx.f);
    ctx.builder.SetInsertPoint(nonemptymemBB);
    // name given to the allocation, derived from the element type
    auto arg_typename = [&] JL_NOTSAFEPOINT {
        auto eltype = jl_tparam1(typ);
        std::string type_str = "<unknown type>";
        if (jl_is_datatype(eltype))
            type_str = jl_symbol_name(((jl_datatype_t*)eltype)->name->name);
        else if (jl_is_uniontype(eltype))
            type_str = "Union";
        return "Memory{" + type_str + "}[]";
    };
    auto cg_typ = literal_pointer_val(ctx, (jl_value_t*) typ);
    auto cg_elsz = ConstantInt::get(T_size, elsz);

    // nbytes = nel * elsz, remembering whether the signed product overflowed
    FunctionCallee smul = Intrinsic::getDeclaration(jl_Module, Intrinsic::smul_with_overflow, ArrayRef<Type*>(T_size));
    Value *mul_res = ctx.builder.CreateCall(smul, {nel_unboxed, cg_elsz});
    Value *nbytes = ctx.builder.CreateExtractValue(mul_res, 0);
    Value *overflow = ctx.builder.CreateExtractValue(mul_res, 1);
    if (isunion) {
        // union layouts also need the selector bytes: nbytes += nel
        FunctionCallee sadd = Intrinsic::getDeclaration(jl_Module, Intrinsic::sadd_with_overflow, ArrayRef<Type*>(T_size));
        Value *add_res = ctx.builder.CreateCall(sadd, {nel_unboxed, nbytes});
        nbytes = ctx.builder.CreateExtractValue(add_res, 0);
        Value *add_overflow = ctx.builder.CreateExtractValue(add_res, 1);
        overflow = ctx.builder.CreateOr(overflow, add_overflow);
    }
    // a negative length is rejected
    Value *negnel = ctx.builder.CreateICmpSLT(nel_unboxed, ConstantInt::get(T_size, 0));
    overflow = ctx.builder.CreateOr(overflow, negnel);
    // upper bound on the size; zero-sized elements are bounded by their count instead
    auto cg_maxsize = ConstantInt::get(T_size, (((size_t)-1)>>1)-1);
    Value *toobig = ctx.builder.CreateICmpSLT(cg_maxsize, elsz == 0 ? nel_unboxed: nbytes);
    overflow = ctx.builder.CreateOr(overflow, toobig);
    Value *notoverflow = ctx.builder.CreateNot(overflow);
    error_unless(ctx, prepare_call(jlargumenterror_func), notoverflow, "invalid GenericMemory size: the number of elements is either negative or too large for system address width");

    // allocate for real
    auto allocfn = prepare_call(jl_alloc_genericmemory_unchecked_func);
    Value *alloc = ctx.builder.CreateCall(allocfn, { ptls, nbytes, cg_typ});
    // the unchecked allocator does not set the length, so store it here
    Value *decay_alloc = decay_derived(ctx, alloc);
    Value *len_field = ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, decay_alloc, 0);
    auto len_store = ctx.builder.CreateAlignedStore(nel_unboxed, len_field, Align(sizeof(void*)));
    auto len_ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_memorylen);
    len_ai.decorateInst(len_store);
    // the fence keeps the length store from being deleted, which would be illegal
    ctx.builder.CreateFence(AtomicOrdering::Release, SyncScope::SingleThread);
    // zero the payload for pointer and union element layouts
    if (zeroinit) {
        Value *memory_ptr = ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, decay_alloc, 1);
        auto *data = ctx.builder.CreateAlignedLoad(ctx.types().T_ptr, memory_ptr, Align(sizeof(void*)));
        auto ptr_ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_memoryptr);
        ptr_ai.decorateInst(data);
        ctx.builder.CreateMemSet(data, ConstantInt::get(int8t, 0), nbytes, Align(sizeof(void*)));
    }

    setName(ctx.emission_context, alloc, arg_typename);
    ctx.builder.CreateBr(retvalBB);
    // the error check split the block, so re-read where the allocation path ended
    nonemptymemBB = ctx.builder.GetInsertBlock();

    // merge both paths with a phi
    retvalBB->insertInto(ctx.f);
    ctx.builder.SetInsertPoint(retvalBB);
    auto phi = ctx.builder.CreatePHI(ctx.types().T_prjlvalue, 2);
    phi->addIncoming(emptyalloc, emptymemBB);
    phi->addIncoming(alloc, nonemptymemBB);
    return mark_julia_type(ctx, phi, true, typ);
}
4535+
44304536
static jl_cgval_t _emit_memoryref(jl_codectx_t &ctx, Value *mem, Value *data, const jl_datatype_layout_t *layout, jl_value_t *typ)
44314537
{
44324538
//jl_cgval_t argv[] = {

0 commit comments

Comments
 (0)