diff --git a/base/runtime_internals.jl b/base/runtime_internals.jl
index 98dd111ccbf68..b180600eb3cad 100644
--- a/base/runtime_internals.jl
+++ b/base/runtime_internals.jl
@@ -528,6 +528,10 @@ struct DataTypeLayout
     # fielddesc_type : 2;
     # arrayelem_isboxed : 1;
     # arrayelem_isunion : 1;
+    # arrayelem_isatomic : 1;
+    # arrayelem_islocked : 1;
+    # isbitsegal : 1;
+    # padding : 8;
 end
 
 """
diff --git a/src/cgutils.cpp b/src/cgutils.cpp
index ef36eeb728689..1fd8b6ff6fcad 100644
--- a/src/cgutils.cpp
+++ b/src/cgutils.cpp
@@ -4820,9 +4820,10 @@ static jl_cgval_t emit_memoryref(jl_codectx_t &ctx, const jl_cgval_t &ref, jl_cg
             setName(ctx.emission_context, ovflw, "memoryref_ovflw");
         }
 #endif
-        Type *elty = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jl_tparam1(ref.typ));
-        newdata = ctx.builder.CreateGEP(elty, data, offset);
-        setName(ctx.emission_context, newdata, "memoryref_data_offset");
+        boffset = ctx.builder.CreateMul(offset, elsz);
+        setName(ctx.emission_context, boffset, "memoryref_byteoffset");
+        newdata = ctx.builder.CreateGEP(getInt8Ty(ctx.builder.getContext()), data, boffset);
+        setName(ctx.emission_context, newdata, "memoryref_data_byteoffset");
         (void)boffset; // LLVM is very bad at handling GEP with types different from the load
         if (bc) {
             BasicBlock *failBB, *endBB;
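The two new element bits are visible from Julia through the `Base.DataTypeLayout` mirror struct updated above. A rough sketch of decoding them, assuming the bit order listed in that comment (`haspadding` in bit 0, `fielddesc_type` in bits 1-2) and Julia 1.11+ for `AtomicMemory`; `memory_elem_flags` is a hypothetical helper, not part of Base:

```julia
# Hypothetical helper, not part of Base; bit positions follow the comment in
# base/runtime_internals.jl above.
function memory_elem_flags(dt::DataType)
    dt.layout == C_NULL && error("$dt has no layout")
    flags = unsafe_load(convert(Ptr{Base.DataTypeLayout}, dt.layout)).flags
    (isboxed = (flags >> 3) % Bool, isunion = (flags >> 4) % Bool,
     isatomic = (flags >> 5) % Bool, islocked = (flags >> 6) % Bool)
end

memory_elem_flags(Memory{Int})                     # all false
memory_elem_flags(Memory{Any})                     # isboxed = true
memory_elem_flags(AtomicMemory{Int})               # isatomic = true, islocked = false
memory_elem_flags(AtomicMemory{NTuple{3,Float64}}) # isatomic = true, islocked = true
```

The byte-offset GEP in cgutils.cpp goes with this: once a lock prefix can be folded into `layout->size`, the element stride no longer matches the LLVM store size of the element type, so indexing has to be done in raw bytes.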
diff --git a/src/codegen.cpp b/src/codegen.cpp
index 2c38ea4e5d907..555f25661ef19 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -3862,8 +3862,8 @@ static bool emit_f_opmemory(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
     const jl_datatype_layout_t *layout = ((jl_datatype_t*)mty_dt)->layout;
     bool isboxed = layout->flags.arrayelem_isboxed;
     bool isunion = layout->flags.arrayelem_isunion;
-    bool isatomic = kind == (jl_value_t*)jl_atomic_sym;
-    bool needlock = isatomic && layout->size > MAX_ATOMIC_SIZE;
+    bool isatomic = layout->flags.arrayelem_isatomic || layout->flags.arrayelem_islocked;
+    bool needlock = layout->flags.arrayelem_islocked;
     size_t elsz = layout->size;
     size_t al = layout->alignment;
     if (al > JL_HEAP_ALIGNMENT)
@@ -4231,7 +4231,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
         size_t al = layout->alignment;
         if (al > JL_HEAP_ALIGNMENT)
             al = JL_HEAP_ALIGNMENT;
-        bool needlock = isatomic && !isboxed && elsz > MAX_ATOMIC_SIZE;
+        bool needlock = layout->flags.arrayelem_islocked;
         AtomicOrdering Order = (needlock || order <= jl_memory_order_notatomic) ?
             (isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic) :
             get_llvm_atomic_order(order);
@@ -4317,7 +4317,8 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
             *ret = jl_cgval_t(); // unreachable
             return true;
         }
-        bool isatomic = kind == (jl_value_t*)jl_atomic_sym;
+        const jl_datatype_layout_t *layout = ((jl_datatype_t*)mty_dt)->layout;
+        bool isatomic = layout->flags.arrayelem_isatomic || layout->flags.arrayelem_islocked;
         if (!isatomic && order != jl_memory_order_notatomic && order != jl_memory_order_unspecified) {
             emit_atomic_error(ctx, "memoryref_isassigned: non-atomic memory cannot be accessed atomically");
             *ret = jl_cgval_t(); // unreachable
@@ -4333,13 +4334,12 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
         }
         jl_value_t *boundscheck = argv[3].constant;
         emit_typecheck(ctx, argv[3], (jl_value_t*)jl_bool_type, fname);
-        const jl_datatype_layout_t *layout = ((jl_datatype_t*)mty_dt)->layout;
         Value *mem = emit_memoryref_mem(ctx, ref, layout);
         Value *mlen = emit_genericmemorylen(ctx, mem, ref.typ);
         Value *oob = bounds_check_enabled(ctx, boundscheck) ? ctx.builder.CreateIsNull(mlen) : nullptr;
         bool isboxed = layout->flags.arrayelem_isboxed;
         if (isboxed || layout->first_ptr >= 0) {
-            bool needlock = isatomic && !isboxed && layout->size > MAX_ATOMIC_SIZE;
+            bool needlock = layout->flags.arrayelem_islocked;
             AtomicOrdering Order = (needlock || order <= jl_memory_order_notatomic) ?
                 (isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic) :
                 get_llvm_atomic_order(order);
@@ -4359,13 +4359,12 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                 ctx.builder.SetInsertPoint(passBB);
             }
             Value *elem = emit_memoryref_ptr(ctx, ref, layout);
-            if (needlock) {
+            if (!isboxed)
+                elem = emit_ptrgep(ctx, elem, layout->first_ptr * sizeof(void*));
+            else if (needlock)
                 // n.b. no actual lock acquire needed, as the check itself only needs to load a single pointer and check for null
                 // elem += sizeof(lock);
                 elem = emit_ptrgep(ctx, elem, LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT));
-            }
-            if (!isboxed)
-                elem = emit_ptrgep(ctx, elem, layout->first_ptr * sizeof(void*));
             // emit this using the same type as BUILTIN(memoryrefget)
             // so that LLVM may be able to load-load forward them and fold the result
             auto tbaa = isboxed ? ctx.tbaa().tbaa_ptrarraybuf : ctx.tbaa().tbaa_arraybuf;
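Here codegen stops re-deriving the atomicity and lock decisions from `kind` and the element size and instead trusts the two layout bits, so the predicates cannot drift from what `jl_get_genericmemory_layout` computed. The rule those bits encode, sketched in Julia; the 16-byte cutoff is an assumption standing in for `MAX_ATOMIC_SIZE` in `src/julia_internal.h`, and inline-stored types with pointer fields (such as the `Pair` added to the tests below) follow the same size rule even though `isbitstype` is false for them:

```julia
# Sketch only, not an API: which atomic element types get a per-element lock.
const ASSUMED_MAX_ATOMIC_SIZE = 16  # stand-in for MAX_ATOMIC_SIZE (a build-time C constant)
element_needs_lock(T::DataType) = isbitstype(T) && sizeof(T) > ASSUMED_MAX_ATOMIC_SIZE

element_needs_lock(Float64)           # false: plain hardware atomics suffice
element_needs_lock(Complex{Int128})   # true: 32 bytes exceed the cutoff
element_needs_lock(NTuple{3,Float64}) # true: 24 bytes exceed the cutoff
```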
diff --git a/src/datatype.c b/src/datatype.c
index 2df2bdb2aaa71..450bf89748474 100644
--- a/src/datatype.c
+++ b/src/datatype.c
@@ -239,8 +239,10 @@ static jl_datatype_layout_t *jl_get_layout(uint32_t sz,
     flddesc->flags.haspadding = haspadding;
     flddesc->flags.isbitsegal = isbitsegal;
     flddesc->flags.fielddesc_type = fielddesc_type;
-    flddesc->flags.arrayelem_isboxed = arrayelem == 1;
-    flddesc->flags.arrayelem_isunion = arrayelem == 2;
+    flddesc->flags.arrayelem_isboxed = (arrayelem & 1) != 0;
+    flddesc->flags.arrayelem_isunion = (arrayelem & 2) != 0;
+    flddesc->flags.arrayelem_isatomic = (arrayelem & 4) != 0;
+    flddesc->flags.arrayelem_islocked = (arrayelem & 8) != 0;
     flddesc->flags.padding = 0;
     flddesc->npointers = npointers;
     flddesc->first_ptr = first_ptr;
@@ -537,6 +539,7 @@ void jl_get_genericmemory_layout(jl_datatype_t *st)
     uint32_t *pointers = &first_ptr;
     int needlock = 0;
 
+    const jl_datatype_layout_t *el_layout = NULL;
     if (isunboxed) {
         elsz = LLT_ALIGN(elsz, al);
         if (kind == (jl_value_t*)jl_atomic_sym) {
@@ -551,12 +554,12 @@ void jl_get_genericmemory_layout(jl_datatype_t *st)
         else {
             assert(jl_is_datatype(eltype));
             zi = ((jl_datatype_t*)eltype)->zeroinit;
-            const jl_datatype_layout_t *layout = ((jl_datatype_t*)eltype)->layout;
-            if (layout->first_ptr >= 0) {
-                first_ptr = layout->first_ptr;
-                npointers = layout->npointers;
-                if (layout->flags.fielddesc_type == 2) {
-                    pointers = (uint32_t*)jl_dt_layout_ptrs(layout);
+            el_layout = ((jl_datatype_t*)eltype)->layout;
+            if (el_layout->first_ptr >= 0) {
+                first_ptr = el_layout->first_ptr;
+                npointers = el_layout->npointers;
+                if (el_layout->flags.fielddesc_type == 2 && !needlock) {
+                    pointers = (uint32_t*)jl_dt_layout_ptrs(el_layout);
                 }
                 else {
                     pointers = (uint32_t*)alloca(npointers * sizeof(uint32_t));
@@ -568,10 +571,22 @@ void jl_get_genericmemory_layout(jl_datatype_t *st)
         }
         if (needlock) {
             assert(al <= JL_SMALL_BYTE_ALIGNMENT);
-            size_t offset = LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT);
-            elsz += offset;
+            size_t lock_offset = LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT);
+            elsz += lock_offset;
+            if (al < sizeof(void*)) {
+                al = sizeof(void*);
+                elsz = LLT_ALIGN(elsz, al);
+            }
             haspadding = 1;
             zi = 1;
+            // Adjust pointer offsets to account for the lock at the beginning
+            if (first_ptr != -1) {
+                uint32_t lock_offset_words = lock_offset / sizeof(void*);
+                first_ptr += lock_offset_words;
+                for (int j = 0; j < npointers; j++) {
+                    pointers[j] += lock_offset_words;
+                }
+            }
         }
     }
     else {
@@ -580,13 +595,17 @@ void jl_get_genericmemory_layout(jl_datatype_t *st)
             zi = 1;
     }
 
-    int arrayelem;
+    // arrayelem is a bitfield: 1=isboxed, 2=isunion, 4=isatomic, 8=islocked
+    int arrayelem = 0;
     if (!isunboxed)
-        arrayelem = 1;
-    else if (isunion)
-        arrayelem = 2;
-    else
-        arrayelem = 0;
+        arrayelem |= 1; // arrayelem_isboxed
+    if (isunion)
+        arrayelem |= 2; // arrayelem_isunion
+    if (kind == (jl_value_t*)jl_atomic_sym) {
+        arrayelem |= 4; // arrayelem_isatomic
+        if (needlock)
+            arrayelem |= 8; // arrayelem_islocked
+    }
     assert(!st->layout);
     st->layout = jl_get_layout(elsz, nfields, npointers, al, haspadding, isbitsegal, arrayelem, NULL, pointers);
     st->zeroinit = zi;
@@ -647,17 +666,17 @@ void jl_compute_field_offsets(jl_datatype_t *st)
     // if we have no fields, we can trivially skip the rest
     if (st == jl_symbol_type || st == jl_string_type) {
         // opaque layout - heap-allocated blob
-        static const jl_datatype_layout_t opaque_byte_layout = {0, 0, 1, -1, 1, { .haspadding = 0, .fielddesc_type=0, .isbitsegal=1, .arrayelem_isboxed=0, .arrayelem_isunion=0 }};
+        static const jl_datatype_layout_t opaque_byte_layout = {0, 0, 1, -1, 1, { .isbitsegal=1 }};
         st->layout = &opaque_byte_layout;
         return;
     }
     else if (st == jl_simplevector_type || st == jl_module_type) {
-        static const jl_datatype_layout_t opaque_ptr_layout = {0, 0, 1, -1, sizeof(void*), { .haspadding = 0, .fielddesc_type=0, .isbitsegal=1, .arrayelem_isboxed=0, .arrayelem_isunion=0 }};
+        static const jl_datatype_layout_t opaque_ptr_layout = {0, 0, 1, -1, sizeof(void*), { .isbitsegal=1 }};
         st->layout = &opaque_ptr_layout;
         return;
     }
     else {
-        static const jl_datatype_layout_t singleton_layout = {0, 0, 0, -1, 1, { .haspadding = 0, .fielddesc_type=0, .isbitsegal=1, .arrayelem_isboxed=0, .arrayelem_isunion=0 }};
+        static const jl_datatype_layout_t singleton_layout = {0, 0, 0, -1, 1, { .isbitsegal=1 }};
         st->layout = &singleton_layout;
     }
 }
@@ -1001,6 +1020,8 @@ JL_DLLEXPORT jl_datatype_t * jl_new_foreign_type(jl_sym_t *name,
     layout->flags.padding = 0;
     layout->flags.arrayelem_isboxed = 0;
     layout->flags.arrayelem_isunion = 0;
+    layout->flags.arrayelem_isatomic = 0;
+    layout->flags.arrayelem_islocked = 0;
     jl_fielddescdyn_t * desc =
       (jl_fielddescdyn_t *) ((char *)layout + sizeof(*layout));
     desc->markfunc = markfunc;
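The `needlock` branch above is the heart of the change: each element now reserves an aligned lock prefix, element alignment is raised to at least pointer size so the lock itself stays aligned, and every recorded GC pointer offset is shifted past the lock. Restated as a runnable sketch; the 16-byte `sizeof(jl_mutex_t)` and 16-byte `JL_SMALL_BYTE_ALIGNMENT` are assumptions for a typical 64-bit build:

```julia
llt_align(x, a) = (x + a - 1) & ~(a - 1)  # same as C's LLT_ALIGN

# Sketch of the locked-element size computation above; all quantities in bytes.
function locked_layout(elsz, al; mutexsz = 16, small_align = 16, ptrsz = 8)
    elsz = llt_align(elsz, al)
    lock_offset = llt_align(mutexsz, small_align)  # lock prefix per element
    elsz += lock_offset
    if al < ptrsz          # keep each element's lock pointer-aligned
        al = ptrsz
        elsz = llt_align(elsz, al)
    end
    return (; elsz, al, lock_offset)
end

locked_layout(24, 8)  # (elsz = 40, al = 8, lock_offset = 16) for NTuple{3,Float64}
```

Shifting `first_ptr` and the `pointers` array by `lock_offset / sizeof(void*)` is what lets the GC scan the payload's pointers rather than the lock word, and it is also why `_jl_memoryref_isassigned` below can drop its own lock-offset arithmetic.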
diff --git a/src/genericmemory.c b/src/genericmemory.c
index a81a4a199956c..35ef2b545a32d 100644
--- a/src/genericmemory.c
+++ b/src/genericmemory.c
@@ -262,8 +262,8 @@ JL_DLLEXPORT void jl_genericmemory_copyto(jl_genericmemory_t *dest, char* destda
 
 JL_DLLEXPORT jl_value_t *jl_genericmemoryref(jl_genericmemory_t *mem, size_t i)
 {
-    int isatomic = (jl_tparam0(jl_typetagof(mem)) == (jl_value_t*)jl_atomic_sym);
     const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(mem))->layout;
+    int isatomic = layout->flags.arrayelem_isatomic || layout->flags.arrayelem_islocked;
     jl_genericmemoryref_t m;
     m.mem = mem;
     m.ptr_or_offset = (layout->flags.arrayelem_isunion || layout->size == 0) ? (void*)i : (void*)((char*)mem->ptr + layout->size * i);
@@ -342,8 +342,8 @@ static jl_value_t *jl_ptrmemrefget(jl_genericmemoryref_t m JL_PROPAGATES_ROOT, i
 
 JL_DLLEXPORT jl_value_t *jl_memoryrefget(jl_genericmemoryref_t m, int isatomic)
 {
-    assert(isatomic == (jl_tparam0(jl_typetagof(m.mem)) == (jl_value_t*)jl_atomic_sym));
     const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m.mem))->layout;
+    assert(isatomic == (layout->flags.arrayelem_isatomic || layout->flags.arrayelem_islocked));
     if (layout->flags.arrayelem_isboxed)
         return jl_ptrmemrefget(m, isatomic);
     jl_value_t *eltype = jl_tparam1(jl_typetagof(m.mem));
@@ -365,7 +365,7 @@ JL_DLLEXPORT jl_value_t *jl_memoryrefget(jl_genericmemoryref_t m, int isatomic)
     assert(data - (char*)m.mem->ptr < layout->size * m.mem->length);
     jl_value_t *r;
     size_t fsz = jl_datatype_size(eltype);
-    int needlock = isatomic && fsz > MAX_ATOMIC_SIZE;
+    int needlock = layout->flags.arrayelem_islocked;
     if (isatomic && !needlock) {
         r = jl_atomic_new_bits(eltype, data);
     }
@@ -393,9 +393,6 @@ static int _jl_memoryref_isassigned(jl_genericmemoryref_t m, int isatomic)
     if (layout->flags.arrayelem_isboxed) {
     }
     else if (layout->first_ptr >= 0) {
-        int needlock = isatomic && layout->size > MAX_ATOMIC_SIZE;
-        if (needlock)
-            elem = elem + LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT) / sizeof(jl_value_t*);
         elem = &elem[layout->first_ptr];
     }
     else {
@@ -411,7 +408,8 @@ JL_DLLEXPORT jl_value_t *jl_memoryref_isassigned(jl_genericmemoryref_t m, int is
 
 JL_DLLEXPORT void jl_memoryrefset(jl_genericmemoryref_t m JL_ROOTING_ARGUMENT, jl_value_t *rhs JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED, int isatomic)
 {
-    assert(isatomic == (jl_tparam0(jl_typetagof(m.mem)) == (jl_value_t*)jl_atomic_sym));
+    const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m.mem))->layout;
+    assert(isatomic == (layout->flags.arrayelem_isatomic || layout->flags.arrayelem_islocked));
     jl_value_t *eltype = jl_tparam1(jl_typetagof(m.mem));
     if (eltype != (jl_value_t*)jl_any_type && !jl_typeis(rhs, eltype)) {
         JL_GC_PUSH1(&rhs);
@@ -419,7 +417,6 @@ JL_DLLEXPORT void jl_memoryrefset(jl_genericmemoryref_t m JL_ROOTING_ARGUMENT, j
         jl_type_error("memoryrefset!", eltype, rhs);
         JL_GC_POP();
     }
-    const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m.mem))->layout;
     if (layout->flags.arrayelem_isboxed) {
         assert((char*)m.ptr_or_offset - (char*)m.mem->ptr < sizeof(jl_value_t*) * m.mem->length);
         if (isatomic)
@@ -449,7 +446,7 @@ JL_DLLEXPORT void jl_memoryrefset(jl_genericmemoryref_t m JL_ROOTING_ARGUMENT, j
     }
     if (layout->size != 0) {
         assert(data - (char*)m.mem->ptr < layout->size * m.mem->length);
-        int needlock = isatomic && layout->size > MAX_ATOMIC_SIZE;
+        int needlock = layout->flags.arrayelem_islocked;
         size_t fsz = jl_datatype_size((jl_datatype_t*)jl_typeof(rhs)); // need to shrink-wrap the final copy
         if (isatomic && !needlock) {
             jl_atomic_store_bits(data, rhs, fsz);
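These are the runtime (non-codegen) counterparts of the paths above, now asserting against the same layout bits. A usage sketch against the `Core`-level memoryref calls these functions back; `AtomicMemory` assumes Julia 1.11 or later:

```julia
# ComplexF64 (16 bytes) stays lock-free under the cutoff assumed earlier;
# swap in NTuple{3,Float64} to exercise the locked path instead.
mem = AtomicMemory{ComplexF64}(undef, 2)
r = GenericMemoryRef(mem, 2)
Core.memoryrefset!(r, 1.0 + 2.0im, :sequentially_consistent, true)
Core.memoryrefget(r, :sequentially_consistent, true)  # 1.0 + 2.0im
```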
diff --git a/src/julia.h b/src/julia.h
index 19c8408f22b4a..b83af2fe8367c 100644
--- a/src/julia.h
+++ b/src/julia.h
@@ -574,10 +574,12 @@ typedef struct {
         // metadata bit only for GenericMemory eltype layout
         uint16_t arrayelem_isboxed : 1;
         uint16_t arrayelem_isunion : 1;
+        uint16_t arrayelem_isatomic : 1;
+        uint16_t arrayelem_islocked : 1;
         // If set, this type's egality can be determined entirely by comparing
         // the non-padding bits of this datatype.
         uint16_t isbitsegal : 1;
-        uint16_t padding : 10;
+        uint16_t padding : 8;
     } flags;
     // union {
     //     jl_fielddesc8_t field8[nfields];
@@ -1667,6 +1669,8 @@ static inline int jl_field_isconst(jl_datatype_t *st, int i) JL_NOTSAFEPOINT
 #define jl_is_addrspacecore(v) jl_typetagis(v,jl_addrspacecore_type)
 #define jl_is_abioverride(v) jl_typetagis(v,jl_abioverride_type)
 #define jl_genericmemory_isbitsunion(a) (((jl_datatype_t*)jl_typetagof(a))->layout->flags.arrayelem_isunion)
+#define jl_genericmemory_isatomic(a) (((jl_datatype_t*)jl_typetagof(a))->layout->flags.arrayelem_isatomic)
+#define jl_genericmemory_islocked(a) (((jl_datatype_t*)jl_typetagof(a))->layout->flags.arrayelem_islocked)
 #define jl_is_array_any(v) jl_typetagis(v,jl_array_any_type)
 
 JL_DLLEXPORT int jl_subtype(jl_value_t *a, jl_value_t *b);
diff --git a/src/rtutils.c b/src/rtutils.c
index e3672ab5d887e..6eb5c82f1f244 100644
--- a/src/rtutils.c
+++ b/src/rtutils.c
@@ -1290,10 +1290,20 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
         for (size_t j = 0; j < tlen; j++) {
             if (layout->flags.arrayelem_isboxed) {
                 jl_value_t **ptr = ((jl_value_t**)m->ptr) + j;
+                if (layout->flags.arrayelem_islocked) {
+                    // Skip the aligned lock prefix at the start of each element
+                    size_t lock_size = LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT);
+                    ptr = (jl_value_t**)((char*)ptr + lock_size);
+                }
                 n += jl_static_show_x(out, *ptr, depth, ctx);
             }
             else {
                 char *ptr = ((char*)m->ptr) + j * layout->size;
+                if (layout->flags.arrayelem_islocked) {
+                    // Skip the aligned lock prefix at the start of each element
+                    size_t lock_size = LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT);
+                    ptr += lock_size;
+                }
                 n += jl_static_show_x_(out, (jl_value_t*)ptr,
                                        (jl_datatype_t*)(typetagdata ? jl_nth_union_component(el_type, typetagdata[j]) : el_type),
                                        depth, ctx);
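For a locked memory, `layout->size` already includes the lock prefix, so element `j`'s payload lives at `m->ptr + j*size + lock_offset`; the display code now adds that second term. The same assumed 64-bit constants as in the datatype.c sketch:

```julia
# AtomicMemory{NTuple{3,Float64}}: 16-byte lock + 24-byte payload per element.
elsz, lock_offset = 40, 16
payload_offset(j) = j * elsz + lock_offset  # byte offset of element j's data
payload_offset.(0:2)                        # [16, 56, 96]
```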
diff --git a/test/atomics.jl b/test/atomics.jl
index 2d4a713b1d30d..3572824741459 100644
--- a/test/atomics.jl
+++ b/test/atomics.jl
@@ -111,6 +111,7 @@ Base.show(io::IO, x::Int24) = print(io, "Int24(", Core.Intrinsics.zext_int(Int,
 ## Fields
 
 @noinline function _test_field_operators(r)
+    GC.gc(false)
     r = r[]
     TT = fieldtype(typeof(r), :x)
     T = typeof(getfield(r, :x))
@@ -147,6 +148,7 @@ test_field_operators(ARefxy{Float64}(123_10, 123_20))
 
 @noinline function _test_field_orderings(r, x, y)
     @nospecialize x y
+    GC.gc(false)
     r = r[]
     TT = fieldtype(typeof(r), :x)
 
@@ -328,8 +330,9 @@ test_field_orderings(ARefxy{Any}(true, false), true, false)
 test_field_orderings(ARefxy{Union{Nothing,Missing}}(nothing, missing), nothing, missing)
 test_field_orderings(ARefxy{Union{Nothing,Int}}(nothing, 123_1), nothing, 123_1)
 test_field_orderings(Complex{Int128}(10, 30), Complex{Int128}(20, 40))
-test_field_orderings(Complex{Real}(10, 30), Complex{Real}(20, 40))
+test_field_orderings(Complex{Real}(10.5, 30.5), Complex{Real}(20.5, 40.5))
 test_field_orderings(Complex{Rational{Integer}}(10, 30), Complex{Rational{Integer}}(20, 40))
+test_field_orderings(Pair{NTuple{3,Float64},NTuple{3,Real}}((10.5,11.5,12.5), (30.5,40.5,50.5)), Pair{NTuple{3,Float64},NTuple{3,Real}}((110.5,111.5,112.5), (130.5,140.5,150.5)))
 test_field_orderings(10.0, 20.0)
 test_field_orderings(NaN, Inf)
 
@@ -705,7 +708,7 @@ test_global_orderings(Any, true, false)
 test_global_orderings(Union{Nothing,Missing}, nothing, missing)
 test_global_orderings(Union{Nothing,Int}, nothing, 123_1)
 test_global_orderings(Complex{Int128}, Complex{Int128}(10, 30), Complex{Int128}(20, 40))
-test_global_orderings(Complex{Real}, Complex{Real}(10, 30), Complex{Real}(20, 40))
+test_global_orderings(Complex{Real}, Complex{Real}(10.5, 30.5), Complex{Real}(20.5, 40.5))
 test_global_orderings(Float64, 10.0, 20.0)
 test_global_orderings(Float64, NaN, Inf)
 
@@ -1024,15 +1027,17 @@ test_memory_operators(Float64)
 end
 @noinline function test_memory_orderings(T::Type, x, y)
     @nospecialize
-    xr = GenericMemoryRef(AtomicMemory{T}(undef, 1))
-    memoryrefset!(xr, x, :unordered, true) # @atomic xr[] = x
-    yr = GenericMemoryRef(Memory{T}(undef, 1))
+    xr = GenericMemoryRef(AtomicMemory{T}(undef, 2), 2)
+    memoryrefset!(xr, x, :unordered, true) # @atomic xr[2] = x
+    yr = GenericMemoryRef(Memory{T}(undef, 2), 2)
     yr[] = y
+    GC.gc(false)
     _test_memory_orderings(Ref(xr), Ref(yr), x, y)
-    xr = GenericMemoryRef(AtomicMemory{T}(undef, 1))
-    memoryrefset!(xr, x, :unordered, true) # @atomic xr[] = x
-    yr = GenericMemoryRef(Memory{T}(undef, 1))
+    xr = GenericMemoryRef(AtomicMemory{T}(undef, 2), 2)
+    memoryrefset!(xr, x, :unordered, true) # @atomic xr[2] = x
+    yr = GenericMemoryRef(Memory{T}(undef, 2), 2)
     yr[] = y
+    GC.gc(false)
     _test_memory_orderings(Ref{Any}(xr), Ref{Any}(yr), x, y)
     nothing
 end
@@ -1047,7 +1052,8 @@ test_memory_orderings(Any, true, false)
 test_memory_orderings(Union{Nothing,Missing}, nothing, missing)
 test_memory_orderings(Union{Nothing,Int}, nothing, 123_1)
 test_memory_orderings(Complex{Int128}(10, 30), Complex{Int128}(20, 40))
-test_memory_orderings(Complex{Real}(10, 30), Complex{Real}(20, 40))
+test_memory_orderings(Complex{Real}(10.5, 30.5), Complex{Real}(20.5, 40.5))
+test_memory_orderings(Pair{NTuple{3,Float64},NTuple{3,Real}}((10.5,11.5,12.5), (30.5,40.5,50.5)), Pair{NTuple{3,Float64},NTuple{3,Real}}((110.5,111.5,112.5), (130.5,140.5,150.5)))
 test_memory_orderings(10.0, 20.0)
 test_memory_orderings(NaN, Inf)
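Reduced to a standalone sketch, the new locked-element coverage: a 24-byte bits tuple forces the lock-prefixed layout, and writing through a reference to element 2 rather than 1 exercises the nonzero byte-offset path changed in cgutils.cpp. Illustrative only, not part of the suite:

```julia
using Test
T = NTuple{3,Float64}
xr = GenericMemoryRef(AtomicMemory{T}(undef, 2), 2)
Core.memoryrefset!(xr, (1.0, 2.0, 3.0), :sequentially_consistent, true)
@test Core.memoryrefget(xr, :sequentially_consistent, true) === (1.0, 2.0, 3.0)
```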