Labels: compiler:optimizer, performance, regression 1.12
I'm seeing a lot more generated LLVM (and much worse performance) with `Base.setindex(nt::NamedTuple, value, key::Symbol)` on 1.12 and nightly.
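A quick way to see the performance side directly, not just the extra IR (a minimal sketch, assuming BenchmarkTools.jl is available; absolute timings will vary by machine):

```julia
using BenchmarkTools

nt = (next = zero(UInt32), prev = zero(UInt32))

# On 1.10/1.11 this compiles to a single statically resolved call;
# on 1.12/nightly it allocates and dispatches dynamically (see the IR below).
@btime Base.setindex($nt, 2, :next)
```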
This may depend somewhat on the system, since @MasonProtter tried this and wasn't able to reproduce the poor codegen on his machine. For reference:
```
Platform Info:
  OS: Linux (x86_64-linux-gnu)
  CPU: 20 × 13th Gen Intel(R) Core(TM) i5-13600K
  WORD_SIZE: 64
  LLVM: libLLVM-16.0.6 (ORCJIT, alderlake)
Threads: 18 default, 0 interactive, 9 GC (on 20 virtual cores)
Environment:
  JULIA_NUM_THREADS = 18
  JULIA_PROJECT = @.
  JULIA_EDITOR = emacs -nw
  JULIA_PKG_PRESERVE_TIERED_INSTALLED = true
```
Now that that's out of the way, this is what I'm seeing on 1.10 and 1.11:
```julia
julia> @code_llvm ((a, b) -> Base.setindex(a, b, :next))((next = zero(UInt32), prev = zero(UInt32)), 2)
; Function Signature: var"#1"(NamedTuple{(:next, :prev), Tuple{UInt32, UInt32}}, Int64)
; @ REPL[1]:1 within `#1`
define nonnull ptr @"julia_#1_773"(ptr nocapture noundef nonnull readonly align 4 dereferenceable(8) %"a::NamedTuple", i64 signext %"b::Int64") #0 {
top:
  %0 = call nonnull ptr @j_setindex_776(ptr nocapture nonnull readonly %"a::NamedTuple", i64 signext %"b::Int64", ptr nonnull @"jl_sym#next#777.jit")
  ret ptr %0
}
```
and on 1.12/nightly (lots of LLVM):
```julia
julia> @code_llvm ((a, b) -> Base.setindex(a, b, :next))((next = zero(UInt32), prev = zero(UInt32)), 2)
; Function Signature: var"#2"(NamedTuple{(:next, :prev), Tuple{UInt32, UInt32}}, Int64)
; @ REPL[1]:1 within `#2`
define nonnull ptr @"julia_#2_1195"(ptr nocapture noundef nonnull readonly align 4 dereferenceable(8) %"a::NamedTuple", i64 signext %"b::Int64") #0 {
top:
%jlcallframe1 = alloca [5 x ptr], align 8
%gcframe2 = alloca [5 x ptr], align 16
call void @llvm.memset.p0.i64(ptr align 16 %gcframe2, i8 0, i64 40, i1 true)
%thread_ptr = call ptr asm "movq %fs:0, $0", "=r"() #12
%tls_ppgcstack = getelementptr inbounds i8, ptr %thread_ptr, i64 -8
%tls_pgcstack = load ptr, ptr %tls_ppgcstack, align 8
store i64 12, ptr %gcframe2, align 8
%frame.prev = getelementptr inbounds ptr, ptr %gcframe2, i64 1
%task.gcstack = load ptr, ptr %tls_pgcstack, align 8
store ptr %task.gcstack, ptr %frame.prev, align 8
store ptr %gcframe2, ptr %tls_pgcstack, align 8
; ┌ @ namedtuple.jl:484 within `setindex`
; │┌ @ boot.jl:792 within `NamedTuple`
%0 = call ptr @jl_get_builtin_fptr(ptr nonnull @"+Core.#_compute_sparams#1197.jit")
%ptls_field = getelementptr inbounds i8, ptr %tls_pgcstack, i64 16
%ptls_load = load ptr, ptr %ptls_field, align 8
%"box::NamedTuple" = call noalias nonnull align 8 dereferenceable(16) ptr @ijl_gc_small_alloc(ptr %ptls_load, i32 360, i32 16, i64 139744856668560) #8
%"box::NamedTuple.tag_addr" = getelementptr inbounds i64, ptr %"box::NamedTuple", i64 -1
store atomic i64 139744856668560, ptr %"box::NamedTuple.tag_addr" unordered, align 8
%1 = load i64, ptr %"a::NamedTuple", align 4
store i64 %1, ptr %"box::NamedTuple", align 8
%gc_slot_addr_1 = getelementptr inbounds ptr, ptr %gcframe2, i64 3
store ptr %"box::NamedTuple", ptr %gc_slot_addr_1, align 8
%ptls_load35 = load ptr, ptr %ptls_field, align 8
%"box::NamedTuple3" = call noalias nonnull align 8 dereferenceable(16) ptr @ijl_gc_small_alloc(ptr %ptls_load35, i32 360, i32 16, i64 139744871045968) #8
%"box::NamedTuple3.tag_addr" = getelementptr inbounds i64, ptr %"box::NamedTuple3", i64 -1
store atomic i64 139744871045968, ptr %"box::NamedTuple3.tag_addr" unordered, align 8
store i64 %"b::Int64", ptr %"box::NamedTuple3", align 8
%gc_slot_addr_0 = getelementptr inbounds ptr, ptr %gcframe2, i64 2
store ptr %"box::NamedTuple3", ptr %gc_slot_addr_0, align 8
store ptr @"-Main.Base.merge#1199.jit", ptr %jlcallframe1, align 8
%2 = getelementptr inbounds ptr, ptr %jlcallframe1, i64 1
store ptr @"jl_global#1200.jit", ptr %2, align 8
%3 = getelementptr inbounds ptr, ptr %jlcallframe1, i64 2
store ptr %"box::NamedTuple", ptr %3, align 8
%4 = getelementptr inbounds ptr, ptr %jlcallframe1, i64 3
store ptr %"box::NamedTuple3", ptr %4, align 8
%Builtin_ret = call nonnull ptr %0(ptr nonnull @"jl_global#1198.jit", ptr nonnull %jlcallframe1, i32 4)
store ptr null, ptr %gc_slot_addr_1, align 8
store ptr %Builtin_ret, ptr %gc_slot_addr_0, align 8
; │└
; │┌ @ namedtuple.jl:338 within `merge`
store ptr %Builtin_ret, ptr %jlcallframe1, align 8
store ptr @"jl_global#1203.jit", ptr %2, align 8
%jl_f__svec_ref_ret = call nonnull ptr @jl_f__svec_ref(ptr null, ptr nonnull %jlcallframe1, i32 2)
%jl_f__svec_ref_ret.tag_addr = getelementptr inbounds i64, ptr %jl_f__svec_ref_ret, i64 -1
%jl_f__svec_ref_ret.tag = load atomic i64, ptr %jl_f__svec_ref_ret.tag_addr unordered, align 8
%5 = and i64 %jl_f__svec_ref_ret.tag, -16
%6 = inttoptr i64 %5 to ptr
%7 = icmp ult ptr %6, inttoptr (i64 1024 to ptr)
br i1 %7, label %guard_pass, label %guard_exit
L6: ; preds = %guard_exit
store ptr %"box::NamedTuple16", ptr %jlcallframe1, align 8
store ptr %"box::NamedTuple20", ptr %2, align 8
store ptr @"jl_global#1207.jit", ptr %3, align 8
store ptr %jl_f__svec_ref_ret, ptr %4, align 8
%8 = call nonnull ptr @j1_merge_fallback_1205(ptr nonnull @"jl_global#1206.jit", ptr nonnull %jlcallframe1, i32 4)
%frame.prev49 = load ptr, ptr %frame.prev, align 8
store ptr %frame.prev49, ptr %tls_pgcstack, align 8
ret ptr %8
L9: ; preds = %guard_exit
store ptr @"jl_global#1206.jit", ptr %jlcallframe1, align 8
store ptr %"box::NamedTuple16", ptr %2, align 8
store ptr %"box::NamedTuple20", ptr %3, align 8
store ptr @"jl_global#1207.jit", ptr %4, align 8
%9 = getelementptr inbounds ptr, ptr %jlcallframe1, i64 4
store ptr %jl_f__svec_ref_ret, ptr %9, align 8
%jl_f_throw_methoderror_ret = call nonnull ptr @jl_f_throw_methoderror(ptr null, ptr nonnull %jlcallframe1, i32 5)
call void @llvm.trap()
unreachable
guard_pass: ; preds = %top
%10 = getelementptr inbounds i8, ptr @jl_small_typeof, i64 %5
%11 = load ptr, ptr %10, align 8
br label %guard_exit
guard_exit: ; preds = %guard_pass, %top
%typeof = phi ptr [ %6, %top ], [ %11, %guard_pass ]
store ptr null, ptr %gc_slot_addr_0, align 8
%gc_slot_addr_2 = getelementptr inbounds ptr, ptr %gcframe2, i64 4
store ptr %jl_f__svec_ref_ret, ptr %gc_slot_addr_2, align 8
%12 = call i32 @ijl_subtype(ptr nonnull %typeof, ptr nonnull @"+Core.Tuple#1204.jit")
%.not = icmp eq i32 %12, 0
%ptls_load45 = load ptr, ptr %ptls_field, align 8
%"box::NamedTuple16" = call noalias nonnull align 8 dereferenceable(16) ptr @ijl_gc_small_alloc(ptr %ptls_load45, i32 360, i32 16, i64 139744856668560) #8
%"box::NamedTuple16.tag_addr" = getelementptr inbounds i64, ptr %"box::NamedTuple16", i64 -1
store atomic i64 139744856668560, ptr %"box::NamedTuple16.tag_addr" unordered, align 8
%13 = load i64, ptr %"a::NamedTuple", align 4
store i64 %13, ptr %"box::NamedTuple16", align 8
store ptr %"box::NamedTuple16", ptr %gc_slot_addr_1, align 8
%ptls_load48 = load ptr, ptr %ptls_field, align 8
%"box::NamedTuple20" = call noalias nonnull align 8 dereferenceable(16) ptr @ijl_gc_small_alloc(ptr %ptls_load48, i32 360, i32 16, i64 139744871045968) #8
%"box::NamedTuple20.tag_addr" = getelementptr inbounds i64, ptr %"box::NamedTuple20", i64 -1
store atomic i64 139744871045968, ptr %"box::NamedTuple20.tag_addr" unordered, align 8
store i64 %"b::Int64", ptr %"box::NamedTuple20", align 8
store ptr %"box::NamedTuple20", ptr %gc_slot_addr_0, align 8
br i1 %.not, label %L9, label %L6
; └└
}
```
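For what it's worth, the 1.12 IR above heap-allocates the single-field `NamedTuple`s (`ijl_gc_small_alloc`), fetches `_compute_sparams` through `jl_get_builtin_fptr`, and ends in a dynamic call to `j1_merge_fallback_1205` (plus a `jl_f_throw_methoderror` branch), whereas 1.10/1.11 emitted a single statically resolved call. That matches the source locations in the IR comments: `setindex` goes through `merge`, roughly like this (a paraphrase, not the exact Base definition):

```julia
# Rough sketch of Base.setindex for NamedTuple (cf. namedtuple.jl:484 and the
# merge at namedtuple.jl:338 in the IR annotations above); the regression
# appears to be in how this merge call gets optimized.
setindex(nt::NamedTuple, v, key::Symbol) = merge(nt, (; key => v))
```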