Skip to content

Poor codegen with setindex on a NamedTuple #58915

@tecosaur

Description

@tecosaur

I'm seeing a lot more generated LLVM IR (and much worse performance) with Base.setindex(nt::NamedTuple, value, key::Symbol) on 1.12 and nightly than on 1.10 and 1.11.

This may depend somewhat on the system, since @MasonProtter tried this and wasn't able to reproduce the poor codegen on his machine. For reference, here is my platform info:

Platform Info:
  OS: Linux (x86_64-linux-gnu)
  CPU: 20 × 13th Gen Intel(R) Core(TM) i5-13600K
  WORD_SIZE: 64
  LLVM: libLLVM-16.0.6 (ORCJIT, alderlake)
Threads: 18 default, 0 interactive, 9 GC (on 20 virtual cores)
Environment:
  JULIA_NUM_THREADS = 18
  JULIA_PROJECT = @.
  JULIA_EDITOR = emacs -nw
  JULIA_PKG_PRESERVE_TIERED_INSTALLED = true

Now that that's out of the way, this is what I'm seeing on 1.10 and 1.11:

julia> @code_llvm ((a, b) -> Base.setindex(a, b, :next))((next = zero(UInt32), prev = zero(UInt32)), 2)
; Function Signature: var"#1"(NamedTuple{(:next, :prev), Tuple{UInt32, UInt32}}, Int64)
;  @ REPL[1]:1 within `#1`
define nonnull ptr @"julia_#1_773"(ptr nocapture noundef nonnull readonly align 4 dereferenceable(8) %"a::NamedTuple", i64 signext %"b::Int64") #0 {
top:
  %0 = call nonnull ptr @j_setindex_776(ptr nocapture nonnull readonly %"a::NamedTuple", i64 signext %"b::Int64", ptr nonnull @"jl_sym#next#777.jit")
  ret ptr %0
}

And this is what I'm seeing on 1.12/nightly:

Lots of LLVM
julia> @code_llvm ((a, b) -> Base.setindex(a, b, :next))((next = zero(UInt32), prev = zero(UInt32)), 2)
; Function Signature: var"#2"(NamedTuple{(:next, :prev), Tuple{UInt32, UInt32}}, Int64)
;  @ REPL[1]:1 within `#2`
define nonnull ptr @"julia_#2_1195"(ptr nocapture noundef nonnull readonly align 4 dereferenceable(8) %"a::NamedTuple", i64 signext %"b::Int64") #0 {
top:
  %jlcallframe1 = alloca [5 x ptr], align 8
  %gcframe2 = alloca [5 x ptr], align 16
  call void @llvm.memset.p0.i64(ptr align 16 %gcframe2, i8 0, i64 40, i1 true)
  %thread_ptr = call ptr asm "movq %fs:0, $0", "=r"() #12
  %tls_ppgcstack = getelementptr inbounds i8, ptr %thread_ptr, i64 -8
  %tls_pgcstack = load ptr, ptr %tls_ppgcstack, align 8
  store i64 12, ptr %gcframe2, align 8
  %frame.prev = getelementptr inbounds ptr, ptr %gcframe2, i64 1
  %task.gcstack = load ptr, ptr %tls_pgcstack, align 8
  store ptr %task.gcstack, ptr %frame.prev, align 8
  store ptr %gcframe2, ptr %tls_pgcstack, align 8
; ┌ @ namedtuple.jl:484 within `setindex`
; │┌ @ boot.jl:792 within `NamedTuple`
    %0 = call ptr @jl_get_builtin_fptr(ptr nonnull @"+Core.#_compute_sparams#1197.jit")
    %ptls_field = getelementptr inbounds i8, ptr %tls_pgcstack, i64 16
    %ptls_load = load ptr, ptr %ptls_field, align 8
    %"box::NamedTuple" = call noalias nonnull align 8 dereferenceable(16) ptr @ijl_gc_small_alloc(ptr %ptls_load, i32 360, i32 16, i64 139744856668560) #8
    %"box::NamedTuple.tag_addr" = getelementptr inbounds i64, ptr %"box::NamedTuple", i64 -1
    store atomic i64 139744856668560, ptr %"box::NamedTuple.tag_addr" unordered, align 8
    %1 = load i64, ptr %"a::NamedTuple", align 4
    store i64 %1, ptr %"box::NamedTuple", align 8
    %gc_slot_addr_1 = getelementptr inbounds ptr, ptr %gcframe2, i64 3
    store ptr %"box::NamedTuple", ptr %gc_slot_addr_1, align 8
    %ptls_load35 = load ptr, ptr %ptls_field, align 8
    %"box::NamedTuple3" = call noalias nonnull align 8 dereferenceable(16) ptr @ijl_gc_small_alloc(ptr %ptls_load35, i32 360, i32 16, i64 139744871045968) #8
    %"box::NamedTuple3.tag_addr" = getelementptr inbounds i64, ptr %"box::NamedTuple3", i64 -1
    store atomic i64 139744871045968, ptr %"box::NamedTuple3.tag_addr" unordered, align 8
    store i64 %"b::Int64", ptr %"box::NamedTuple3", align 8
    %gc_slot_addr_0 = getelementptr inbounds ptr, ptr %gcframe2, i64 2
    store ptr %"box::NamedTuple3", ptr %gc_slot_addr_0, align 8
    store ptr @"-Main.Base.merge#1199.jit", ptr %jlcallframe1, align 8
    %2 = getelementptr inbounds ptr, ptr %jlcallframe1, i64 1
    store ptr @"jl_global#1200.jit", ptr %2, align 8
    %3 = getelementptr inbounds ptr, ptr %jlcallframe1, i64 2
    store ptr %"box::NamedTuple", ptr %3, align 8
    %4 = getelementptr inbounds ptr, ptr %jlcallframe1, i64 3
    store ptr %"box::NamedTuple3", ptr %4, align 8
    %Builtin_ret = call nonnull ptr %0(ptr nonnull @"jl_global#1198.jit", ptr nonnull %jlcallframe1, i32 4)
    store ptr null, ptr %gc_slot_addr_1, align 8
    store ptr %Builtin_ret, ptr %gc_slot_addr_0, align 8
; │└
; │┌ @ namedtuple.jl:338 within `merge`
    store ptr %Builtin_ret, ptr %jlcallframe1, align 8
    store ptr @"jl_global#1203.jit", ptr %2, align 8
    %jl_f__svec_ref_ret = call nonnull ptr @jl_f__svec_ref(ptr null, ptr nonnull %jlcallframe1, i32 2)
    %jl_f__svec_ref_ret.tag_addr = getelementptr inbounds i64, ptr %jl_f__svec_ref_ret, i64 -1
    %jl_f__svec_ref_ret.tag = load atomic i64, ptr %jl_f__svec_ref_ret.tag_addr unordered, align 8
    %5 = and i64 %jl_f__svec_ref_ret.tag, -16
    %6 = inttoptr i64 %5 to ptr
    %7 = icmp ult ptr %6, inttoptr (i64 1024 to ptr)
    br i1 %7, label %guard_pass, label %guard_exit

L6:                                               ; preds = %guard_exit
    store ptr %"box::NamedTuple16", ptr %jlcallframe1, align 8
    store ptr %"box::NamedTuple20", ptr %2, align 8
    store ptr @"jl_global#1207.jit", ptr %3, align 8
    store ptr %jl_f__svec_ref_ret, ptr %4, align 8
    %8 = call nonnull ptr @j1_merge_fallback_1205(ptr nonnull @"jl_global#1206.jit", ptr nonnull %jlcallframe1, i32 4)
    %frame.prev49 = load ptr, ptr %frame.prev, align 8
    store ptr %frame.prev49, ptr %tls_pgcstack, align 8
    ret ptr %8

L9:                                               ; preds = %guard_exit
    store ptr @"jl_global#1206.jit", ptr %jlcallframe1, align 8
    store ptr %"box::NamedTuple16", ptr %2, align 8
    store ptr %"box::NamedTuple20", ptr %3, align 8
    store ptr @"jl_global#1207.jit", ptr %4, align 8
    %9 = getelementptr inbounds ptr, ptr %jlcallframe1, i64 4
    store ptr %jl_f__svec_ref_ret, ptr %9, align 8
    %jl_f_throw_methoderror_ret = call nonnull ptr @jl_f_throw_methoderror(ptr null, ptr nonnull %jlcallframe1, i32 5)
    call void @llvm.trap()
    unreachable

guard_pass:                                       ; preds = %top
    %10 = getelementptr inbounds i8, ptr @jl_small_typeof, i64 %5
    %11 = load ptr, ptr %10, align 8
    br label %guard_exit

guard_exit:                                       ; preds = %guard_pass, %top
    %typeof = phi ptr [ %6, %top ], [ %11, %guard_pass ]
    store ptr null, ptr %gc_slot_addr_0, align 8
    %gc_slot_addr_2 = getelementptr inbounds ptr, ptr %gcframe2, i64 4
    store ptr %jl_f__svec_ref_ret, ptr %gc_slot_addr_2, align 8
    %12 = call i32 @ijl_subtype(ptr nonnull %typeof, ptr nonnull @"+Core.Tuple#1204.jit")
    %.not = icmp eq i32 %12, 0
    %ptls_load45 = load ptr, ptr %ptls_field, align 8
    %"box::NamedTuple16" = call noalias nonnull align 8 dereferenceable(16) ptr @ijl_gc_small_alloc(ptr %ptls_load45, i32 360, i32 16, i64 139744856668560) #8
    %"box::NamedTuple16.tag_addr" = getelementptr inbounds i64, ptr %"box::NamedTuple16", i64 -1
    store atomic i64 139744856668560, ptr %"box::NamedTuple16.tag_addr" unordered, align 8
    %13 = load i64, ptr %"a::NamedTuple", align 4
    store i64 %13, ptr %"box::NamedTuple16", align 8
    store ptr %"box::NamedTuple16", ptr %gc_slot_addr_1, align 8
    %ptls_load48 = load ptr, ptr %ptls_field, align 8
    %"box::NamedTuple20" = call noalias nonnull align 8 dereferenceable(16) ptr @ijl_gc_small_alloc(ptr %ptls_load48, i32 360, i32 16, i64 139744871045968) #8
    %"box::NamedTuple20.tag_addr" = getelementptr inbounds i64, ptr %"box::NamedTuple20", i64 -1
    store atomic i64 139744871045968, ptr %"box::NamedTuple20.tag_addr" unordered, align 8
    store i64 %"b::Int64", ptr %"box::NamedTuple20", align 8
    store ptr %"box::NamedTuple20", ptr %gc_slot_addr_0, align 8
    br i1 %.not, label %L9, label %L6
; └└
}

Metadata

Metadata

Assignees

Labels

compiler:optimizer (Optimization passes, mostly in base/compiler/ssair/) · performance (Must go faster) · regression 1.12 (Regression in the 1.12 release)

Type

No type

Projects

No projects

Relationships

None yet

Development

No branches or pull requests

Issue actions