Description
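There seems to be an issue with how threads are allocated for GPU kernel launches: a launch requests more threads in the z-dimension than the device allows (70 > 64).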
Steps to reproduce (in ClimaAtmos, on clima):
import ClimaAtmos as CA; CA.ClimaComms.@import_required_backends
simulation = CA.get_simulation(CA.AtmosConfig("config/model_configs/single_column_radiative_equilibrium_gray.yml"))
Error:
ERROR: Number of threads in z-dimension exceeds device limit (70 > 64).
Stacktrace:
[1] error(s::String)
@ Base ./error.jl:35
[2]
@ CUDA ~/.julia/packages/CUDA/75aiI/lib/cudadrv/execution.jl:97
[3] launch(::CUDA.CuFunction, ::CUDA.KernelState, ::ClimaCore.DataLayouts.VIJFH{…}, ::Base.Broadcast.Broadcasted{…}; blocks::Tuple{…}, threads::Tuple{…}, cooperative::Bool, shmem::Int64, stream::CUDA.CuStream)
@ CUDA ~/.julia/packages/CUDA/75aiI/lib/cudadrv/execution.jl:73
[4] launch
@ ~/.julia/packages/CUDA/75aiI/lib/cudadrv/execution.jl:52 [inlined]
[5] #972
@ ~/.julia/packages/CUDA/75aiI/lib/cudadrv/execution.jl:189 [inlined]
[6] macro expansion
@ ~/.julia/packages/CUDA/75aiI/lib/cudadrv/execution.jl:149 [inlined]
[7] macro expansion
@ ./none:0 [inlined]
[8] convert_arguments
@ ./none:0 [inlined]
[9] #cudacall#971
@ ~/.julia/packages/CUDA/75aiI/lib/cudadrv/execution.jl:191 [inlined]
[10] cudacall
@ ~/.julia/packages/CUDA/75aiI/lib/cudadrv/execution.jl:187 [inlined]
[11] macro expansion
@ ~/.julia/packages/CUDA/75aiI/src/compiler/execution.jl:268 [inlined]
[12] macro expansion
@ ./none:0 [inlined]
[13] call
@ ./none:0 [inlined]
[14] (::CUDA.HostKernel{…})(::ClimaCore.DataLayouts.VIJFH{…}, ::Base.Broadcast.Broadcasted{…}; threads::Tuple{…}, blocks::Tuple{…}, kwargs::@Kwargs{})
@ CUDA ~/.julia/packages/CUDA/75aiI/src/compiler/execution.jl:390
[15] kwcall(::NamedTuple, kernel::CUDA.HostKernel, args::Vararg{Any, N}) where N
@ CUDA ~/.julia/packages/CUDA/75aiI/src/compiler/execution.jl:389 [inlined]
[16] macro expansion
@ ~/.julia/packages/CUDA/75aiI/src/compiler/execution.jl:114 [inlined]
[17] auto_launch!(f!::typeof(ClimaCoreCUDAExt.knl_copyto!), args::Tuple{…}, data::ClimaCore.DataLayouts.VIJFH{…}; auto::Bool, threads_s::Tuple{…}, blocks_s::Tuple{…}, always_inline::Bool, caller::Symbol)
@ ClimaCoreCUDAExt ~/.julia/packages/ClimaCore/ANgUC/ext/cuda/cuda_utils.jl:58
[18] auto_launch!
@ ~/.julia/packages/ClimaCore/ANgUC/ext/cuda/cuda_utils.jl:38 [inlined]
[19] copyto!
@ ~/.julia/packages/ClimaCore/ANgUC/ext/cuda/data_layouts.jl:71 [inlined]
[20] copy
@ ./broadcast.jl:928 [inlined]
[21] materialize
@ ./broadcast.jl:903 [inlined]
[22] _ExtrudedFiniteDifferenceGrid(horizontal_grid::ClimaCore.Grids.SpectralElementGrid2D{…}, vertical_grid::ClimaCore.Grids.FiniteDifferenceGrid{…}, hypsography::ClimaCore.Grids.Flat, global_geometry::ClimaCore.Geometry.CartesianGlobalGeometry)
@ ClimaCore.Grids ~/.julia/packages/ClimaCore/ANgUC/src/Grids/extruded.jl:102
[23] #18
@ ~/.julia/packages/ClimaCore/ANgUC/src/Grids/extruded.jl:86 [inlined]
[24] get!(default::ClimaCore.Grids.var"#18#19"{…}, h::Dict{…}, key::Tuple{…})
@ Base ./dict.jl:479
[25] ExtrudedFiniteDifferenceGrid
@ ClimaCore.Grids ~/.julia/packages/ClimaCore/ANgUC/src/Grids/extruded.jl:76 [inlined]
[26] #ExtrudedFiniteDifferenceGrid#17
@ ClimaCore.Grids ~/.julia/packages/ClimaCore/ANgUC/src/Grids/extruded.jl:61 [inlined]
[27] make_hybrid_spaces(h_space::ClimaCore.Spaces.SpectralElementSpace2D{…}, z_max::Float32, z_elem::Int64, z_stretch::ClimaCore.Meshes.GeneralizedExponentialStretching{…}; surface_warp::Nothing, topo_smoothing::Bool, deep::Bool, parsed_args::Dict{…})
@ ClimaAtmos ~/ClimaAtmos.jl/src/utils/common_spaces.jl:124
[28] get_spaces(parsed_args::Dict{…}, params::ClimaAtmos.Parameters.ClimaAtmosParameters{…}, comms_ctx::ClimaComms.SingletonCommsContext{…})
@ ClimaAtmos ~/ClimaAtmos.jl/src/solver/type_getters.jl:224
[29] get_simulation(config::ClimaAtmos.AtmosConfig{…})
@ ClimaAtmos ~/ClimaAtmos.jl/src/solver/type_getters.jl:628
[30] top-level scope
@ REPL[6]:1
caused by: CUDA error: invalid argument (code 1, ERROR_INVALID_VALUE)
Stacktrace:
[1] throw_api_error(res::CUDA.cudaError_enum)
@ CUDA ~/.julia/packages/CUDA/75aiI/lib/cudadrv/libcuda.jl:30
[2] check
@ ~/.julia/packages/CUDA/75aiI/lib/cudadrv/libcuda.jl:37 [inlined]
[3] cuLaunchKernel
@ ~/.julia/packages/CUDA/75aiI/lib/utils/call.jl:34 [inlined]
[4] (::CUDA.var"#966#967"{…})(kernelParams::Vector{…})
@ CUDA ~/.julia/packages/CUDA/75aiI/lib/cudadrv/execution.jl:66
[5] macro expansion
@ ~/.julia/packages/CUDA/75aiI/lib/cudadrv/execution.jl:33 [inlined]
[6] macro expansion
@ ./none:0 [inlined]
[7] pack_arguments(::CUDA.var"#966#967"{…}, ::CUDA.KernelState, ::ClimaCore.DataLayouts.VIJFH{…}, ::Base.Broadcast.Broadcasted{…})
@ CUDA ./none:0
[8] launch(::CUDA.CuFunction, ::CUDA.KernelState, ::ClimaCore.DataLayouts.VIJFH{…}, ::Base.Broadcast.Broadcasted{…}; blocks::Tuple{…}, threads::Tuple{…}, cooperative::Bool, shmem::Int64, stream::CUDA.CuStream)
@ CUDA ~/.julia/packages/CUDA/75aiI/lib/cudadrv/execution.jl:59
[9] launch
@ ~/.julia/packages/CUDA/75aiI/lib/cudadrv/execution.jl:52 [inlined]
[10] #972
@ ~/.julia/packages/CUDA/75aiI/lib/cudadrv/execution.jl:189 [inlined]
[11] macro expansion
@ ~/.julia/packages/CUDA/75aiI/lib/cudadrv/execution.jl:149 [inlined]
[12] macro expansion
@ ./none:0 [inlined]
[13] convert_arguments
@ ./none:0 [inlined]
[14] #cudacall#971
@ ~/.julia/packages/CUDA/75aiI/lib/cudadrv/execution.jl:191 [inlined]
[15] cudacall
@ ~/.julia/packages/CUDA/75aiI/lib/cudadrv/execution.jl:187 [inlined]
[16] macro expansion
@ ~/.julia/packages/CUDA/75aiI/src/compiler/execution.jl:268 [inlined]
[17] macro expansion
@ ./none:0 [inlined]
[18] call
@ ./none:0 [inlined]
[19] (::CUDA.HostKernel{…})(::ClimaCore.DataLayouts.VIJFH{…}, ::Base.Broadcast.Broadcasted{…}; threads::Tuple{…}, blocks::Tuple{…}, kwargs::@Kwargs{})
@ CUDA ~/.julia/packages/CUDA/75aiI/src/compiler/execution.jl:390
[20] kwcall(::NamedTuple, kernel::CUDA.HostKernel, args::Vararg{Any, N}) where N
@ CUDA ~/.julia/packages/CUDA/75aiI/src/compiler/execution.jl:389 [inlined]
[21] macro expansion
@ ~/.julia/packages/CUDA/75aiI/src/compiler/execution.jl:114 [inlined]
[22] auto_launch!(f!::typeof(ClimaCoreCUDAExt.knl_copyto!), args::Tuple{…}, data::ClimaCore.DataLayouts.VIJFH{…}; auto::Bool, threads_s::Tuple{…}, blocks_s::Tuple{…}, always_inline::Bool, caller::Symbol)
@ ClimaCoreCUDAExt ~/.julia/packages/ClimaCore/ANgUC/ext/cuda/cuda_utils.jl:58
[23] auto_launch!
@ ~/.julia/packages/ClimaCore/ANgUC/ext/cuda/cuda_utils.jl:38 [inlined]
[24] copyto!
@ ~/.julia/packages/ClimaCore/ANgUC/ext/cuda/data_layouts.jl:71 [inlined]
[25] copy
@ ./broadcast.jl:928 [inlined]
[26] materialize
@ ./broadcast.jl:903 [inlined]
[27] _ExtrudedFiniteDifferenceGrid(horizontal_grid::ClimaCore.Grids.SpectralElementGrid2D{…}, vertical_grid::ClimaCore.Grids.FiniteDifferenceGrid{…}, hypsography::ClimaCore.Grids.Flat, global_geometry::ClimaCore.Geometry.CartesianGlobalGeometry)
@ ClimaCore.Grids ~/.julia/packages/ClimaCore/ANgUC/src/Grids/extruded.jl:102
[28] #18
@ ~/.julia/packages/ClimaCore/ANgUC/src/Grids/extruded.jl:86 [inlined]
[29] get!(default::ClimaCore.Grids.var"#18#19"{…}, h::Dict{…}, key::Tuple{…})
@ Base ./dict.jl:479
[30] ExtrudedFiniteDifferenceGrid
@ ClimaCore.Grids ~/.julia/packages/ClimaCore/ANgUC/src/Grids/extruded.jl:76 [inlined]
[31] #ExtrudedFiniteDifferenceGrid#17
@ ClimaCore.Grids ~/.julia/packages/ClimaCore/ANgUC/src/Grids/extruded.jl:61 [inlined]
[32] make_hybrid_spaces(h_space::ClimaCore.Spaces.SpectralElementSpace2D{…}, z_max::Float32, z_elem::Int64, z_stretch::ClimaCore.Meshes.GeneralizedExponentialStretching{…}; surface_warp::Nothing, topo_smoothing::Bool, deep::Bool, parsed_args::Dict{…})
@ ClimaAtmos ~/ClimaAtmos.jl/src/utils/common_spaces.jl:124
[33] get_spaces(parsed_args::Dict{…}, params::ClimaAtmos.Parameters.ClimaAtmosParameters{…}, comms_ctx::ClimaComms.SingletonCommsContext{…})
@ ClimaAtmos ~/ClimaAtmos.jl/src/solver/type_getters.jl:224
[34] get_simulation(config::ClimaAtmos.AtmosConfig{…})
@ ClimaAtmos ~/ClimaAtmos.jl/src/solver/type_getters.jl:628
[35] top-level scope
@ REPL[6]:1
Some type information was truncated. Use `show(err)` to see complete types.
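Maybe this is related to `auto_launch!`? The failing kernel launch goes through it (`ClimaCore/ext/cuda/cuda_utils.jl:58` in the stack trace above).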