Description
Update: see below for the error on the latest 1.8-beta3.
During Installation
When trying to install the PyTorch deps with GPU support, pip kept installing a different PyTorch version in the second command, the one that installs functorch. This was fixed by installing both in one command:

run(`$(PyCall.pyprogramname) -m pip install torch==1.11.0+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html functorch`)
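A quick sanity check that both packages ended up on the same build (a minimal sketch; the printed version string is just what I'd expect from the cu113 wheel):

```julia
using PyCall

# If functorch pulled in its own torch, the two imports would disagree
# (typically a CPU-only build) and the version suffix would change.
torch = pyimport("torch")
functorch = pyimport("functorch")

println(torch.__version__)          # expect "1.11.0+cu113"
println(torch.cuda.is_available())  # expect true with a working driver
```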
isfunctional
At least on my version of CUDA.jl, isfunctional does not exist; only CUDA.functional() works.
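For reference, the guard that works for me (assuming nothing beyond CUDA.jl itself; CUDA.functional() is the documented entry point):

```julia
using CUDA

# CUDA.functional() returns true when a usable device and driver are
# present; CUDA.isfunctional does not exist in the versions I tried.
if CUDA.functional()
    @info "CUDA is functional" device = CUDA.name(CUDA.device())
else
    @warn "CUDA not functional; falling back to CPU"
end
```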
Error taking gradients when running the last line, i.e. grad = Zygote.gradient(m->loss(m, input, target), jlwrap)
This one I haven't been able to fix. It would be helpful to know whether this happens only on my machine. A rough sketch of the setup follows, then the full stacktrace.
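The setup is roughly the following (a minimal sketch: the layer size, array shapes, and loss definition are illustrative placeholders, not my exact script):

```julia
using CUDA
using Zygote
using PyCall
using PyCallChainRules.Torch: TorchModuleWrapper

torch = pyimport("torch")

# A single Linear layer on the GPU, wrapped for use with Julia AD.
lin = torch.nn.Linear(4, 4).to("cuda")
jlwrap = TorchModuleWrapper(lin)

# 2-D Float32 CuArrays, matching the CuArray{Float32, 2} in the trace.
input = CUDA.rand(Float32, 4, 8)
target = CUDA.rand(Float32, 4, 8)

# A simple MSE-style loss as a stand-in.
loss(m, x, y) = sum(abs2, m(x) .- y)

# The forward pass succeeds; this gradient call triggers the error.
grad = Zygote.gradient(m -> loss(m, input, target), jlwrap)
```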
Full stacktrace
julia> grad = Zygote.gradient(m->loss(m, input, target), jlwrap)
WARNING: Error while freeing DeviceBuffer(4 bytes at 0x0000000302005000):
CUDA.CuError(code=CUDA.cudaError_enum(0x000002bc), meta=nothing)
Stacktrace:
[1] throw_api_error(res::CUDA.cudaError_enum)
@ CUDA ~/.julia/packages/CUDA/5jdFl/lib/cudadrv/error.jl:91
[2] macro expansion
@ ~/.julia/packages/CUDA/5jdFl/lib/cudadrv/error.jl:101 [inlined]
[3] cuMemFreeAsync(dptr::CUDA.Mem.DeviceBuffer, hStream::CuStream)
@ CUDA ~/.julia/packages/CUDA/5jdFl/lib/utils/call.jl:26
[4] #free#2
@ ~/.julia/packages/CUDA/5jdFl/lib/cudadrv/memory.jl:97 [inlined]
[5] macro expansion
@ ~/.julia/packages/CUDA/5jdFl/src/pool.jl:58 [inlined]
[6] macro expansion
@ ./timing.jl:359 [inlined]
[7] #actual_free#189
@ ~/.julia/packages/CUDA/5jdFl/src/pool.jl:57 [inlined]
[8] #_free#207
@ ~/.julia/packages/CUDA/5jdFl/src/pool.jl:375 [inlined]
[9] macro expansion
@ ~/.julia/packages/CUDA/5jdFl/src/pool.jl:340 [inlined]
[10] macro expansion
@ ./timing.jl:359 [inlined]
[11] #free#206
@ ~/.julia/packages/CUDA/5jdFl/src/pool.jl:339 [inlined]
[12] #212
@ ~/.julia/packages/CUDA/5jdFl/src/array.jl:79 [inlined]
[13] context!(f::CUDA.var"#212#213"{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuStream}, ctx::CuContext; skip_destroyed::Bool)
@ CUDA ~/.julia/packages/CUDA/5jdFl/lib/cudadrv/state.jl:164
[14] unsafe_free!(xs::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, stream::CuStream)
@ CUDA ~/.julia/packages/CUDA/5jdFl/src/array.jl:78
[15] unsafe_finalize!(xs::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer})
@ CUDA ~/.julia/packages/CUDA/5jdFl/src/array.jl:99
[16] synchronize_cuda_tasks(ex::Expr)
@ CUDA ~/.julia/packages/CUDA/5jdFl/src/initialization.jl:50
[17] #invokelatest#2
@ ./essentials.jl:729 [inlined]
[18] invokelatest
@ ./essentials.jl:727 [inlined]
[19] eval_user_input(ast::Any, backend::REPL.REPLBackend)
@ REPL ~/.julia/juliaup/julia-1.8.0-beta1+0x64/share/julia/stdlib/v1.8/REPL/src/REPL.jl:149
[20] repl_backend_loop(backend::REPL.REPLBackend)
@ REPL ~/.julia/juliaup/julia-1.8.0-beta1+0x64/share/julia/stdlib/v1.8/REPL/src/REPL.jl:247
[21] start_repl_backend(backend::REPL.REPLBackend, consumer::Any)
@ REPL ~/.julia/juliaup/julia-1.8.0-beta1+0x64/share/julia/stdlib/v1.8/REPL/src/REPL.jl:232
[22] run_repl(repl::REPL.AbstractREPL, consumer::Any; backend_on_current_task::Bool)
@ REPL ~/.julia/juliaup/julia-1.8.0-beta1+0x64/share/julia/stdlib/v1.8/REPL/src/REPL.jl:369
[23] run_repl(repl::REPL.AbstractREPL, consumer::Any)
@ REPL ~/.julia/juliaup/julia-1.8.0-beta1+0x64/share/julia/stdlib/v1.8/REPL/src/REPL.jl:356
[24] (::Base.var"#960#962"{Bool, Bool, Bool})(REPL::Module)
@ Base ./client.jl:419
[25] #invokelatest#2
@ ./essentials.jl:729 [inlined]
[26] invokelatest
@ ./essentials.jl:727 [inlined]
[27] run_main_repl(interactive::Bool, quiet::Bool, banner::Bool, history_file::Bool, color_set::Bool)
@ Base ./client.jl:404
[28] exec_options(opts::Base.JLOptions)
@ Base ./client.jl:318
[29] _start()
@ Base ./client.jl:522
WARNING: Error while freeing DeviceBuffer(4.000 KiB at 0x0000000302004000):
CUDA.CuError(code=CUDA.cudaError_enum(0x000002bc), meta=nothing)
Stacktrace:
[1] throw_api_error(res::CUDA.cudaError_enum)
@ CUDA ~/.julia/packages/CUDA/5jdFl/lib/cudadrv/error.jl:91
[2] macro expansion
@ ~/.julia/packages/CUDA/5jdFl/lib/cudadrv/error.jl:101 [inlined]
[3] cuMemFreeAsync(dptr::CUDA.Mem.DeviceBuffer, hStream::CuStream)
@ CUDA ~/.julia/packages/CUDA/5jdFl/lib/utils/call.jl:26
[4] #free#2
@ ~/.julia/packages/CUDA/5jdFl/lib/cudadrv/memory.jl:97 [inlined]
[5] macro expansion
@ ~/.julia/packages/CUDA/5jdFl/src/pool.jl:58 [inlined]
[6] macro expansion
@ ./timing.jl:359 [inlined]
[7] #actual_free#189
@ ~/.julia/packages/CUDA/5jdFl/src/pool.jl:57 [inlined]
[8] #_free#207
@ ~/.julia/packages/CUDA/5jdFl/src/pool.jl:375 [inlined]
[9] macro expansion
@ ~/.julia/packages/CUDA/5jdFl/src/pool.jl:340 [inlined]
[10] macro expansion
@ ./timing.jl:359 [inlined]
[11] #free#206
@ ~/.julia/packages/CUDA/5jdFl/src/pool.jl:339 [inlined]
[12] #212
@ ~/.julia/packages/CUDA/5jdFl/src/array.jl:79 [inlined]
[13] context!(f::CUDA.var"#212#213"{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuStream}, ctx::CuContext; skip_destroyed::Bool)
@ CUDA ~/.julia/packages/CUDA/5jdFl/lib/cudadrv/state.jl:164
[14] unsafe_free!(xs::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, stream::CuStream)
@ CUDA ~/.julia/packages/CUDA/5jdFl/src/array.jl:78
[15] unsafe_finalize!(xs::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer})
@ CUDA ~/.julia/packages/CUDA/5jdFl/src/array.jl:99
[16] synchronize_cuda_tasks(ex::Expr)
@ CUDA ~/.julia/packages/CUDA/5jdFl/src/initialization.jl:50
[17] #invokelatest#2
@ ./essentials.jl:729 [inlined]
[18] invokelatest
@ ./essentials.jl:727 [inlined]
[19] eval_user_input(ast::Any, backend::REPL.REPLBackend)
@ REPL ~/.julia/juliaup/julia-1.8.0-beta1+0x64/share/julia/stdlib/v1.8/REPL/src/REPL.jl:149
[20] repl_backend_loop(backend::REPL.REPLBackend)
@ REPL ~/.julia/juliaup/julia-1.8.0-beta1+0x64/share/julia/stdlib/v1.8/REPL/src/REPL.jl:247
[21] start_repl_backend(backend::REPL.REPLBackend, consumer::Any)
@ REPL ~/.julia/juliaup/julia-1.8.0-beta1+0x64/share/julia/stdlib/v1.8/REPL/src/REPL.jl:232
[22] run_repl(repl::REPL.AbstractREPL, consumer::Any; backend_on_current_task::Bool)
@ REPL ~/.julia/juliaup/julia-1.8.0-beta1+0x64/share/julia/stdlib/v1.8/REPL/src/REPL.jl:369
[23] run_repl(repl::REPL.AbstractREPL, consumer::Any)
@ REPL ~/.julia/juliaup/julia-1.8.0-beta1+0x64/share/julia/stdlib/v1.8/REPL/src/REPL.jl:356
[24] (::Base.var"#960#962"{Bool, Bool, Bool})(REPL::Module)
@ Base ./client.jl:419
[25] #invokelatest#2
@ ./essentials.jl:729 [inlined]
[26] invokelatest
@ ./essentials.jl:727 [inlined]
[27] run_main_repl(interactive::Bool, quiet::Bool, banner::Bool, history_file::Bool, color_set::Bool)
@ Base ./client.jl:404
[28] exec_options(opts::Base.JLOptions)
@ Base ./client.jl:318
[29] _start()
@ Base ./client.jl:522
ERROR: PyError ($(Expr(:escape, :(ccall(#= /home/lorenz/.julia/packages/PyCall/7a7w0/src/pyfncall.jl:43 =# @pysym(:PyObject_Call), PyPtr, (PyPtr, PyPtr, PyPtr), o, pyargsptr, kw))))) <class 'RuntimeError'>
RuntimeError('CUDA error: an illegal memory access was encountered\nCUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.\nFor debugging consider passing CUDA_LAUNCH_BLOCKING=1.')
File "/home/lorenz/anaconda3/envs/pycall/lib/python3.8/site-packages/functorch/_src/eager_transforms.py", line 243, in vjp
primals_out = func(*diff_primals)
File "/home/lorenz/.julia/packages/PyCall/7a7w0/src/pyeval.jl", line 3, in newfn
const Py_eval_input = 258
File "/home/lorenz/anaconda3/envs/pycall/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "/home/lorenz/anaconda3/envs/pycall/lib/python3.8/site-packages/functorch/_src/make_functional.py", line 259, in forward
return self.stateless_model(*args, **kwargs)
File "/home/lorenz/anaconda3/envs/pycall/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "/home/lorenz/anaconda3/envs/pycall/lib/python3.8/site-packages/torch/nn/modules/linear.py", line 103, in forward
return F.linear(input, self.weight, self.bias)
Stacktrace:
[1] pyerr_check
@ ~/.julia/packages/PyCall/7a7w0/src/exception.jl:62 [inlined]
[2] pyerr_check
@ ~/.julia/packages/PyCall/7a7w0/src/exception.jl:66 [inlined]
[3] _handle_error(msg::String)
@ PyCall ~/.julia/packages/PyCall/7a7w0/src/exception.jl:83
[4] macro expansion
@ ~/.julia/packages/PyCall/7a7w0/src/exception.jl:97 [inlined]
[5] #107
@ ~/.julia/packages/PyCall/7a7w0/src/pyfncall.jl:43 [inlined]
[6] disable_sigint
@ ./c.jl:473 [inlined]
[7] __pycall!
@ ~/.julia/packages/PyCall/7a7w0/src/pyfncall.jl:42 [inlined]
[8] _pycall!(ret::PyObject, o::PyObject, args::Tuple{PyObject, Tuple{PyObject, PyObject}, PyObject}, nargs::Int64, kw::Ptr{Nothing})
@ PyCall ~/.julia/packages/PyCall/7a7w0/src/pyfncall.jl:29
[9] _pycall!(ret::PyObject, o::PyObject, args::Tuple{PyObject, Tuple{PyObject, PyObject}, PyObject}, kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ PyCall ~/.julia/packages/PyCall/7a7w0/src/pyfncall.jl:11
[10] (::PyObject)(::PyObject, ::Vararg{Any}; kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ PyCall ~/.julia/packages/PyCall/7a7w0/src/pyfncall.jl:86
[11] (::PyObject)(::PyObject, ::Vararg{Any})
@ PyCall ~/.julia/packages/PyCall/7a7w0/src/pyfncall.jl:86
[12] rrule(wrap::TorchModuleWrapper, args::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}; kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ PyCallChainRules.Torch ~/.julia/packages/PyCallChainRules/Vrwrg/src/pytorch.jl:65
[13] rrule
@ ~/.julia/packages/PyCallChainRules/Vrwrg/src/pytorch.jl:60 [inlined]
[14] rrule
@ ~/.julia/packages/ChainRulesCore/IzITE/src/rules.jl:134 [inlined]
[15] chain_rrule
@ ~/.julia/packages/Zygote/H6vD3/src/compiler/chainrules.jl:216 [inlined]
[16] macro expansion
@ ~/.julia/packages/Zygote/H6vD3/src/compiler/interface2.jl:0 [inlined]
[17] _pullback(ctx::Zygote.Context, f::TorchModuleWrapper, args::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer})
@ Zygote ~/.julia/packages/Zygote/H6vD3/src/compiler/interface2.jl:9
[18] _pullback
@ ./REPL[31]:1 [inlined]
[19] _pullback(::Zygote.Context, ::typeof(loss), ::TorchModuleWrapper, ::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, ::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer})
@ Zygote ~/.julia/packages/Zygote/H6vD3/src/compiler/interface2.jl:0
[20] _pullback
@ ./REPL[35]:1 [inlined]
[21] _pullback(ctx::Zygote.Context, f::var"#11#12", args::TorchModuleWrapper)
@ Zygote ~/.julia/packages/Zygote/H6vD3/src/compiler/interface2.jl:0
[22] _pullback(f::Function, args::TorchModuleWrapper)
@ Zygote ~/.julia/packages/Zygote/H6vD3/src/compiler/interface.jl:34
[23] pullback(f::Function, args::TorchModuleWrapper)
@ Zygote ~/.julia/packages/Zygote/H6vD3/src/compiler/interface.jl:40
[24] gradient(f::Function, args::TorchModuleWrapper)
@ Zygote ~/.julia/packages/Zygote/H6vD3/src/compiler/interface.jl:75
[25] top-level scope
@ REPL[35]:1
ERROR: CUDA error: an illegal memory access was encountered (code 700, ERROR_ILLEGAL_ADDRESS)
Stacktrace:
[1] throw_api_error(res::CUDA.cudaError_enum)
@ CUDA ~/.julia/packages/CUDA/5jdFl/lib/cudadrv/error.jl:91
[2] isdone
@ ~/.julia/packages/CUDA/5jdFl/lib/cudadrv/stream.jl:109 [inlined]
[3] nonblocking_synchronize
@ ~/.julia/packages/CUDA/5jdFl/lib/cudadrv/stream.jl:139 [inlined]
[4] nonblocking_synchronize
@ ~/.julia/packages/CUDA/5jdFl/lib/cudadrv/context.jl:325 [inlined]
[5] device_synchronize()
@ CUDA ~/.julia/packages/CUDA/5jdFl/lib/cudadrv/context.jl:319
[6] top-level scope
@ ~/.julia/packages/CUDA/5jdFl/src/initialization.jl:54
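The PyTorch error above suggests CUDA_LAUNCH_BLOCKING=1 for a more trustworthy stacktrace. If anyone wants to reproduce with that, note the flag has to be set before torch initializes CUDA (this is the standard PyTorch debugging switch, nothing specific to this package):

```julia
# Must be set before the first pyimport("torch"); once the CUDA
# context exists, the flag has no effect.
ENV["CUDA_LAUNCH_BLOCKING"] = "1"

using PyCall
torch = pyimport("torch")  # kernel launches are now synchronous
```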
Output of CUDA.versioninfo()
CUDA toolkit 11.6, artifact installation
NVIDIA driver 470.103.1, for CUDA 11.4
CUDA driver 11.4

Libraries:
- CUBLAS: 11.8.1
- CURAND: 10.2.9
- CUFFT: 10.7.0
- CUSOLVER: 11.3.2
- CUSPARSE: 11.7.1
- CUPTI: 16.0.0
- NVML: 11.0.0+470.103.1
- CUDNN: 8.30.2 (for CUDA 11.5.0)
- CUTENSOR: 1.4.0 (for CUDA 11.5.0)
Toolchain:
- Julia: 1.8.0-beta1
- LLVM: 13.0.1
- PTX ISA support: 3.2, 4.0, 4.1, 4.2, 4.3, 5.0, 6.0, 6.1, 6.3, 6.4, 6.5, 7.0
- Device capability support: sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80
1 device:
0: NVIDIA GeForce GTX 1080 Ti (sm_61, 10.067 GiB / 10.915 GiB available)