-
Notifications
You must be signed in to change notification settings - Fork 47
Description
This problem occurs when I use an RTX 4090:
Traceback (most recent call last):
File "main_qm9.py", line 298, in <module>
main(args)
File "main_qm9.py", line 235, in main
train_err = train_one_epoch(model=model, criterion=criterion, norm_factor=norm_factor,
File "/home/zyli/equiformer/engine.py", line 63, in train_one_epoch
pred = model(f_in=data.x, pos=data.pos, batch=data.batch,
File "/home/zyli/anaconda3/envs/equiformer/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/zyli/equiformer/nets/graph_attention_transformer.py", line 885, in forward
node_features = blk(node_input=node_features, node_attr=node_attr,
File "/home/zyli/anaconda3/envs/equiformer/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/zyli/equiformer/nets/graph_attention_transformer.py", line 646, in forward
node_features = self.ga(node_input=node_features,
File "/home/zyli/anaconda3/envs/equiformer/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/zyli/equiformer/nets/graph_attention_transformer.py", line 508, in forward
alpha = torch_geometric.utils.softmax(alpha, edge_dst)
RuntimeError: The following operation failed in the TorchScript interpreter.
Traceback of TorchScript (most recent call last):
RuntimeError: nvrtc: error: invalid value for --gpu-architecture (-arch)
nvrtc compilation failed:
#define NAN __int_as_float(0x7fffffff)
#define POS_INFINITY __int_as_float(0x7f800000)
#define NEG_INFINITY __int_as_float(0xff800000)
template<typename T>
__device__ T maximum(T a, T b) {
return isnan(a) ? a : (a > b ? a : b);
}
template<typename T>
__device__ T minimum(T a, T b) {
return isnan(a) ? a : (a < b ? a : b);
}
extern "C" __global__
void fused_sub_exp(float* tsrc_1, float* tsrc_max_9, float* output_1) {
{
if ((long long)(threadIdx.x) + 512ll * (long long)(blockIdx.x)<140632ll ? 1 : 0) {
float v = __ldg(tsrc_1 + (long long)(threadIdx.x) + 512ll * (long long)(blockIdx.x));
float v_1 = __ldg(tsrc_max_9 + ((long long)(threadIdx.x) + 512ll * (long long)(blockIdx.x)) / 35158ll + 4ll * (((long long)(threadIdx.x) + 512ll * (long long)(blockIdx.x)) % 35158ll));
output_1[(long long)(threadIdx.x) + 512ll * (long long)(blockIdx.x)] = expf(v - v_1);
}}
}
(equiformer) zyli@ubuntu:~/equiformer$ conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch-nightly -c nvidia
Collecting package metadata (current_repodata.json): failed
CondaHTTPError: HTTP 000 CONNECTION FAILED for url https://conda.anaconda.org/pytorch-nightly/noarch/current_repodata.json
Elapsed: -
An HTTP error occurred when trying to retrieve this URL.
HTTP errors are often intermittent, and a simple retry will get you on your way.
'https://conda.anaconda.org/pytorch-nightly/noarch'
I am using the same environment as you, since I created it from env_equiformer.yml.