Replies: 1 comment 1 reply
-
Have you followed this message?
Beta Was this translation helpful? Give feedback.
1 reply
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Uh oh!
There was an error while loading. Please reload this page.
-
nohup: 忽略输入
INFO:dpgen:-------------------------iter.000000 task 01--------------------------
INFO:dpgen:-------------------------iter.000000 task 02--------------------------
INFO:dpgen:-------------------------iter.000000 task 03--------------------------
INFO:dpgen:-------------------------iter.000000 task 04--------------------------
Traceback (most recent call last):
File "/home/shuoxing/.local/lib/python3.10/site-packages/dpdispatcher/submission.py", line 287, in handle_unexpected_submission_state
job.handle_unexpected_job_state()
File "/home/shuoxing/.local/lib/python3.10/site-packages/dpdispatcher/submission.py", line 732, in handle_unexpected_job_state
raise RuntimeError(
RuntimeError: job:8a5004e96ed9b5fe7d8777f76ea1aca247c87029 321 failed 3 times.job_detail:{'8a5004e96ed9b5fe7d8777f76ea1aca247c87029': {'job_task_list': [{'command': "/bin/sh -c '{ if [ ! -f dpgen.restart.10000 ]; then lmp -i input.lammps -v restart 0; else lmp -i input.lammps -v restart 1; fi }'", 'task_work_path': 'task.000.000007', 'forward_files': ['conf.lmp', 'input.lammps', 'traj'], 'backward_files': ['model_devi.out', 'model_devi.log', 'traj'], 'outlog': 'model_devi.log', 'errlog': 'model_devi.log'}, {'command': "/bin/sh -c '{ if [ ! -f dpgen.restart.10000 ]; then lmp -i input.lammps -v restart 0; else lmp -i input.lammps -v restart 1; fi }'", 'task_work_path': 'task.000.000003', 'forward_files': ['conf.lmp', 'input.lammps', 'traj'], 'backward_files': ['model_devi.out', 'model_devi.log', 'traj'], 'outlog': 'model_devi.log', 'errlog': 'model_devi.log'}, {'command': "/bin/sh -c '{ if [ ! -f dpgen.restart.10000 ]; then lmp -i input.lammps -v restart 0; else lmp -i input.lammps -v restart 1; fi }'", 'task_work_path': 'task.000.000002', 'forward_files': ['conf.lmp', 'input.lammps', 'traj'], 'backward_files': ['model_devi.out', 'model_devi.log', 'traj'], 'outlog': 'model_devi.log', 'errlog': 'model_devi.log'}, {'command': "/bin/sh -c '{ if [ ! -f dpgen.restart.10000 ]; then lmp -i input.lammps -v restart 0; else lmp -i input.lammps -v restart 1; fi }'", 'task_work_path': 'task.000.000008', 'forward_files': ['conf.lmp', 'input.lammps', 'traj'], 'backward_files': ['model_devi.out', 'model_devi.log', 'traj'], 'outlog': 'model_devi.log', 'errlog': 'model_devi.log'}, {'command': "/bin/sh -c '{ if [ ! -f dpgen.restart.10000 ]; then lmp -i input.lammps -v restart 0; else lmp -i input.lammps -v restart 1; fi }'", 'task_work_path': 'task.000.000005', 'forward_files': ['conf.lmp', 'input.lammps', 'traj'], 'backward_files': ['model_devi.out', 'model_devi.log', 'traj'], 'outlog': 'model_devi.log', 'errlog': 'model_devi.log'}, {'command': "/bin/sh -c '{ if [ ! 
-f dpgen.restart.10000 ]; then lmp -i input.lammps -v restart 0; else lmp -i input.lammps -v restart 1; fi }'", 'task_work_path': 'task.000.000006', 'forward_files': ['conf.lmp', 'input.lammps', 'traj'], 'backward_files': ['model_devi.out', 'model_devi.log', 'traj'], 'outlog': 'model_devi.log', 'errlog': 'model_devi.log'}, {'command': "/bin/sh -c '{ if [ ! -f dpgen.restart.10000 ]; then lmp -i input.lammps -v restart 0; else lmp -i input.lammps -v restart 1; fi }'", 'task_work_path': 'task.000.000009', 'forward_files': ['conf.lmp', 'input.lammps', 'traj'], 'backward_files': ['model_devi.out', 'model_devi.log', 'traj'], 'outlog': 'model_devi.log', 'errlog': 'model_devi.log'}, {'command': "/bin/sh -c '{ if [ ! -f dpgen.restart.10000 ]; then lmp -i input.lammps -v restart 0; else lmp -i input.lammps -v restart 1; fi }'", 'task_work_path': 'task.000.000004', 'forward_files': ['conf.lmp', 'input.lammps', 'traj'], 'backward_files': ['model_devi.out', 'model_devi.log', 'traj'], 'outlog': 'model_devi.log', 'errlog': 'model_devi.log'}, {'command': "/bin/sh -c '{ if [ ! -f dpgen.restart.10000 ]; then lmp -i input.lammps -v restart 0; else lmp -i input.lammps -v restart 1; fi }'", 'task_work_path': 'task.000.000000', 'forward_files': ['conf.lmp', 'input.lammps', 'traj'], 'backward_files': ['model_devi.out', 'model_devi.log', 'traj'], 'outlog': 'model_devi.log', 'errlog': 'model_devi.log'}, {'command': "/bin/sh -c '{ if [ ! 
-f dpgen.restart.10000 ]; then lmp -i input.lammps -v restart 0; else lmp -i input.lammps -v restart 1; fi }'", 'task_work_path': 'task.000.000001', 'forward_files': ['conf.lmp', 'input.lammps', 'traj'], 'backward_files': ['model_devi.out', 'model_devi.log', 'traj'], 'outlog': 'model_devi.log', 'errlog': 'model_devi.log'}], 'resources': {'number_node': 1, 'cpu_per_node': 26, 'gpu_per_node': 0, 'queue_name': 'debug', 'group_size': 10, 'custom_flags': [], 'strategy': {'if_cuda_multi_devices': False, 'ratio_unfinished': 0.0}, 'para_deg': 1, 'module_purge': False, 'module_unload_list': [], 'module_list': [], 'source_list': ['/home/shuoxing/work/chb/bin/deepmd.sh'], 'envs': {}, 'prepend_script': [], 'append_script': [], 'wait_time': 0, 'kwargs': {}}, 'job_state': <JobStatus.terminated: 4>, 'job_id': '321', 'fail_count': 3}}
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/shuoxing/.local/bin/dpgen", line 8, in <module>
sys.exit(main())
File "/home/shuoxing/.local/lib/python3.10/site-packages/dpgen/main.py", line 233, in main
args.func(args)
File "/home/shuoxing/.local/lib/python3.10/site-packages/dpgen/generator/run.py", line 5109, in gen_run
run_iter(args.PARAM, args.MACHINE)
File "/home/shuoxing/.local/lib/python3.10/site-packages/dpgen/generator/run.py", line 4451, in run_iter
run_model_devi(ii, jdata, mdata)
File "/home/shuoxing/.local/lib/python3.10/site-packages/dpgen/generator/run.py", line 1962, in run_model_devi
run_md_model_devi(iter_index, jdata, mdata)
File "/home/shuoxing/.local/lib/python3.10/site-packages/dpgen/generator/run.py", line 1955, in run_md_model_devi
submission.run_submission()
File "/home/shuoxing/.local/lib/python3.10/site-packages/dpdispatcher/submission.py", line 252, in run_submission
self.handle_unexpected_submission_state()
File "/home/shuoxing/.local/lib/python3.10/site-packages/dpdispatcher/submission.py", line 290, in handle_unexpected_submission_state
raise RuntimeError(
RuntimeError: Meet errors will handle unexpected submission state.
Debug information: remote_root==/home/shuoxing/work/chb/dpgen-result/5826c49c8dac8e3071ac0639f17b3f3cfedb44e8.
Debug information: submission_hash==5826c49c8dac8e3071ac0639f17b3f3cfedb44e8.
Please check the dirs and scripts in remote_root. The job information mentioned above may help.
Below is my machine.json file:
{
"api_version": "1.0",
"train" : [
{
"machine": {
"context_type": "local",
"batch_type": "slurm",
"machine_type": "slurm",
"local_root": "./",
"remote_root": "/home/shuoxing/work/chb/dpgen-result",
"_remote_profile": {
"_hostname": "node01",
"_username": "test",
"_port": "22"
}
},
"resources": {
"number_node": 1,
"cpu_per_node": 26,
"queue_name": "debug",
"_exclude_list": [],
"source_list": [
"/home/shuoxing/work/chb/bin/deepmd.sh"
],
"_module_list": [],
"_time_limit": "23:00:00",
"group_size": 1
},
"command": "dp"
}
],
"model_devi": [
{
"machine": {
"context_type": "local",
"batch_type": "slurm",
"machine_type": "slurm",
"local_root": "./",
"remote_root": "/home/shuoxing/work/chb/dpgen-result",
"_remote_profile": {
"_hostname": "node01",
"_username": "test",
"_port": "22"
}
},
"resources": {
"number_node": 1,
"cpu_per_node": 26,
"queue_name": "debug",
"exclude_list": [],
"source_list": [
"/home/shuoxing/work/chb/bin/deepmd.sh"
],
"_module_list": [],
"_time_limit": "23:00:00",
"_qos": "data",
"group_size": 10
},
"command": "lmp"
}
],
"fp": [
{
"machine": {
"context_type": "local",
"batch_type": "slurm",
"machine_type": "slurm",
"local_root": "./",
"remote_root": "/home/shuoxing/work/chb/dpgen-result",
"_remote_profile": {
"_hostname": "node01",
"_username": "test",
"_port": "22"
}
},
"resources": {
"number_node": 1,
"cpu_per_node": 26,
"queue_name": "debug",
"exclude_list": [],
"with_mpi": true,
"source_list": [],
"module_list": [
"/opt/software/vasp.5.4.4"
],
"time_limit": "120:00:00",
"_comment": "that's All",
"group_size": 3
},
"command": "mpirun -np 26 /opt/software/vasp.5.4.4/bin/vasp_std"
}
]
}
Beta Was this translation helpful? Give feedback.
All reactions