Skip to content

Commit 2dac5ff

Browse files
authored
Merge pull request #82 from deepmodeling/devel
support for pwmat
2 parents 94022da + b931cd0 commit 2dac5ff

22 files changed

+28478
-4
lines changed

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,9 @@ xyz_multi_systems.to_deepmd_raw('./my_deepmd_data/')
107107
| QE | log | True | False | System | 'qe/cp/traj' |
108108
| QE | log | True | True | LabeledSystem | 'qe/cp/traj' |
109109
|quip/gap|xyz|True|True|MultiSystems|'quip/gap/xyz'|
110-
110+
| PWmat | atom.config | False | False | System | 'pwmat/atom.config' |
111+
| PWmat | movement | True | True | LabeledSystem | 'pwmat/movement' |
112+
| PWmat | OUT.MLMD | True | True | LabeledSystem | 'pwmat/out.mlmd' |
111113
## Access data
112114
These properties stored in `System` and `LabeledSystem` can be accessed by operator `[]` with the key of the property supplied, for example
113115
```python

dpdata/pwmat/atomconfig.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
#!/usr/bin/python3
2+
3+
import numpy as np
4+
5+
def _to_system_data_lower(lines):
    """Parse the lines of a PWmat ``atom.config`` file into a system dict.

    Parameters
    ----------
    lines : list of str
        Lines of the file without trailing newlines. The first token of the
        first line is read as the number of atoms. The list is only read,
        never modified.

    Returns
    -------
    dict
        A dict with the keys ``cells`` (list holding one array built from the
        three lattice rows), ``atom_numbs``, ``coords`` (list holding one
        array of Cartesian coordinates), ``orig``, ``atom_types`` and
        ``atom_names``.

    Notes
    -----
    Atoms are ordered by ascending atomic number. The ordering reproduces a
    swap-based selection sort, so atoms of the same element are not
    guaranteed to keep their order of appearance in the file.
    """
    ELEMENTS = ['H', 'He', 'Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Ne', 'Na', 'Mg', 'Al', 'Si', 'P', 'S', 'Cl', 'Ar', 'K', 'Ca',
                'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn', 'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr', 'Rb', 'Sr', 'Y', 'Zr',
                'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'In', 'Sn', 'Sb', 'Te', 'I', 'Xe', 'Cs', 'Ba', 'La', 'Ce', 'Pr', 'Nd',
                'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg',
                'Tl', 'Pb', 'Bi', 'Po', 'At', 'Rn', 'Fr', 'Ra', 'Ac', 'Th', 'Pa', 'U', 'Np', 'Pu', 'Am', 'Cm', 'Bk', 'Cf', 'Es', 'Fm',
                'Md', 'No', 'Lr']

    system = {}
    natoms = int(lines[0].split()[0])

    # Lattice section: the three lines after the header are the cell vectors.
    cell = []
    for idx, line in enumerate(lines):
        if 'lattice' in line or 'Lattice' in line or 'LATTICE' in line:
            for kk in range(idx + 1, idx + 1 + 3):
                cell.append([float(tok) for tok in lines[kk].split()[0:3]])
            system['cells'] = [np.array(cell)]

    # Position section: "<atomic number> <fx> <fy> <fz> ..." per atom, with
    # fractional coordinates that are converted to Cartesian ones here.
    coord = []
    atomic_number = []
    for idx, line in enumerate(lines):
        if 'Position' in line or 'POSITION' in line or 'position' in line:
            # Work on tokenized copies so the caller's list stays untouched,
            # and parse every sort key exactly once.
            rows = [lines[kk].split() for kk in range(idx + 1, idx + 1 + natoms)]
            keys = [int(tokens[0]) for tokens in rows]
            for kk in range(natoms):
                smallest = kk
                for jj in range(kk + 1, natoms):
                    if keys[jj] < keys[smallest]:
                        smallest = jj
                keys[kk], keys[smallest] = keys[smallest], keys[kk]
                rows[kk], rows[smallest] = rows[smallest], rows[kk]
            cell_matrix = system['cells'][0]
            for key, tokens in zip(keys, rows):
                fractional = np.array([float(tok) for tok in tokens[1:4]])
                coord.append(np.matmul(fractional, cell_matrix))
                atomic_number.append(key)

    species = np.unique(atomic_number)
    system['atom_numbs'] = [int(atomic_number.count(zz)) for zz in species]
    system['coords'] = [np.array(coord)]
    system['orig'] = np.zeros(3)
    # Type index i is assigned to the i-th smallest atomic number present.
    system['atom_types'] = np.array(
        [type_idx
         for type_idx, count in enumerate(system['atom_numbs'])
         for _ in range(count)],
        dtype=int)
    system['atom_names'] = [ELEMENTS[zz - 1] for zz in species]
    return system
51+
52+
53+
def to_system_data(lines):
    """Convert the lines of a PWmat ``atom.config`` file into a system dict.

    Public entry point; the parsing itself is delegated to
    ``_to_system_data_lower``.

    Parameters
    ----------
    lines : list of str
        Lines of the file.

    Returns
    -------
    dict
        Whatever ``_to_system_data_lower`` returns for ``lines``.
    """
    system = _to_system_data_lower(lines)
    return system
55+
56+
57+
def from_system_data(system, f_idx=0, skip_zeros=True):
    """Render one frame of a system dict as PWmat ``atom.config`` text.

    Parameters
    ----------
    system : dict
        System data providing ``atom_numbs``, ``atom_names``, ``atom_types``,
        ``cells`` and ``coords``. ``coords[f_idx]`` and ``atom_types`` must be
        NumPy arrays because they are indexed with an index array.
    f_idx : int
        Index of the frame to write.
    skip_zeros : bool
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    str
        The file content: atom count, ``LATTICE`` and the three cell rows,
        ``POSITION`` and one ``<atomic number> <fx> <fy> <fz> 1 1 1`` line per
        atom, terminated by a newline.

    Raises
    ------
    ValueError
        If an entry of ``atom_names`` with a non-zero count is not a known
        element symbol.
    """
    ELEMENTS = ['H', 'He', 'Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Ne', 'Na', 'Mg', 'Al', 'Si', 'P', 'S', 'Cl', 'Ar', 'K', 'Ca',
                'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn', 'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr', 'Rb', 'Sr', 'Y', 'Zr',
                'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'In', 'Sn', 'Sb', 'Te', 'I', 'Xe', 'Cs', 'Ba', 'La', 'Ce', 'Pr', 'Nd',
                'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg',
                'Tl', 'Pb', 'Bi', 'Po', 'At', 'Rn', 'Fr', 'Ra', 'Ac', 'Th', 'Pa', 'U', 'Np', 'Pu', 'Am', 'Cm', 'Bk', 'Cf', 'Es', 'Fm',
                'Md', 'No', 'Lr']

    cell = system['cells'][f_idx]
    atom_numbs = system['atom_numbs']
    atom_names = system['atom_names']
    atype = system['atom_types']

    out_lines = ['%d' % sum(atom_numbs), 'LATTICE']
    for vector in cell:
        # Every component keeps its trailing blank, as in the original layout.
        out_lines.append(''.join('%.16e ' % component for component in vector))
    out_lines.append('POSITION')

    # Group the atoms by type, keeping their relative order within a type.
    sort_idx = np.lexsort((np.arange(len(atype)), atype))
    posis = system['coords'][f_idx][sort_idx]

    # One atomic number per atom, in the same type-grouped order.
    atomic_numbers = [ELEMENTS.index(name) + 1
                      for count, name in zip(atom_numbs, atom_names)
                      for _ in range(count)]

    # The inverse must come from the frame being written: using frame 0 gives
    # wrong fractional coordinates whenever the cell changes between frames.
    inv_cell = np.linalg.inv(cell)
    keys = []
    posi_list = []
    for number, position in zip(atomic_numbers, posis):
        fractional = np.matmul(position, inv_cell)
        keys.append(number)
        posi_list.append('%d %15.10f %15.10f %15.10f 1 1 1' %
                         (number, fractional[0], fractional[1], fractional[2]))

    # Order the atom lines by ascending atomic number with the same swap-based
    # selection sort as before, so the emitted atom order is unchanged.
    for kk in range(len(posi_list)):
        smallest = kk
        for jj in range(kk + 1, len(posi_list)):
            if keys[jj] < keys[smallest]:
                smallest = jj
        keys[kk], keys[smallest] = keys[smallest], keys[kk]
        posi_list[kk], posi_list[smallest] = posi_list[smallest], posi_list[kk]

    # The empty last item makes the joined text end with a newline.
    return '\n'.join(out_lines + posi_list + [''])

dpdata/pwmat/movement.py

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
import numpy as np
2+
3+
def system_info(lines, type_idx_zero=False):
    """Extract the composition of a system from one MOVEMENT block.

    Parameters
    ----------
    lines : list of str
        Lines of a block. The first token of the first line is read as the
        number of atoms, and the value between the first ``=`` and the
        following comma on that line as the iteration value. The list is only
        read, never modified.
    type_idx_zero : bool
        If True the type indices start at 0, otherwise at 1.

    Returns
    -------
    atom_names : list of str
        Element symbols of the species present, by ascending atomic number.
    atom_numbs : list of int
        Number of atoms of each species, in the order of ``atom_names``.
    atom_types : numpy.ndarray
        Integer type index of every atom, grouped by species.
    nelm : int
        40 if the iteration value is positive, else 100. ``get_frames`` hands
        this to ``analyze_block`` as the SCF-count convergence threshold.
    """
    ELEMENTS = ['H', 'He', 'Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Ne', 'Na', 'Mg', 'Al', 'Si', 'P', 'S', 'Cl', 'Ar', 'K', 'Ca',
                'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn', 'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr', 'Rb', 'Sr', 'Y', 'Zr',
                'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'In', 'Sn', 'Sb', 'Te', 'I', 'Xe', 'Cs', 'Ba', 'La', 'Ce', 'Pr', 'Nd',
                'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg',
                'Tl', 'Pb', 'Bi', 'Po', 'At', 'Rn', 'Fr', 'Ra', 'Ac', 'Th', 'Pa', 'U', 'Np', 'Pu', 'Am', 'Cm', 'Bk', 'Cf', 'Es', 'Fm',
                'Md', 'No', 'Lr']

    natoms = int(lines[0].split()[0])
    iteration = float(lines[0].split('Etot')[0].split('=')[1].split(',')[0])
    nelm = 40 if iteration > 0 else 100

    # Only the multiset of atomic numbers matters here, so the atom lines are
    # read as they are instead of being sorted in place first.
    atomic_number = []
    for idx, line in enumerate(lines):
        if 'Position' in line:
            for kk in range(idx + 1, idx + 1 + natoms):
                atomic_number.append(int(lines[kk].split()[0]))

    species, counts = np.unique(atomic_number, return_counts=True)
    atom_numbs = [int(count) for count in counts]
    atom_names = [ELEMENTS[int(zz) - 1] for zz in species]

    first_type = 0 if type_idx_zero else 1
    atom_types = [type_idx + first_type
                  for type_idx, count in enumerate(atom_numbs)
                  for _ in range(count)]
    return atom_names, atom_numbs, np.array(atom_types, dtype=int), nelm
44+
45+
46+
def get_movement_block(fp):
    """Read the next block from an iterable of lines.

    Lines are consumed from ``fp`` and collected, with a trailing newline
    stripped, until a line containing ``'------------'`` has been collected,
    an empty item is met, or ``fp`` is exhausted.

    Parameters
    ----------
    fp : iterable of str
        Source of lines, e.g. an open text file. Consumption resumes where
        the previous call stopped.

    Returns
    -------
    list of str
        The collected lines, including the separator line if one was found.
        Empty when nothing could be read.
    """
    collected = []
    for raw in fp:
        if not raw:
            break
        collected.append(raw.rstrip('\n'))
        if '------------' in raw:
            break
    return collected
55+
56+
# we assume that the force is printed ...
57+
def get_frames(fname, begin=0, step=1):
    """Read the selected frames of a PWmat MOVEMENT-style file.

    The file is read block by block with ``get_movement_block``. The system
    composition is taken from the first block; every block whose index ``cc``
    satisfies ``cc >= begin and (cc - begin) % step == 0`` is parsed with
    ``analyze_block``. Blocks reported as not converged are skipped, and
    reading stops at the first converged block that yields no coordinates.

    Parameters
    ----------
    fname : str
        Path of the file to read.
    begin : int
        Index of the first block to use.
    step : int
        Stride between used blocks.

    Returns
    -------
    tuple
        ``(atom_names, atom_numbs, atom_types, cells, coords, energies,
        forces, virials)``. The last five are arrays stacked over the kept
        frames; ``virials`` is ``None`` when no kept frame provided one.
    """
    all_coords = []
    all_cells = []
    all_energies = []
    all_forces = []
    all_virials = []

    # The context manager closes the file even when parsing raises.
    with open(fname) as fp:
        blk = get_movement_block(fp)
        atom_names, atom_numbs, atom_types, nelm = system_info(blk, type_idx_zero=True)
        ntot = sum(atom_numbs)

        cc = 0
        while len(blk) > 0:
            if cc >= begin and (cc - begin) % step == 0:
                coord, cell, energy, force, virial, is_converge = analyze_block(blk, ntot, nelm)
                if is_converge:
                    if len(coord) == 0:
                        break
                    all_coords.append(coord)
                    all_cells.append(cell)
                    all_energies.append(energy)
                    all_forces.append(force)
                    if virial is not None:
                        all_virials.append(virial)
            blk = get_movement_block(fp)
            cc += 1

    virials = np.array(all_virials) if len(all_virials) > 0 else None
    return atom_names, atom_numbs, atom_types, np.array(all_cells), np.array(all_coords), \
        np.array(all_energies), np.array(all_forces), virials
94+
95+
96+
def analyze_block(lines, ntot, nelm):
    """Parse one MOVEMENT block into coordinates, cell, energy and forces.

    Parameters
    ----------
    lines : list of str
        Lines of one block. The list is only read, never modified.
    ntot : int
        Number of atom lines following each ``Position`` / ``Force`` header.
    nelm : int
        SCF step count at or above which the frame is flagged as not
        converged.

    Returns
    -------
    coord : list of numpy.ndarray
        Cartesian coordinates, one array per atom (fractional coordinates
        multiplied by the cell).
    cell : list of list of float
        The lattice vectors.
    energy : float or None
        Second token after ``Etot,Ep,Ek (eV)`` on the ``Iteration`` line.
    force : list of list of float
        Columns 1-3 of every force line.
    virial : numpy.ndarray or None
        Present only when the first lattice line contains ``stress``: the
        three stress columns multiplied by ``1.602176621e6 / 1e3`` and divided
        by the cell determinant. This is the exact inverse of the rescaling
        done by ``from_pwmat_output`` in ``dpdata/system.py``, so that loader
        recovers the values printed in the file.
    is_converge : bool
        False when the ``SCF =`` count is greater than or equal to ``nelm``.

    Notes
    -----
    Atoms in both the position and the force section are ordered by ascending
    atomic number with the same swap-based selection sort, so the two stay
    aligned with each other as long as both sections list the atoms in the
    same order.
    """
    # Must match the inverse factor 1e3 / 1.602176621e6 used by the loader;
    # the former 160.2 * 10.0 left the virials off by about 0.011 %.
    stress_to_pressure = 1.602176621e6 / 1e3

    coord = []
    cell = []
    energy = None
    force = []
    virial = None
    is_converge = True
    for idx, line in enumerate(lines):
        if 'Iteration' in line:
            scf_steps = int(line.split('SCF =')[1])
            if scf_steps >= nelm:
                is_converge = False
            energy = float(line.split('Etot,Ep,Ek (eV)')[1].split()[1])
        elif '----------' in line:
            # Separator line: the block is complete.
            assert((force is not None) and len(coord) > 0 and len(cell) > 0)
            return coord, cell, energy, force, virial, is_converge
        elif 'Lattice vector' in line:
            has_stress = 'stress' in lines[idx + 1]
            stress = []
            for dd in range(3):
                tokens = lines[idx + 1 + dd].split()
                cell.append([float(tok) for tok in tokens[0:3]])
                if has_stress:
                    # Layout: three cell columns, two label tokens, then the
                    # three stress columns.
                    stress.append([float(tok) for tok in tokens[5:8]])
            if has_stress:
                volume = np.linalg.det(np.array(cell))
                virial = np.array(stress, dtype=float) * stress_to_pressure / volume
        elif 'Position' in line:
            cell_matrix = np.array(cell)
            for tokens in _selection_sorted_rows(lines, idx + 1, ntot):
                fractional = np.array([float(tok) for tok in tokens[1:4]])
                coord.append(np.matmul(fractional, cell_matrix))
        elif 'Force' in line:
            for tokens in _selection_sorted_rows(lines, idx + 1, ntot):
                values = [float(tok) for tok in tokens]
                force.append(values[1:4])
    return coord, cell, energy, force, virial, is_converge


def _selection_sorted_rows(lines, start, count):
    """Return the tokenized rows ``lines[start:start + count]`` ordered by their leading integer.

    The rows are ordered with a swap-based selection sort on private copies;
    ``lines`` itself is left untouched and every key is parsed only once.
    """
    rows = [lines[kk].split() for kk in range(start, start + count)]
    keys = [int(tokens[0]) for tokens in rows]
    for kk in range(count):
        smallest = kk
        for jj in range(kk + 1, count):
            if keys[jj] < keys[smallest]:
                smallest = jj
        keys[kk], keys[smallest] = keys[smallest], keys[kk]
        rows[kk], rows[smallest] = rows[smallest], rows[kk]
    return rows

dpdata/system.py

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
import dpdata.gaussian.log
1919
import dpdata.cp2k.output
2020
from dpdata.cp2k.output import Cp2kSystems
21+
import dpdata.pwmat.movement
22+
import dpdata.pwmat.atomconfig
2123
from copy import deepcopy
2224
from monty.json import MSONable
2325
from monty.serialization import loadfn,dumpfn
@@ -86,6 +88,7 @@ def __init__ (self,
8688
- ``qe/cp/traj``: Quantum Espresso CP trajectory files. should have: file_name+'.in' and file_name+'.pos'
8789
- ``siesta/output``: siesta SCF output file
8890
- ``siesta/aimd_output``: siesta aimd output file
91+
- ``pwmat/atom.config``: pwmat atom.config
8992
type_map : list of str
9093
Needed by formats lammps/lmp and lammps/dump. Maps atom type to name. The atom with type `ii` is mapped to `type_map[ii]`.
9194
If not provided the atom names are assigned to `'Type_1'`, `'Type_2'`, `'Type_3'`...
@@ -652,7 +655,34 @@ def from_siesta_aiMD_output(self, fname):
652655
self.data['coords'], \
653656
_e, _f, _v \
654657
= dpdata.siesta.aiMD_output.get_aiMD_frame(fname)
655-
658+
@register_from_funcs.register_funcs('atom.config')
@register_from_funcs.register_funcs('final.config')
@register_from_funcs.register_funcs('pwmat/atom.config')
@register_from_funcs.register_funcs('pwmat/final.config')
def from_pwmat_atomconfig(self, file_name):
    """
    Load the system from a pwmat atom.config / final.config file

    Parameters
    ----------
    file_name : str
        The input file name
    """
    content = []
    with open(file_name) as handle:
        for raw in handle:
            content.append(raw.rstrip('\n'))
    self.data = dpdata.pwmat.atomconfig.to_system_data(content)
    self.rot_lower_triangular()
667+
668+
@register_to_funcs.register_funcs("pwmat/atom.config")
def to_pwmat_atomconfig(self, file_name, frame_idx=0):
    """
    Write one frame of the system to a file in pwmat atom.config format

    Parameters
    ----------
    file_name : str
        Name of the file to write
    frame_idx : int
        Index of the frame to write; must be smaller than the number of
        stored coordinate frames
    """
    n_stored = len(self.data['coords'])
    assert(frame_idx < n_stored)
    text = dpdata.pwmat.atomconfig.from_system_data(self.data, frame_idx)
    with open(file_name, 'w') as handle:
        handle.write(text)
684+
685+
656686
def affine_map(self, trans, f_idx = 0) :
657687
assert(np.linalg.det(trans) != 0)
658688
self.data['cells'][f_idx] = np.matmul(self.data['cells'][f_idx], trans)
@@ -891,6 +921,8 @@ def __init__ (self,
891921
- ``gaussian/md``: gaussian ab initio molecular dynamics
892922
- ``cp2k/output``: cp2k output file
893923
- ``cp2k/aimd_output``: cp2k aimd output dir(contains *pos*.xyz and *.log file)
924+
- ``pwmat/movement``: pwmat md output file
925+
- ``pwmat/out.mlmd``: pwmat scf output file
894926
895927
type_map : list of str
896928
Needed by formats deepmd/raw and deepmd/npy. Maps atom type to name. The atom with type `ii` is mapped to `type_map[ii]`.
@@ -1111,6 +1143,34 @@ def from_cp2k_output(self, file_name) :
11111143
self.data['energies'], \
11121144
self.data['forces'], \
11131145
= dpdata.cp2k.output.get_frames(file_name)
1146+
@register_from_funcs.register_funcs('movement')
@register_from_funcs.register_funcs('MOVEMENT')
@register_from_funcs.register_funcs('mlmd')
@register_from_funcs.register_funcs('MLMD')
@register_from_funcs.register_funcs('pwmat/movement')
@register_from_funcs.register_funcs('pwmat/MOVEMENT')
@register_from_funcs.register_funcs('pwmat/mlmd')
@register_from_funcs.register_funcs('pwmat/MLMD')
def from_pwmat_output(self, file_name, begin=0, step=1):
    """
    Load a labeled system from a pwmat MOVEMENT / OUT.MLMD file

    Parameters
    ----------
    file_name : str
        The input file name
    begin : int
        Index of the first frame to read
    step : int
        Stride between the frames that are read
    """
    frames = dpdata.pwmat.movement.get_frames(file_name, begin=begin, step=step)
    (self.data['atom_names'],
     self.data['atom_numbs'],
     self.data['atom_types'],
     self.data['cells'],
     self.data['coords'],
     self.data['energies'],
     self.data['forces'],
     tmp_virial) = frames
    if tmp_virial is not None:
        self.data['virials'] = tmp_virial
    # scale virial to the unit of eV: every frame is multiplied by its cell
    # volume times a fixed prefactor
    if 'virials' in self.data:
        v_pref = 1e3 / 1.602176621e6
        virials = self.data['virials']
        for frame in range(self.get_nframes()):
            frame_cell = np.reshape(self.data['cells'][frame], [3, 3])
            vol = np.linalg.det(frame_cell)
            virials[frame] *= v_pref * vol
    # rotate the system to lammps convention
    self.rot_lower_triangular()
11141174

11151175

11161176
def sub_system(self, f_idx) :

0 commit comments

Comments
 (0)