Skip to content

Commit 81c656c

Browse files
authored
Merge pull request #245 from amcadmus/master
Merge devel into master
2 parents 306172d + cdcc63f commit 81c656c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

73 files changed

+107426
-5657
lines changed

.github/workflows/mirror_gitee.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ concurrency:
88

99
jobs:
1010
git-mirror:
11+
if: github.repository_owner == 'deepmodeling'
1112
runs-on: ubuntu-latest
1213
steps:
1314
- uses: wearerequired/git-mirror-action@v1

.github/workflows/test.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ jobs:
99
runs-on: ubuntu-latest
1010
strategy:
1111
matrix:
12-
python-version: [3.6, 3.7, 3.8]
12+
python-version: [3.7, 3.8, 3.9]
1313

1414
steps:
1515
- uses: actions/checkout@v2

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,14 +82,15 @@ The `System` or `LabeledSystem` can be constructed from the following file forma
8282
| Gromacs | gro | True | False | System | 'gromacs/gro' |
8383
| ABACUS | STRU | False | True | LabeledSystem | 'abacus/scf' |
8484
| ABACUS | cif | True | True | LabeledSystem | 'abacus/md' |
85+
| ase | structure | True | True | MultiSystems | 'ase/structure' |
8586

8687

8788
The Class `dpdata.MultiSystems` can read data from a dir which may contains many files of different systems, or from single xyz file which contains different systems.
8889

8990
Use `dpdata.MultiSystems.from_dir` to read from a directory, `dpdata.MultiSystems` will walk in the directory
9091
Recursively and find all file with specific file_name. Supports all the file formats that `dpdata.LabeledSystem` supports.
9192

92-
Use `dpdata.MultiSystems.from_file` to read from single file. Now only support quip/gap/xyz format file.
93+
Use `dpdata.MultiSystems.from_file` to read from single file. Single-file support is available for the `quip/gap/xyz` and `ase/structure` formats.
9394

9495
For example, for `quip/gap xyz` files, single .xyz file may contain many different configurations with different atom numbers and atom type.
9596

dpdata/cp2k/output.py

Lines changed: 131 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
import numpy as np
33
import re
44
from collections import OrderedDict
5+
6+
from scipy.constants.constants import R
57
from .cell import cell_to_low_triangle
68
from ..unit import EnergyConversion, LengthConversion, ForceConversion, PressureConversion
79

@@ -29,6 +31,8 @@ def __init__(self, log_file_name, xyz_file_name, restart=False):
2931
self.xyz_block_generator = self.get_xyz_block_generator()
3032
self.restart_flag = restart
3133
self.cell=None
34+
self.print_level=None
35+
self.atomic_kinds = None
3236

3337
if self.restart_flag:
3438
self.handle_single_log_frame(next(self.log_block_generator))
@@ -43,13 +47,14 @@ def __iter__(self):
4347
def __next__(self):
4448
info_dict = {}
4549
log_info_dict = self.handle_single_log_frame(next(self.log_block_generator))
50+
#print(log_info_dict)
4651
xyz_info_dict = self.handle_single_xyz_frame(next(self.xyz_block_generator))
47-
eq1 = [v1==v2 for v1,v2 in zip(log_info_dict['atom_numbs'], xyz_info_dict['atom_numbs'])]
48-
eq2 = [v1==v2 for v1,v2 in zip(log_info_dict['atom_names'], xyz_info_dict['atom_names'])]
49-
eq3 = [v1==v2 for v1,v2 in zip(log_info_dict['atom_types'], xyz_info_dict['atom_types'])]
50-
assert all(eq1), (log_info_dict,xyz_info_dict,'There may be errors in the file. If it is a restart task; use restart=True')
51-
assert all(eq2), (log_info_dict,xyz_info_dict,'There may be errors in the file. If it is a restart task; use restart=True')
52-
assert all(eq3), (log_info_dict,xyz_info_dict,'There may be errors in the file. If it is a restart task; use restart=True')
52+
#eq1 = [v1==v2 for v1,v2 in zip(log_info_dict['atom_numbs'], xyz_info_dict['atom_numbs'])]
53+
#eq2 = [v1==v2 for v1,v2 in zip(log_info_dict['atom_names'], xyz_info_dict['atom_names'])]
54+
#eq3 = [v1==v2 for v1,v2 in zip(log_info_dict['atom_types'], xyz_info_dict['atom_types'])]
55+
#assert all(eq1), (log_info_dict,xyz_info_dict,'There may be errors in the file. If it is a restart task; use restart=True')
56+
#assert all(eq2), (log_info_dict,xyz_info_dict,'There may be errors in the file. If it is a restart task; use restart=True')
57+
#assert all(eq3), (log_info_dict,xyz_info_dict,'There may be errors in the file. If it is a restart task; use restart=True')
5358
assert log_info_dict['energies']==xyz_info_dict['energies'], (log_info_dict['energies'], xyz_info_dict['energies'],'There may be errors in the file')
5459
info_dict.update(log_info_dict)
5560
info_dict.update(xyz_info_dict)
@@ -103,11 +108,18 @@ def handle_single_log_frame(self, lines):
103108
cell_angle_pattern = re.compile(r' INITIAL CELL ANGLS\[deg\]\s+=\s+(?P<alpha>\S+)\s+(?P<beta>\S+)\s+(?P<gamma>\S+)')
104109
cell_A, cell_B, cell_C = (0,0,0,)
105110
cell_alpha, cell_beta, cell_gamma=(0,0,0,)
111+
cell_a_pattern = re.compile(r' CELL\| Vector a \[angstrom\]:\s+(?P<ax>\S+)\s+(?P<ay>\S+)\s+(?P<az>\S+)')
112+
cell_b_pattern = re.compile(r' CELL\| Vector b \[angstrom\]:\s+(?P<bx>\S+)\s+(?P<by>\S+)\s+(?P<bz>\S+)')
113+
cell_c_pattern = re.compile(r' CELL\| Vector c \[angstrom\]:\s+(?P<cx>\S+)\s+(?P<cy>\S+)\s+(?P<cz>\S+)')
106114
force_start_pattern = re.compile(r' ATOMIC FORCES in')
107115
force_flag=False
108116
force_end_pattern = re.compile(r' SUM OF ATOMIC FORCES')
109117
force_lines= []
110118
cell_flag=0
119+
print_level_pattern = re.compile(r' GLOBAL\| Global print level\s+(?P<print_level>\S+)')
120+
print_level_flag = 0
121+
atomic_kinds_pattern = re.compile(r'\s+\d+\. Atomic kind:\s+(?P<akind>\S+)')
122+
atomic_kinds = []
111123
for line in lines:
112124
if force_start_pattern.match(line):
113125
force_flag=True
@@ -131,9 +143,47 @@ def handle_single_log_frame(self, lines):
131143
cell_beta = np.deg2rad(float(cell_angle_pattern.match(line).groupdict()['beta']))
132144
cell_gamma = np.deg2rad(float(cell_angle_pattern.match(line).groupdict()['gamma']))
133145
cell_flag+=1
146+
if print_level_pattern.match(line):
147+
print_level = print_level_pattern.match(line).groupdict()['print_level']
148+
print_level_flag += 1
149+
if cell_a_pattern.match(line):
150+
cell_ax = float(cell_a_pattern.match(line).groupdict()['ax'])
151+
cell_ay = float(cell_a_pattern.match(line).groupdict()['ay'])
152+
cell_az = float(cell_a_pattern.match(line).groupdict()['az'])
153+
cell_flag+=1
154+
if cell_b_pattern.match(line):
155+
cell_bx = float(cell_b_pattern.match(line).groupdict()['bx'])
156+
cell_by = float(cell_b_pattern.match(line).groupdict()['by'])
157+
cell_bz = float(cell_b_pattern.match(line).groupdict()['bz'])
158+
cell_flag+=1
159+
if cell_c_pattern.match(line):
160+
cell_cx = float(cell_c_pattern.match(line).groupdict()['cx'])
161+
cell_cy = float(cell_c_pattern.match(line).groupdict()['cy'])
162+
cell_cz = float(cell_c_pattern.match(line).groupdict()['cz'])
163+
cell_flag+=1
164+
165+
if atomic_kinds_pattern.match(line):
166+
akind = atomic_kinds_pattern.match(line).groupdict()['akind']
167+
atomic_kinds.append(akind)
168+
if print_level_flag == 1:
169+
self.print_level = print_level
170+
if print_level == 'LOW':
171+
raise RuntimeError("please provide cp2k output with higher print level(at least MEDIUM)")
172+
173+
134174
if cell_flag == 2:
135175
self.cell = cell_to_low_triangle(cell_A,cell_B,cell_C,
136176
cell_alpha,cell_beta,cell_gamma)
177+
elif cell_flag == 5:
178+
self.cell = np.asarray(
179+
[
180+
[cell_ax, cell_ay, cell_az],
181+
[cell_bx, cell_by, cell_bz],
182+
[cell_cx, cell_cy, cell_cz]]
183+
).astype('float32')
184+
if atomic_kinds:
185+
self.atomic_kinds = atomic_kinds
186+
#print(self.atomic_kinds)
137187
# lx = cell_A
138188
# xy = cell_B * np.cos(cell_gamma)
139189
# xz = cell_C * np.cos(cell_beta)
@@ -146,27 +196,32 @@ def handle_single_log_frame(self, lines):
146196

147197
element_index = -1
148198
element_dict = OrderedDict()
149-
atom_types_list = []
199+
atom_types_idx_list = []
150200
forces_list = []
151201
for line in force_lines[3:]:
152202
line_list = line.split()
153-
if element_dict.get(line_list[2]):
154-
element_dict[line_list[2]][1]+=1
203+
#print(line_list)
204+
if element_dict.get(line_list[1]):
205+
element_dict[line_list[1]][1]+=1
155206
else:
156207
element_index +=1
157-
element_dict[line_list[2]]=[element_index,1]
158-
atom_types_list.append(element_dict[line_list[2]][0])
208+
element_dict[line_list[1]]=[element_index,1]
209+
atom_types_idx_list.append(element_dict[line_list[1]][0])
159210
forces_list.append([float(line_list[3])*AU_TO_EV_EVERY_ANG,
160211
float(line_list[4])*AU_TO_EV_EVERY_ANG,
161212
float(line_list[5])*AU_TO_EV_EVERY_ANG])
162-
163-
atom_names=list(element_dict.keys())
213+
#print(atom_types_idx_list)
214+
#atom_names=list(element_dict.keys())
215+
atom_names=self.atomic_kinds
164216
atom_numbs=[]
165-
for ii in atom_names:
217+
218+
for ii in element_dict.keys():
166219
atom_numbs.append(element_dict[ii][1])
220+
#print(atom_numbs)
167221
info_dict['atom_names'] = atom_names
168222
info_dict['atom_numbs'] = atom_numbs
169-
info_dict['atom_types'] = np.asarray(atom_types_list)
223+
info_dict['atom_types'] = np.asarray(atom_types_idx_list)
224+
info_dict['print_level'] = self.print_level
170225
info_dict['cells'] = np.asarray([self.cell]).astype('float32')
171226
info_dict['energies'] = np.asarray([energy]).astype('float32')
172227
info_dict['forces'] = np.asarray([forces_list]).astype('float32')
@@ -208,9 +263,9 @@ def handle_single_xyz_frame(self, lines):
208263
atom_numbs=[]
209264
for ii in atom_names:
210265
atom_numbs.append(element_dict[ii][1])
211-
info_dict['atom_names'] = atom_names
212-
info_dict['atom_numbs'] = atom_numbs
213-
info_dict['atom_types'] = np.asarray(atom_types_list)
266+
#info_dict['atom_names'] = atom_names
267+
#info_dict['atom_numbs'] = atom_numbs
268+
#info_dict['atom_types'] = np.asarray(atom_types_list)
214269
info_dict['coords'] = np.asarray([coords_list]).astype('float32')
215270
info_dict['energies'] = np.array([energy]).astype('float32')
216271
info_dict['orig']=[0,0,0]
@@ -225,59 +280,69 @@ def get_frames (fname) :
225280
eV = EnergyConversion("hartree", "eV").value()
226281
angstrom = LengthConversion("bohr", "angstrom").value()
227282
GPa = PressureConversion("eV/angstrom^3", "GPa").value()
283+
atom_symbol_idx_list = []
228284
atom_symbol_list = []
229285
cell = []
230286
coord = []
231287
force = []
232288
stress = []
233-
cell_count = 0
234-
coord_count = 0
289+
235290

236291
fp = open(fname)
237292
# check if output is converged, if not, return sys = 0
238293
content = fp.read()
239294
count = content.count('SCF run converged')
240295
if count == 0:
241-
return [], [], [], [], [], [], [], []
296+
return [], [], [], [], [], [], [], None
242297

298+
# search duplicated header
243299
fp.seek(0)
300+
header_idx = []
244301
for idx, ii in enumerate(fp) :
245-
if ('CELL| Vector' in ii) and (cell_count < 3) :
246-
cell.append(ii.split()[4:7])
247-
cell_count += 1
248-
if 'Atom Kind Element' in ii :
249-
coord_flag = True
250-
coord_idx = idx
251-
coord_count += 1
252-
# get the coord block info
253-
if coord_flag and (coord_count == 1):
254-
if (idx > coord_idx + 1) :
255-
if (ii == '\n') :
256-
coord_flag = False
257-
else :
258-
coord.append(ii.split()[4:7])
259-
atom_symbol_list.append(ii.split()[2])
260-
if 'ENERGY|' in ii :
261-
energy = (ii.split()[8])
262-
if ' Atom Kind ' in ii :
263-
force_flag = True
264-
force_idx = idx
265-
if force_flag :
266-
if (idx > force_idx) :
267-
if 'SUM OF ATOMIC FORCES' in ii :
268-
force_flag = False
269-
else :
270-
force.append(ii.split()[3:6])
271-
# add reading stress tensor
272-
if 'STRESS TENSOR [GPa' in ii :
273-
stress_flag = True
274-
stress_idx = idx
275-
if stress_flag :
276-
if (idx > stress_idx + 2):
277-
if (ii == '\n') :
278-
stress_flag = False
279-
else :
280-
stress.append(ii.split()[1:4])
302+
if 'Multiplication driver' in ii :
303+
header_idx.append(idx)
304+
305+
# parse from last header
306+
fp.seek(0)
307+
for idx, ii in enumerate(fp) :
308+
if idx > header_idx[-1] :
309+
if 'CELL| Vector' in ii:
310+
cell.append(ii.split()[4:7])
311+
if 'Atomic kind:' in ii:
312+
atom_symbol_list.append(ii.split()[3])
313+
if 'Atom Kind Element' in ii :
314+
coord_flag = True
315+
coord_idx = idx
316+
317+
# get the coord block info
318+
if coord_flag :
319+
if (idx > coord_idx + 1) :
320+
if (ii == '\n') :
321+
coord_flag = False
322+
else :
323+
coord.append(ii.split()[4:7])
324+
atom_symbol_idx_list.append(ii.split()[1])
325+
if 'ENERGY|' in ii :
326+
energy = (ii.split()[8])
327+
if ' Atom Kind ' in ii :
328+
force_flag = True
329+
force_idx = idx
330+
if force_flag :
331+
if (idx > force_idx) :
332+
if 'SUM OF ATOMIC FORCES' in ii :
333+
force_flag = False
334+
else :
335+
force.append(ii.split()[3:6])
336+
# add reading stress tensor
337+
if 'STRESS TENSOR [GPa' in ii :
338+
stress_flag = True
339+
stress_idx = idx
340+
if stress_flag :
341+
if (idx > stress_idx + 2):
342+
if (ii == '\n') :
343+
stress_flag = False
344+
else :
345+
stress.append(ii.split()[1:4])
281346

282347

283348
fp.close()
@@ -287,20 +352,24 @@ def get_frames (fname) :
287352

288353
#conver to float array and add extra dimension for nframes
289354
cell = np.array(cell)
290-
cell = cell.astype(float)
355+
cell = cell.astype('float32')
291356
cell = cell[np.newaxis, :, :]
292357
coord = np.array(coord)
293-
coord = coord.astype(float)
358+
coord = coord.astype('float32')
294359
coord = coord[np.newaxis, :, :]
360+
atom_symbol_idx_list = np.array(atom_symbol_idx_list)
361+
atom_symbol_idx_list = atom_symbol_idx_list.astype(int)
362+
atom_symbol_idx_list = atom_symbol_idx_list - 1
295363
atom_symbol_list = np.array(atom_symbol_list)
364+
atom_symbol_list = atom_symbol_list[atom_symbol_idx_list]
296365
force = np.array(force)
297-
force = force.astype(float)
366+
force = force.astype('float32')
298367
force = force[np.newaxis, :, :]
299368

300369
# virial is not necessary
301370
if stress:
302371
stress = np.array(stress)
303-
stress = stress.astype(float)
372+
stress = stress.astype('float32')
304373
stress = stress[np.newaxis, :, :]
305374
# stress to virial conversion, default unit in cp2k is GPa
306375
# note the stress is virial = stress * volume
@@ -312,11 +381,10 @@ def get_frames (fname) :
312381
force = force * eV / angstrom
313382
# energy unit conversion, default unit in cp2k is hartree
314383
energy = float(energy) * eV
315-
energy = np.array(energy)
384+
energy = np.array(energy).astype('float32')
316385
energy = energy[np.newaxis]
317386

318387

319-
320388
tmp_names, symbol_idx = np.unique(atom_symbol_list, return_index=True)
321389
atom_types = []
322390
atom_numbs = []

dpdata/plugins/ase.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,20 @@
99

1010
@Format.register("ase/structure")
1111
class ASEStructureFormat(Format):
12+
"""Format for the `Atomic Simulation Environment <https://wiki.fysik.dtu.dk/ase/>`_ (ase).
13+
14+
ASE supports parsing a few dozen of data formats. As described in i
15+
`the documentation <ihttps://wiki.fysik.dtu.dk/ase/ase/io/io.html>`_,
16+
many of these formats can be determined automatically.
17+
Use the `ase_fmt` keyword argument to supply the format if
18+
automatic detection fails.
19+
"""
20+
1221
def from_labeled_system(self, data, **kwargs):
1322
return data
1423

15-
def from_multi_systems(self, file_name, begin=None, end=None, step=None, fmt='traj', **kwargs):
16-
frames = ase.io.read(file_name, format=fmt, index=slice(begin, end, step))
24+
def from_multi_systems(self, file_name, begin=None, end=None, step=None, ase_fmt=None, **kwargs):
25+
frames = ase.io.read(file_name, format=ase_fmt, index=slice(begin, end, step))
1726
for atoms in frames:
1827
symbols = atoms.get_chemical_symbols()
1928
atom_names = list(set(symbols))

dpdata/plugins/xyz.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,25 @@
1+
import numpy as np
2+
13
from dpdata.xyz.quip_gap_xyz import QuipGapxyzSystems
4+
from dpdata.xyz.xyz import coord_to_xyz
25
from dpdata.format import Format
36

7+
@Format.register("xyz")
8+
class XYZFormat(Format):
9+
"""XYZ foramt.
10+
11+
Examples
12+
--------
13+
>>> s.to("xyz", "a.xyz")
14+
"""
15+
def to_system(self, data, file_name, **kwargs):
16+
buff = []
17+
types = np.array(data['atom_names'])[data['atom_types']]
18+
for cc in data['coords']:
19+
buff.append(coord_to_xyz(cc, types))
20+
with open(file_name, 'w') as fp:
21+
fp.write("\n".join(buff))
22+
423

524
@Format.register("quip/gap/xyz")
625
@Format.register("quip/gap/xyz_file")

0 commit comments

Comments
 (0)