2
2
import numpy as np
3
3
import re
4
4
from collections import OrderedDict
5
+
6
+ from scipy .constants .constants import R
5
7
from .cell import cell_to_low_triangle
6
8
from ..unit import EnergyConversion , LengthConversion , ForceConversion , PressureConversion
7
9
@@ -29,6 +31,8 @@ def __init__(self, log_file_name, xyz_file_name, restart=False):
29
31
self .xyz_block_generator = self .get_xyz_block_generator ()
30
32
self .restart_flag = restart
31
33
self .cell = None
34
+ self .print_level = None
35
+ self .atomic_kinds = None
32
36
33
37
if self .restart_flag :
34
38
self .handle_single_log_frame (next (self .log_block_generator ))
@@ -43,13 +47,14 @@ def __iter__(self):
43
47
def __next__ (self ):
44
48
info_dict = {}
45
49
log_info_dict = self .handle_single_log_frame (next (self .log_block_generator ))
50
+ #print(log_info_dict)
46
51
xyz_info_dict = self .handle_single_xyz_frame (next (self .xyz_block_generator ))
47
- eq1 = [v1 == v2 for v1 ,v2 in zip (log_info_dict ['atom_numbs' ], xyz_info_dict ['atom_numbs' ])]
48
- eq2 = [v1 == v2 for v1 ,v2 in zip (log_info_dict ['atom_names' ], xyz_info_dict ['atom_names' ])]
49
- eq3 = [v1 == v2 for v1 ,v2 in zip (log_info_dict ['atom_types' ], xyz_info_dict ['atom_types' ])]
50
- assert all (eq1 ), (log_info_dict ,xyz_info_dict ,'There may be errors in the file. If it is a restart task; use restart=True' )
51
- assert all (eq2 ), (log_info_dict ,xyz_info_dict ,'There may be errors in the file. If it is a restart task; use restart=True' )
52
- assert all (eq3 ), (log_info_dict ,xyz_info_dict ,'There may be errors in the file. If it is a restart task; use restart=True' )
52
+ # eq1 = [v1==v2 for v1,v2 in zip(log_info_dict['atom_numbs'], xyz_info_dict['atom_numbs'])]
53
+ # eq2 = [v1==v2 for v1,v2 in zip(log_info_dict['atom_names'], xyz_info_dict['atom_names'])]
54
+ # eq3 = [v1==v2 for v1,v2 in zip(log_info_dict['atom_types'], xyz_info_dict['atom_types'])]
55
+ # assert all(eq1), (log_info_dict,xyz_info_dict,'There may be errors in the file. If it is a restart task; use restart=True')
56
+ # assert all(eq2), (log_info_dict,xyz_info_dict,'There may be errors in the file. If it is a restart task; use restart=True')
57
+ # assert all(eq3), (log_info_dict,xyz_info_dict,'There may be errors in the file. If it is a restart task; use restart=True')
53
58
assert log_info_dict ['energies' ]== xyz_info_dict ['energies' ], (log_info_dict ['energies' ], xyz_info_dict ['energies' ],'There may be errors in the file' )
54
59
info_dict .update (log_info_dict )
55
60
info_dict .update (xyz_info_dict )
@@ -103,11 +108,18 @@ def handle_single_log_frame(self, lines):
103
108
cell_angle_pattern = re .compile (r' INITIAL CELL ANGLS\[deg\]\s+=\s+(?P<alpha>\S+)\s+(?P<beta>\S+)\s+(?P<gamma>\S+)' )
104
109
cell_A , cell_B , cell_C = (0 ,0 ,0 ,)
105
110
cell_alpha , cell_beta , cell_gamma = (0 ,0 ,0 ,)
111
+ cell_a_pattern = re .compile (r' CELL\| Vector a \[angstrom\]:\s+(?P<ax>\S+)\s+(?P<ay>\S+)\s+(?P<az>\S+)' )
112
+ cell_b_pattern = re .compile (r' CELL\| Vector b \[angstrom\]:\s+(?P<bx>\S+)\s+(?P<by>\S+)\s+(?P<bz>\S+)' )
113
+ cell_c_pattern = re .compile (r' CELL\| Vector c \[angstrom\]:\s+(?P<cx>\S+)\s+(?P<cy>\S+)\s+(?P<cz>\S+)' )
106
114
force_start_pattern = re .compile (r' ATOMIC FORCES in' )
107
115
force_flag = False
108
116
force_end_pattern = re .compile (r' SUM OF ATOMIC FORCES' )
109
117
force_lines = []
110
118
cell_flag = 0
119
+ print_level_pattern = re .compile (r' GLOBAL\| Global print level\s+(?P<print_level>\S+)' )
120
+ print_level_flag = 0
121
+ atomic_kinds_pattern = re .compile (r'\s+\d+\. Atomic kind:\s+(?P<akind>\S+)' )
122
+ atomic_kinds = []
111
123
for line in lines :
112
124
if force_start_pattern .match (line ):
113
125
force_flag = True
@@ -131,9 +143,47 @@ def handle_single_log_frame(self, lines):
131
143
cell_beta = np .deg2rad (float (cell_angle_pattern .match (line ).groupdict ()['beta' ]))
132
144
cell_gamma = np .deg2rad (float (cell_angle_pattern .match (line ).groupdict ()['gamma' ]))
133
145
cell_flag += 1
146
+ if print_level_pattern .match (line ):
147
+ print_level = print_level_pattern .match (line ).groupdict ()['print_level' ]
148
+ print_level_flag += 1
149
+ if cell_a_pattern .match (line ):
150
+ cell_ax = float (cell_a_pattern .match (line ).groupdict ()['ax' ])
151
+ cell_ay = float (cell_a_pattern .match (line ).groupdict ()['ay' ])
152
+ cell_az = float (cell_a_pattern .match (line ).groupdict ()['az' ])
153
+ cell_flag += 1
154
+ if cell_b_pattern .match (line ):
155
+ cell_bx = float (cell_b_pattern .match (line ).groupdict ()['bx' ])
156
+ cell_by = float (cell_b_pattern .match (line ).groupdict ()['by' ])
157
+ cell_bz = float (cell_b_pattern .match (line ).groupdict ()['bz' ])
158
+ cell_flag += 1
159
+ if cell_c_pattern .match (line ):
160
+ cell_cx = float (cell_c_pattern .match (line ).groupdict ()['cx' ])
161
+ cell_cy = float (cell_c_pattern .match (line ).groupdict ()['cy' ])
162
+ cell_cz = float (cell_c_pattern .match (line ).groupdict ()['cz' ])
163
+ cell_flag += 1
164
+
165
+ if atomic_kinds_pattern .match (line ):
166
+ akind = atomic_kinds_pattern .match (line ).groupdict ()['akind' ]
167
+ atomic_kinds .append (akind )
168
+ if print_level_flag == 1 :
169
+ self .print_level = print_level
170
+ if print_level == 'LOW' :
171
+ raise RuntimeError ("please provide cp2k output with higher print level(at least MEDIUM)" )
172
+
173
+
134
174
if cell_flag == 2 :
135
175
self .cell = cell_to_low_triangle (cell_A ,cell_B ,cell_C ,
136
176
cell_alpha ,cell_beta ,cell_gamma )
177
+ elif cell_flag == 5 :
178
+ self .cell = np .asarray (
179
+ [
180
+ [cell_ax , cell_ay , cell_az ],
181
+ [cell_bx , cell_by , cell_bz ],
182
+ [cell_cx , cell_cy , cell_cz ]]
183
+ ).astype ('float32' )
184
+ if atomic_kinds :
185
+ self .atomic_kinds = atomic_kinds
186
+ #print(self.atomic_kinds)
137
187
# lx = cell_A
138
188
# xy = cell_B * np.cos(cell_gamma)
139
189
# xz = cell_C * np.cos(cell_beta)
@@ -146,27 +196,32 @@ def handle_single_log_frame(self, lines):
146
196
147
197
element_index = - 1
148
198
element_dict = OrderedDict ()
149
- atom_types_list = []
199
+ atom_types_idx_list = []
150
200
forces_list = []
151
201
for line in force_lines [3 :]:
152
202
line_list = line .split ()
153
- if element_dict .get (line_list [2 ]):
154
- element_dict [line_list [2 ]][1 ]+= 1
203
+ #print(line_list)
204
+ if element_dict .get (line_list [1 ]):
205
+ element_dict [line_list [1 ]][1 ]+= 1
155
206
else :
156
207
element_index += 1
157
- element_dict [line_list [2 ]]= [element_index ,1 ]
158
- atom_types_list .append (element_dict [line_list [2 ]][0 ])
208
+ element_dict [line_list [1 ]]= [element_index ,1 ]
209
+ atom_types_idx_list .append (element_dict [line_list [1 ]][0 ])
159
210
forces_list .append ([float (line_list [3 ])* AU_TO_EV_EVERY_ANG ,
160
211
float (line_list [4 ])* AU_TO_EV_EVERY_ANG ,
161
212
float (line_list [5 ])* AU_TO_EV_EVERY_ANG ])
162
-
163
- atom_names = list (element_dict .keys ())
213
+ #print(atom_types_idx_list)
214
+ #atom_names=list(element_dict.keys())
215
+ atom_names = self .atomic_kinds
164
216
atom_numbs = []
165
- for ii in atom_names :
217
+
218
+ for ii in element_dict .keys ():
166
219
atom_numbs .append (element_dict [ii ][1 ])
220
+ #print(atom_numbs)
167
221
info_dict ['atom_names' ] = atom_names
168
222
info_dict ['atom_numbs' ] = atom_numbs
169
- info_dict ['atom_types' ] = np .asarray (atom_types_list )
223
+ info_dict ['atom_types' ] = np .asarray (atom_types_idx_list )
224
+ info_dict ['print_level' ] = self .print_level
170
225
info_dict ['cells' ] = np .asarray ([self .cell ]).astype ('float32' )
171
226
info_dict ['energies' ] = np .asarray ([energy ]).astype ('float32' )
172
227
info_dict ['forces' ] = np .asarray ([forces_list ]).astype ('float32' )
@@ -208,9 +263,9 @@ def handle_single_xyz_frame(self, lines):
208
263
atom_numbs = []
209
264
for ii in atom_names :
210
265
atom_numbs .append (element_dict [ii ][1 ])
211
- info_dict ['atom_names' ] = atom_names
212
- info_dict ['atom_numbs' ] = atom_numbs
213
- info_dict ['atom_types' ] = np .asarray (atom_types_list )
266
+ # info_dict['atom_names'] = atom_names
267
+ # info_dict['atom_numbs'] = atom_numbs
268
+ # info_dict['atom_types'] = np.asarray(atom_types_list)
214
269
info_dict ['coords' ] = np .asarray ([coords_list ]).astype ('float32' )
215
270
info_dict ['energies' ] = np .array ([energy ]).astype ('float32' )
216
271
info_dict ['orig' ]= [0 ,0 ,0 ]
@@ -225,59 +280,69 @@ def get_frames (fname) :
225
280
eV = EnergyConversion ("hartree" , "eV" ).value ()
226
281
angstrom = LengthConversion ("bohr" , "angstrom" ).value ()
227
282
GPa = PressureConversion ("eV/angstrom^3" , "GPa" ).value ()
283
+ atom_symbol_idx_list = []
228
284
atom_symbol_list = []
229
285
cell = []
230
286
coord = []
231
287
force = []
232
288
stress = []
233
- cell_count = 0
234
- coord_count = 0
289
+
235
290
236
291
fp = open (fname )
237
292
# check if output is converged, if not, return sys = 0
238
293
content = fp .read ()
239
294
count = content .count ('SCF run converged' )
240
295
if count == 0 :
241
- return [], [], [], [], [], [], [], []
296
+ return [], [], [], [], [], [], [], None
242
297
298
+ # search duplicated header
243
299
fp .seek (0 )
300
+ header_idx = []
244
301
for idx , ii in enumerate (fp ) :
245
- if ('CELL| Vector' in ii ) and (cell_count < 3 ) :
246
- cell .append (ii .split ()[4 :7 ])
247
- cell_count += 1
248
- if 'Atom Kind Element' in ii :
249
- coord_flag = True
250
- coord_idx = idx
251
- coord_count += 1
252
- # get the coord block info
253
- if coord_flag and (coord_count == 1 ):
254
- if (idx > coord_idx + 1 ) :
255
- if (ii == '\n ' ) :
256
- coord_flag = False
257
- else :
258
- coord .append (ii .split ()[4 :7 ])
259
- atom_symbol_list .append (ii .split ()[2 ])
260
- if 'ENERGY|' in ii :
261
- energy = (ii .split ()[8 ])
262
- if ' Atom Kind ' in ii :
263
- force_flag = True
264
- force_idx = idx
265
- if force_flag :
266
- if (idx > force_idx ) :
267
- if 'SUM OF ATOMIC FORCES' in ii :
268
- force_flag = False
269
- else :
270
- force .append (ii .split ()[3 :6 ])
271
- # add reading stress tensor
272
- if 'STRESS TENSOR [GPa' in ii :
273
- stress_flag = True
274
- stress_idx = idx
275
- if stress_flag :
276
- if (idx > stress_idx + 2 ):
277
- if (ii == '\n ' ) :
278
- stress_flag = False
279
- else :
280
- stress .append (ii .split ()[1 :4 ])
302
+ if 'Multiplication driver' in ii :
303
+ header_idx .append (idx )
304
+
305
+ # parse from last header
306
+ fp .seek (0 )
307
+ for idx , ii in enumerate (fp ) :
308
+ if idx > header_idx [- 1 ] :
309
+ if 'CELL| Vector' in ii :
310
+ cell .append (ii .split ()[4 :7 ])
311
+ if 'Atomic kind:' in ii :
312
+ atom_symbol_list .append (ii .split ()[3 ])
313
+ if 'Atom Kind Element' in ii :
314
+ coord_flag = True
315
+ coord_idx = idx
316
+
317
+ # get the coord block info
318
+ if coord_flag :
319
+ if (idx > coord_idx + 1 ) :
320
+ if (ii == '\n ' ) :
321
+ coord_flag = False
322
+ else :
323
+ coord .append (ii .split ()[4 :7 ])
324
+ atom_symbol_idx_list .append (ii .split ()[1 ])
325
+ if 'ENERGY|' in ii :
326
+ energy = (ii .split ()[8 ])
327
+ if ' Atom Kind ' in ii :
328
+ force_flag = True
329
+ force_idx = idx
330
+ if force_flag :
331
+ if (idx > force_idx ) :
332
+ if 'SUM OF ATOMIC FORCES' in ii :
333
+ force_flag = False
334
+ else :
335
+ force .append (ii .split ()[3 :6 ])
336
+ # add reading stress tensor
337
+ if 'STRESS TENSOR [GPa' in ii :
338
+ stress_flag = True
339
+ stress_idx = idx
340
+ if stress_flag :
341
+ if (idx > stress_idx + 2 ):
342
+ if (ii == '\n ' ) :
343
+ stress_flag = False
344
+ else :
345
+ stress .append (ii .split ()[1 :4 ])
281
346
282
347
283
348
fp .close ()
@@ -287,20 +352,24 @@ def get_frames (fname) :
287
352
288
353
#conver to float array and add extra dimension for nframes
289
354
cell = np .array (cell )
290
- cell = cell .astype (float )
355
+ cell = cell .astype ('float32' )
291
356
cell = cell [np .newaxis , :, :]
292
357
coord = np .array (coord )
293
- coord = coord .astype (float )
358
+ coord = coord .astype ('float32' )
294
359
coord = coord [np .newaxis , :, :]
360
+ atom_symbol_idx_list = np .array (atom_symbol_idx_list )
361
+ atom_symbol_idx_list = atom_symbol_idx_list .astype (int )
362
+ atom_symbol_idx_list = atom_symbol_idx_list - 1
295
363
atom_symbol_list = np .array (atom_symbol_list )
364
+ atom_symbol_list = atom_symbol_list [atom_symbol_idx_list ]
296
365
force = np .array (force )
297
- force = force .astype (float )
366
+ force = force .astype ('float32' )
298
367
force = force [np .newaxis , :, :]
299
368
300
369
# virial is not necessary
301
370
if stress :
302
371
stress = np .array (stress )
303
- stress = stress .astype (float )
372
+ stress = stress .astype ('float32' )
304
373
stress = stress [np .newaxis , :, :]
305
374
# stress to virial conversion, default unit in cp2k is GPa
306
375
# note the stress is virial = stress * volume
@@ -312,11 +381,10 @@ def get_frames (fname) :
312
381
force = force * eV / angstrom
313
382
# energy unit conversion, default unit in cp2k is hartree
314
383
energy = float (energy ) * eV
315
- energy = np .array (energy )
384
+ energy = np .array (energy ). astype ( 'float32' )
316
385
energy = energy [np .newaxis ]
317
386
318
387
319
-
320
388
tmp_names , symbol_idx = np .unique (atom_symbol_list , return_index = True )
321
389
atom_types = []
322
390
atom_numbs = []
0 commit comments