Skip to content

Commit 7a0f722

Browse files
Fix single species splitting in CAMS global forecast download
1 parent e6c7b14 commit 7a0f722

File tree

1 file changed

+23
-7
lines changed

1 file changed

+23
-7
lines changed

providentia/cams.py

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import re
1010
import requests
1111
import shutil
12+
from tqdm import tqdm
1213
import yaml
1314
import zipfile
1415

@@ -274,7 +275,7 @@ def extract_date(self, input_file, prefix, domain):
274275

275276
def format_data(self, input_filepath, output_filepath, species, prefix, domain, resolution, final_path, cams_species, url):
276277

277-
self.download_instance.logger.info(f"Formatting {final_path}\n")
278+
self.download_instance.logger.info(f"Formatting {final_path}")
278279

279280
# get file formatting
280281
cams_providentia_map = cams_formatting[prefix][domain]
@@ -366,17 +367,25 @@ def format_data(self, input_filepath, output_filepath, species, prefix, domain,
366367
output_file.close()
367368
input_file.close()
368369

369-
def split_nc_file(self, input_file_name, all_dates, cams_dict, temp_dir, prefix, domain):
370+
def split_nc_file(self, input_file_name, all_dates, cams_dict, temp_dir, prefix, domain, level):
370371

371372
# get file formatting
372373
cams_providentia_map = cams_formatting[prefix][domain]
373374

374375
# read the input netcdf file
375-
input_filepath = join(temp_dir,input_file_name)
376+
input_filepath = join(temp_dir, input_file_name)
376377
input_file = Dataset(input_filepath, 'r')
377378

379+
# set available dimensions
380+
available_dimensions = ['forecast_period', 'latitude', 'longitude']
381+
if level == 'multi':
382+
available_dimensions.append('model_level')
383+
384+
# create tqdm iterator
385+
all_dates_iter = tqdm(all_dates, bar_format= '{l_bar}{bar}|{n_fmt}/{total_fmt}',desc=f"Splitting {input_file_name} file ({len(all_dates)})")
386+
378387
# loop through the possible dates
379-
for i, date in enumerate(all_dates):
388+
for i, date in enumerate(all_dates_iter):
380389
# create a new file for each slice
381390
output_file_name = cams_dict["file_format"].replace("yyyy", f"{date.year:04d}") \
382391
.replace("mm", f"{date.month:02d}") \
@@ -385,7 +394,7 @@ def split_nc_file(self, input_file_name, all_dates, cams_dict, temp_dir, prefix,
385394
output_file = Dataset(output_filepath, 'w', format='NETCDF4')
386395

387396
# copy all the dimensions to the new file, keeping forecast_reference_time at size one
388-
for dim in ['forecast_period', 'model_level', 'latitude', 'longitude']:
397+
for dim in available_dimensions:
389398
output_file.createDimension(dim, input_file.dimensions[dim].size)
390399
output_file.createDimension('forecast_reference_time', 1)
391400

@@ -409,13 +418,18 @@ def split_nc_file(self, input_file_name, all_dates, cams_dict, temp_dir, prefix,
409418
if input_var_name == 'valid_time':
410419
output_var[:] = input_var[i,:]
411420
elif input_var_name not in cams_providentia_map:
412-
output_var[:] = input_var[:, i, :, :, :]
421+
if level == 'multi':
422+
output_var[:] = input_var[:, i, :, :, :]
423+
else:
424+
output_var[:] = input_var[:, i, :, :]
413425
else:
414426
output_var[:] = input_var[:]
415427

416428
# close new dataset
417429
output_file.close()
418430

431+
self.download_instance.logger.info('')
432+
419433
# close original dataset
420434
input_file.close()
421435

@@ -583,7 +597,7 @@ def download_cams_experiment(self, experiment):
583597

584598
# split the forecast file
585599
if cams_dict["split"] is True:
586-
self.split_nc_file(zip_file_name, all_dates, cams_dict, temp_dir, prefix, domain)
600+
self.split_nc_file(zip_file_name, all_dates, cams_dict, temp_dir, prefix, domain, level)
587601

588602
# iterate through all dates to format each of the day files
589603
for date in all_dates:
@@ -610,6 +624,8 @@ def download_cams_experiment(self, experiment):
610624
# update the file to remove so that it is the last downloaded one
611625
self.download_instance.latest_nc_file_path = final_path
612626

627+
self.download_instance.logger.info('')
628+
613629
# add one day to the date
614630
current_cams_date = next_cams_date + timedelta(days=1)
615631

0 commit comments

Comments
 (0)