99import re
1010import requests
1111import shutil
12+ from tqdm import tqdm
1213import yaml
1314import zipfile
1415
@@ -274,7 +275,7 @@ def extract_date(self, input_file, prefix, domain):
274275
275276 def format_data (self , input_filepath , output_filepath , species , prefix , domain , resolution , final_path , cams_species , url ):
276277
277- self .download_instance .logger .info (f"Formatting { final_path } \n " )
278+ self .download_instance .logger .info (f"Formatting { final_path } " )
278279
279280 # get file formatting
280281 cams_providentia_map = cams_formatting [prefix ][domain ]
@@ -366,17 +367,25 @@ def format_data(self, input_filepath, output_filepath, species, prefix, domain,
366367 output_file .close ()
367368 input_file .close ()
368369
369- def split_nc_file (self , input_file_name , all_dates , cams_dict , temp_dir , prefix , domain ):
370+ def split_nc_file (self , input_file_name , all_dates , cams_dict , temp_dir , prefix , domain , level ):
370371
371372 # get file formatting
372373 cams_providentia_map = cams_formatting [prefix ][domain ]
373374
374375 # read the input netcdf file
375- input_filepath = join (temp_dir ,input_file_name )
376+ input_filepath = join (temp_dir , input_file_name )
376377 input_file = Dataset (input_filepath , 'r' )
377378
379+ # set available dimensions
380+ available_dimensions = ['forecast_period' , 'latitude' , 'longitude' ]
381+ if level == 'multi' :
382+ available_dimensions .append ('model_level' )
383+
384+ # create tqdm iterator
385+ all_dates_iter = tqdm (all_dates , bar_format = '{l_bar}{bar}|{n_fmt}/{total_fmt}' ,desc = f"Splitting { input_file_name } file ({ len (all_dates )} )" )
386+
378387 # loop through the possible dates
379- for i , date in enumerate (all_dates ):
388+ for i , date in enumerate (all_dates_iter ):
380389 # create a new file for each slice
381390 output_file_name = cams_dict ["file_format" ].replace ("yyyy" , f"{ date .year :04d} " ) \
382391 .replace ("mm" , f"{ date .month :02d} " ) \
@@ -385,7 +394,7 @@ def split_nc_file(self, input_file_name, all_dates, cams_dict, temp_dir, prefix,
385394 output_file = Dataset (output_filepath , 'w' , format = 'NETCDF4' )
386395
387396 # copy all the dimensions to the new file, leave forecast_reference_time as one
388- for dim in [ 'forecast_period' , 'model_level' , 'latitude' , 'longitude' ] :
397+ for dim in available_dimensions :
389398 output_file .createDimension (dim , input_file .dimensions [dim ].size )
390399 output_file .createDimension ('forecast_reference_time' , 1 )
391400
@@ -409,13 +418,18 @@ def split_nc_file(self, input_file_name, all_dates, cams_dict, temp_dir, prefix,
409418 if input_var_name == 'valid_time' :
410419 output_var [:] = input_var [i ,:]
411420 elif input_var_name not in cams_providentia_map :
412- output_var [:] = input_var [:, i , :, :, :]
421+ if level == 'multi' :
422+ output_var [:] = input_var [:, i , :, :, :]
423+ else :
424+ output_var [:] = input_var [:, i , :, :]
413425 else :
414426 output_var [:] = input_var [:]
415427
416428 # close new dataset
417429 output_file .close ()
418430
431+ self .download_instance .logger .info ('' )
432+
419433 # close original dataset
420434 input_file .close ()
421435
@@ -583,7 +597,7 @@ def download_cams_experiment(self, experiment):
583597
584598 # split the forecast file
585599 if cams_dict ["split" ] is True :
586- self .split_nc_file (zip_file_name , all_dates , cams_dict , temp_dir , prefix , domain )
600+ self .split_nc_file (zip_file_name , all_dates , cams_dict , temp_dir , prefix , domain , level )
587601
588602 # iterate through all dates to format each of the day files
589603 for date in all_dates :
@@ -610,6 +624,8 @@ def download_cams_experiment(self, experiment):
610624 # change the file to remove to the last downloaded
611625 self .download_instance .latest_nc_file_path = final_path
612626
627+ self .download_instance .logger .info ('' )
628+
613629 # add one day to the date
614630 current_cams_date = next_cams_date + timedelta (days = 1 )
615631
0 commit comments