4545#
4646# Den interne metadataportalen http://www.byranettet.ssb.no/metadata/ har også alle filbeskrivelsene og filvariablene.
4747
# Exclusive stop value for the look-back range used when searching for older
# versions of a file: range(-1, YEARS_BACK_CHECK, -1) yields -1 .. -19, so
# 19 earlier years are tried.
YEARS_BACK_CHECK = - 20
49+
4850
4951# %%
5052@dataclass
@@ -570,7 +572,6 @@ def open_path_datadok(path: str | Path, **read_fwf_params: Any) -> ArchiveData:
570572
571573 Raises:
572574 ValueError: If no datadok-api endpoint is found for the path given.
573- FileNotFoundError: If more than one file matches, with different file extensions, we do not know which to pick.
574575 """
575576 path_lib = convert_to_pathlib (path )
576577 combinations = get_path_combinations (path_lib , file_exts = ["" ])
@@ -586,6 +587,24 @@ def open_path_datadok(path: str | Path, **read_fwf_params: Any) -> ArchiveData:
586587 url_address = url_from_path (url_path )
587588 logger .info (f"Found datadok-response for path { url_path } " )
588589
590+ filepath = look_for_filepath (path_lib )
591+
592+ return import_archive_data (url_address , filepath , ** read_fwf_params )
593+
594+
595+ def look_for_filepath (path_lib : Path ) -> Path :
596+ """Look for possible placements of the physical "flatfile" on disk.
597+
598+ Args:
599+ path_lib (Path): The given path from the user as a pathlib.Path
600+
601+ Raises:
602+ FileNotFoundError: If we find more than one matching file, we do not know which to pick.
603+ FileNotFoundError: If we find zero matching files, we also do not know which to pick.
604+
605+ Returns:
606+ Path: The found path of an actual physical file.
607+ """
589608 file_combinations = get_path_combinations (
590609 path_lib .with_suffix ("" ), file_exts = None , add_dollar = False
591610 ) # file_exts=None gets replaced by dat, txt, ""
@@ -619,8 +638,7 @@ def open_path_datadok(path: str | Path, **read_fwf_params: Any) -> ArchiveData:
619638 filepath = filelist [0 ]
620639
621640 logger .info (f"Found datafile at path { filepath } " )
622-
623- return import_archive_data (url_address , filepath , ** read_fwf_params )
641+ return filepath
624642
625643
626644# Correcting path for API
@@ -866,35 +884,53 @@ def go_back_in_time(
866884 yr_char_ranges = get_yr_char_ranges (path_lib )
867885 # Loop over the years we want to look at, changing all the year ranges in the path
868886 if yr_char_ranges :
869- curr_path = path_lib
870- # Looking 20 years back in time
871- for looking_back in range (- 1 , - 20 , - 1 ):
872- for year_range in yr_char_ranges :
873- yr = curr_path .name [year_range [0 ] : year_range [1 ]]
874- name_update = (
875- curr_path .name [: year_range [0 ]]
876- + str (int (yr ) - 1 )
877- + curr_path .name [year_range [1 ] :]
878- )
879- curr_path = Path (curr_path .parent , name_update )
880- logger .debug (f"Looking back at { looking_back } , { curr_path = } " )
881- yr_combinations = get_path_combinations (curr_path , file_exts = exts )
882- for yrpath , ext in yr_combinations :
883- url_address = url_from_path (yrpath .with_suffix (ext ))
884- if test_url (url_address ):
885- f"Looking back { looking_back } years, found a path at { yrpath .with_suffix (ext )} "
886- return yrpath .with_suffix (ext )
887-
888- logger .info (
889- f"Looking back { looking_back } years, DIDNT find a path at { yrpath .with_suffix (ext )} "
890- )
887+ yrpath = bumpcheck_file_years_back (path_lib , yr_char_ranges , exts )
888+ if yrpath is not None :
889+ return yrpath
891890 else :
892891 logger .info (
893892 "Couldnt determine any year ranges in the pattern gXXXX (possibly repeating, like gXXXXgXXXX.)."
894893 )
895894 return None
896895
897896
def bumpcheck_file_years_back(
    curr_path: Path, yr_char_ranges: list[tuple[int, int]], exts: list[str]
) -> Path | None:
    """Modify the path to point at older versions of file, to look for valid datadok-api paths.

    For every step back, each year range in the filename is decremented by one,
    and all path combinations of the resulting name are tested against the API.

    Args:
        curr_path: The path given by user to look for.
        yr_char_ranges: The (start, end) character positions of the years in the filename.
        exts: The base extensions to explore.

    Returns:
        Path | None: The first older path that passes the url test, or None if
            no step back produced a path that passes.
    """
    # range() excludes its stop value, so the last step tried is YEARS_BACK_CHECK + 1.
    for looking_back in range(-1, YEARS_BACK_CHECK, -1):
        # Decrement every year in the name, so names with several years move together.
        for start, end in yr_char_ranges:
            name = curr_path.name
            previous_year = str(int(name[start:end]) - 1)
            curr_path = Path(curr_path.parent, name[:start] + previous_year + name[end:])
        logger.debug(f"Looking back at {looking_back}, {curr_path=}")

        # Fall back to the bumped path in the log message below, in case there
        # are no combinations to try (the loop variables would be unbound/stale).
        candidate = curr_path
        for yrpath, ext in get_path_combinations(curr_path, file_exts=exts):
            candidate = yrpath.with_suffix(ext)
            if test_url(url_from_path(candidate)):
                logger.info(
                    f"Looking back {looking_back} years, found a path at {candidate}"
                )
                return candidate

        logger.info(
            f"Looking back {looking_back} years, DIDNT find a path at {candidate}"
        )
    return None
932+
933+
898934def get_yr_char_ranges (path : str | Path ) -> list [tuple [int , int ]]:
899935 """Find the character ranges containing years in the path. Usually 1-4 ranges.
900936
0 commit comments