Skip to content

Commit e2cfeb5

Browse files
authored
Merge pull request #148 from statisticsnorway/fix_formats
Fix bug in formats when trying to map multiple ranges to same category.
2 parents ace4e09 + 26d18bc commit e2cfeb5

File tree

2 files changed: 78 additions (+78) and 59 deletions (-59)

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "ssb-fagfunksjoner"
3-
version = "1.0.8"
3+
version = "1.0.9"
44
description = "Fellesfunksjoner for ssb i Python"
55
authors = ["SSB-pythonistas <ssb-pythonistas@ssb.no>"]
66
license = "MIT"

src/fagfunksjoner/formats/formats.py

Lines changed: 77 additions & 58 deletions
Original file line number | Diff line number | Diff line change
@@ -10,16 +10,16 @@
1010

1111

1212
class SsbFormat(dict[Any, Any]):
13-
"""Custom dictionary class designed to handle specific formatting conventions."""
13+
"""Custom dictionary class designed to handle specific formatting conventions, including mapping intervals (defined as range strings) even when they map to the same value."""
1414

1515
def __init__(self, start_dict: SSBFORMAT_INPUT_TYPE | None = None) -> None:
1616
"""Initializes the SsbFormat instance.
1717
1818
Args:
1919
start_dict (dict, optional): Initial dictionary to populate SsbFormat.
2020
"""
21-
super(dict, self).__init__()
22-
self.cached = True # Switching the default to False, will f-up __setitem__
21+
super().__init__()
22+
self.cached = True # Switching the default to False might f-up __setitem__
2323
if start_dict:
2424
for k, v in start_dict.items():
2525
dict.__setitem__(self, k, v)
@@ -43,7 +43,7 @@ def __setitem__(self, key: str | int | float | NAType | None, value: Any) -> Non
4343
dict.__setitem__(self, key, value)
4444
if isinstance(key, str):
4545
if "-" in key and key.count("-") == 1:
46-
self.store_ranges()
46+
self.store_ranges() # update ranges after adding a new range key
4747
if key.lower() == "other" and key != "other":
4848
self.set_other_as_lowercase()
4949
if self.check_if_na(key):
@@ -52,17 +52,20 @@ def __setitem__(self, key: str | int | float | NAType | None, value: Any) -> Non
5252
def __missing__(self, key: str | int | float | NAType | None) -> Any:
5353
"""Overrides the '__missing__' method of dictionary to handle missing keys.
5454
55+
Checks for integer/string confusion, NA values, or membership in a defined range.
56+
If none apply and an 'other' key exists, its value is returned.
57+
5558
Args:
5659
key (str | int | float | NAType | None): Key that is missing in the dictionary.
5760
5861
Returns:
59-
Any: Value of key in any special conditions: confusion int/str, in one of the ranges, NA or if other is defined.
62+
Any: The corresponding mapped value based on special conditions.
6063
6164
Raises:
62-
ValueError: If the key is not found in the format and no 'other' key is specified.
65+
ValueError: If the key is not found and no 'other' key is defined.
6366
"""
6467
int_str_confuse = self.int_str_confuse(key)
65-
if int_str_confuse:
68+
if int_str_confuse is not None:
6669
if self.cached:
6770
self[key] = int_str_confuse
6871
return int_str_confuse
@@ -73,7 +76,7 @@ def __missing__(self, key: str | int | float | NAType | None) -> Any:
7376
return self.na_value
7477

7578
key_in_range = self.look_in_ranges(key)
76-
if key_in_range:
79+
if key_in_range is not None:
7780
if self.cached:
7881
self[key] = key_in_range
7982
return key_in_range
@@ -87,50 +90,63 @@ def __missing__(self, key: str | int | float | NAType | None) -> Any:
8790
raise ValueError(f"{key} not in format, and no other-key is specified.")
8891

8992
def store_ranges(self) -> None:
90-
"""Stores ranges based on specified keys in the dictionary."""
91-
self.ranges: dict[str, tuple[float, float]] = {}
93+
"""Stores ranges by converting range-string keys into tuple keys.
94+
95+
For example, a key "0-18" with value "A" will be stored as
96+
{(0.0, 18.0): "A"}.
97+
"""
98+
self.ranges: dict[tuple[float, float], Any] = {}
9299
for key, value in self.items():
93100
if isinstance(key, str) and "-" in key and key.count("-") == 1:
94101
self._range_to_floats(key, value)
95102

96-
def _range_to_floats(self, key: str, value: str) -> None:
97-
"""Converts a range key to a tuple of floats.
103+
def _range_to_floats(self, key: str, value: Any) -> None:
104+
"""Converts a range-string key to a tuple of floats and stores it.
98105
99106
Args:
100-
key: Key to be converted to a tuple of floats.
101-
value (str): Value to be associated with the converted range.
107+
key (str): A string representing a range in the format "lower-upper". The lower bound should be
108+
either a digit or "low" and the upper bound a digit or "high".
109+
value (Any): The value to be associated with the converted range in the ranges dictionary.
110+
111+
Raises:
112+
ValueError: If either the lower or upper bound contains a '.' character, indicating a float-like
113+
value instead of an integer-like value.
102114
"""
103-
bottom, top = key.split("-")[0].strip(), key.split("-")[1].strip()
104-
if (bottom.isdigit() or bottom.lower() == "low") and (
105-
top.isdigit() or top.lower() == "high"
115+
parts = key.split("-")
116+
if len(parts) != 2:
117+
return
118+
bottom_str, top_str = parts[0].strip(), parts[1].strip()
119+
if "." in bottom_str or "." in top_str:
120+
raise ValueError(
121+
f"Ranges must be int-like values not float-like {bottom_str}-{top_str}"
122+
)
123+
if (bottom_str.isdigit() or bottom_str.lower() == "low") and (
124+
top_str.isdigit() or top_str.lower() == "high"
106125
):
107-
if bottom.lower() == "low":
108-
bottom_float = float("-inf")
109-
else:
110-
bottom_float = float(bottom)
111-
if top.lower() == "high":
112-
top_float = float("inf")
113-
else:
114-
top_float = float(top)
115-
self.ranges[value] = (bottom_float, top_float)
116-
117-
def look_in_ranges(self, key: str | int | float | NAType | None) -> None | str:
118-
"""Looks for the specified key within the stored ranges.
126+
bottom_float = (
127+
float("-inf") if bottom_str.lower() == "low" else float(bottom_str)
128+
)
129+
top_float = float("inf") if top_str.lower() == "high" else float(top_str)
130+
self.ranges[(bottom_float, top_float)] = value
131+
132+
def look_in_ranges(self, key: str | int | float | NAType | None) -> None | Any:
133+
"""Returns the mapping value for the key if it falls within any defined range.
134+
135+
The method attempts to convert the key to a float and then checks if it lies within
136+
any of the stored range intervals. If the key is None, NA, or not of a convertible type,
137+
the method returns None.
138+
"""
139+
if key is None or pd.isna(key) or not isinstance(key, str | int | float):
140+
return None
119141

120-
Args:
121-
key: Key to search within the stored ranges.
142+
try:
143+
key_value = float(key)
144+
except (ValueError, TypeError):
145+
return None
122146

123-
Returns:
124-
The value associated with the range containing the key, if found; otherwise, None.
125-
"""
126-
if isinstance(key, str | int | float):
127-
try:
128-
key = float(key)
129-
except ValueError:
130-
return None
131-
for range_key, (bottom, top) in self.ranges.items():
132-
if key >= bottom and key <= top:
133-
return range_key
147+
for (bottom, top), mapping_value in self.ranges.items():
148+
if bottom <= key_value <= top:
149+
return mapping_value
134150
return None
135151

136152
def int_str_confuse(self, key: str | int | float | NAType | None) -> None | Any:
@@ -144,27 +160,30 @@ def int_str_confuse(self, key: str | int | float | NAType | None) -> None | Any:
144160
"""
145161
if isinstance(key, str):
146162
try:
147-
key = int(key)
148-
if key in self:
149-
return self[key]
163+
int_key = int(key)
164+
if int_key in self:
165+
return self[int_key]
150166
except ValueError:
151167
return None
152168
elif isinstance(key, int):
153-
key = str(key)
154-
if key in self:
155-
return self[key]
169+
str_key = str(key)
170+
if str_key in self:
171+
return self[str_key]
156172
return None
157173

158174
def set_other_as_lowercase(self) -> None:
159-
"""Sets the key 'other' to lowercase if mixed cases are found."""
160-
found = False
161-
for key in self:
162-
if isinstance(key, str) and key.lower() == "other":
163-
found = True
164-
break
165-
if found:
166-
value = self[key]
167-
del self[key]
175+
"""Ensures that the 'other' key is stored in lowercase.
176+
177+
If a key matching 'other' in any other case is found, its value is reassigned to 'other'.
178+
"""
179+
keys_to_update = [
180+
k
181+
for k in self
182+
if isinstance(k, str) and k.lower() == "other" and k != "other"
183+
]
184+
for k in keys_to_update:
185+
value = self[k]
186+
del self[k]
168187
self["other"] = value
169188

170189
def set_na_value(self) -> bool:
@@ -181,7 +200,7 @@ def set_na_value(self) -> bool:
181200
return False
182201

183202
@staticmethod
184-
def check_if_na(key: str | Any) -> bool:
203+
def check_if_na(key: Any) -> bool:
185204
"""Checks if the specified key represents a NA (Not Available) value.
186205
187206
Args:

0 commit comments

Comments
 (0)