Skip to content
This repository was archived by the owner on Jan 8, 2025. It is now read-only.

Commit 93c5bb5

Browse files
authored
Fix #188 - create a tool to collect non-alias and dump all metadata (#255)
1 parent 9f6da56 commit 93c5bb5

File tree

4 files changed

+112
-26
lines changed

4 files changed

+112
-26
lines changed

docs/scan_ids.md

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# Scan IDs tool
2+
3+
This tool dumps the metadata of all non-alias IDs into a JSON file.
4+
5+
Run it with the following command:
6+
```commandline
7+
python -m tools.scan_ids
8+
```
9+
10+
The result looks like:
11+
```json
12+
[
13+
{
14+
"package_name": "idnumbers.nationalid.yugoslavia",
15+
"country_code": "yugoslavia",
16+
"ids": [
17+
{
18+
"class_name": "UniqueMasterCitizenNumber",
19+
"metadata": {
20+
"iso3166_alpha2": null,
21+
"min_length": 13,
22+
"max_length": 13,
23+
"parsable": true,
24+
"checksum": true,
25+
"regexp": "^(?P<dd>\\d{2})(?P<mm>\\d{2})(?P<yyy>\\d{3})(?P<location>\\d{2})(?P<sn>\\d{3})(?P<checksum>\\d)$",
26+
"alias_of": null,
27+
"names": [
28+
"Unique master citizen number",
29+
"JMBG",
30+
"Jedinstveni mati\u010dni broj gra\u0111ana",
31+
"\u0408\u0435\u0434\u0438\u043d\u0441\u0442\u0432\u0435\u043d\u0438 \u043c\u0430\u0442\u0438\u0447\u043d\u0438 \u0431\u0440\u043e\u0458 \u0433\u0440\u0430\u0452\u0430\u043d\u0430",
32+
"\u0408\u041c\u0411\u0413",
33+
"\u0415\u0434\u0438\u043d\u0441\u0442\u0432\u0435\u043d \u043c\u0430\u0442\u0438\u0447\u0435\u043d \u0431\u0440\u043e\u0458 \u043d\u0430 \u0433\u0440\u0430\u0453\u0430\u043d\u0438\u043d\u043e\u0442",
34+
"\u0415\u041c\u0411\u0413",
35+
"Enotna mati\u010dna \u0161tevilka ob\u010dana,",
36+
"EM\u0160O"
37+
],
38+
"links": [
39+
"https://en.wikipedia.org/wiki/Unique_Master_Citizen_Number"
40+
],
41+
"deprecated": false
42+
}
43+
}
44+
]
45+
}
46+
]
47+
```
48+
49+
The resulting JSON can be used to build reference documentation or to generate sample code.

idnumbers/nationalid/smr/social_security.py

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -36,31 +36,5 @@ def validate(id_number: str) -> bool:
3636
return validate_regexp(id_number, SocialSecurityNumber.METADATA.regexp)
3737

3838

39-
class TaxRegistrationNumber:
    """
    San Marino, entity tax registration number, COE number.

    Reference:
    https://www.oecd.org/tax/automatic-exchange/crs-implementation-and-assistance/tax-identification-numbers/San-Marino-TIN.pdf
    """
    METADATA = SimpleNamespace(
        iso3166_alpha2='SM',
        # length without insignificant chars
        min_length=7,
        max_length=7,
        # has parse function
        parsable=False,
        # has checksum function
        checksum=False,
        # regular expression to validate the id
        regexp=re.compile(r'^SM\d{5}$'),
    )

    @staticmethod
    def validate(id_number: str) -> bool:
        """
        Check ``id_number`` against ``METADATA.regexp`` via ``validate_regexp``.
        """
        pattern = TaxRegistrationNumber.METADATA.regexp
        return validate_regexp(id_number, pattern)
63-
64-
6539
SSI = SocialSecurityNumber
6640
"""alias of SocialSecurityNumber"""

tools/__init__.py

Whitespace-only changes.

tools/scan_ids.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import importlib
2+
import inspect
3+
import json
4+
import os
5+
6+
7+
def collect_ids(package_name, output_filename):
    """
    Collect the METADATA of every non-alias ID class in a package and dump it as JSON.

    The package directory is walked recursively. Every ``.py`` file whose dotted
    module name is not entirely upper case is imported, and each class in it that
    exposes a ``METADATA`` attribute with no ``alias_of`` value is recorded.
    A summary (module, country and ID counts) is printed to stdout.

    :param package_name: dotted name of the importable root package to scan,
        e.g. ``'idnumbers.nationalid'``
    :param output_filename: path of the JSON file to write
    :return: None
    """
    # Find the root package directory
    package_directory = importlib.import_module(package_name).__path__[0]

    # Recursively collect metadata for all modules and classes
    metadata = []
    modules_count = 0
    classes_count = 0
    country_codes = set()
    for root, dirs, files in os.walk(package_directory):
        # os.walk yields entries in arbitrary order; sorting (dirs in place, so
        # the traversal itself is ordered) keeps the generated JSON reproducible.
        dirs.sort()
        for file in sorted(files):
            if not file.endswith('.py'):
                continue

            # Convert the file path to a package path
            relative_path = os.path.relpath(os.path.join(root, file), package_directory)
            module_name = os.path.splitext(relative_path)[0].replace(os.path.sep, '.')
            # No upper case module name. They are aliases
            if module_name == module_name.upper():
                continue

            # Import the module and collect metadata for its classes
            module = importlib.import_module(package_name + '.' + module_name)
            module_metadata = []
            for name, obj in inspect.getmembers(module, inspect.isclass):
                class_metadata = getattr(obj, 'METADATA', None)
                # Skip classes without metadata and aliases of other IDs.
                # A METADATA without an ``alias_of`` field is treated as non-alias
                # instead of crashing the whole scan.
                if class_metadata is None or getattr(class_metadata, 'alias_of', None) is not None:
                    continue

                # Work on a copy: the class's own METADATA must keep its compiled
                # regexp, otherwise scanning would corrupt the library state.
                exported = dict(vars(class_metadata))
                regexp = exported.get('regexp')
                if hasattr(regexp, 'pattern'):
                    # Compiled patterns are not JSON serializable; export the source
                    exported['regexp'] = regexp.pattern
                module_metadata.append({
                    'class_name': name,
                    'metadata': exported
                })

            # Append the module's metadata to the overall list
            if module_metadata:
                modules_count += 1
                classes_count += len(module_metadata)
                country_code = module_name.split('.')[0]
                country_codes.add(country_code)
                metadata.append({
                    'package_name': package_name + '.' + module_name,
                    'country_code': country_code,
                    'ids': module_metadata
                })

    print('----------------------------------------------------------------------------')
    print(f'Modules: {modules_count}')
    print(f'Countries: {len(country_codes)}')
    print(f'IDs: {classes_count}')
    print('----------------------------------------------------------------------------')
    # Write the metadata to a JSON file
    with open(output_filename, 'w', encoding='utf-8') as f:
        json.dump(metadata, f, indent=2)
60+
61+
62+
if __name__ == '__main__':
    # Script entry point: scan the national ID package and write result.json
    collect_ids(package_name='idnumbers.nationalid', output_filename='result.json')

0 commit comments

Comments
 (0)