Skip to content

Commit 59b0c7c

Browse files
committed
[ENH] Switch to md5 hashing for data serialization and tests
Improve the hashing mechanism by switching from Python's built-in `hash()` to `hashlib.md5()`, so that serialized ids and fingerprints are identical across runs and platforms. Introduce a new test helper that masks date fields during JSON verification, so that dynamic dates do not invalidate approved test output.
1 parent 3f3271a commit 59b0c7c

File tree

6 files changed

+97
-51
lines changed

6 files changed

+97
-51
lines changed

docs/developers_notes/dev_log/2025_05.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,9 @@
66
# TODO:
77
-[ ] Saving and loading models
88
- [x] Make tests passing for InterpOptions serializable
9-
- [ ] Dealing with large numpy arrays
10-
- [ ] Trying to have a better implementation for deserializing complex fields
9+
- [x] Dealing with large numpy arrays
10+
- [x] Trying to have a better implementation for deserializing complex fields
11+
- [ ] Make save and load function
1112
-[ ] Better api for nugget effect optimization
1213

1314
## Saving models

gempy/core/data/_data_points_helpers.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
1-
from typing import Sequence
1+
import hashlib
2+
3+
from typing import Sequence
24

35
import numpy as np
46

57

68
def structural_element_hasher(i: int, name: str, hash_length: int = 8) -> int:
    """Build a reproducible integer id from an element index and its name.

    The id is ``i * 10**hash_length + (md5(name) mod 10**hash_length)``: the
    low ``hash_length`` decimal digits come from the name, everything above
    them from ``i``. MD5 is used instead of the built-in ``hash()`` so the
    value is the same in every process and on every platform.

    Args:
        i: Index of the element; stored above the name digits.
        name: Element name; encoded as UTF-8 before hashing.
        hash_length: Number of decimal digits taken from the name hash.

    Returns:
        The combined integer id.
    """
    modulus = 10 ** hash_length
    # MD5 is only a fingerprint here, not a security primitive. Declaring that
    # keeps the call working on FIPS-restricted Python builds, where a plain
    # hashlib.md5() raises ValueError.
    digest = hashlib.md5(name.encode('utf-8'), usedforsecurity=False).hexdigest()
    # Get the last 'hash_length' digits from the hash
    name_hash = int(digest, 16) % modulus
    return i * modulus + name_hash
1112

1213

gempy/core/data/structural_frame.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
import numpy as np
1+
import hashlib
2+
3+
import numpy as np
24
import warnings
35
from dataclasses import dataclass
46
from pydantic import model_validator, computed_field
@@ -76,12 +78,12 @@ def deserialize_orientations(values: "StructuralFrame"):
7678
@computed_field
@property
def serialize_sp(self) -> int:
    """Reproducible fingerprint of the surface-points data for serialization.

    Hashes the raw bytes of ``self.surface_points_copy.data`` with MD5 and
    keeps the first 8 hex digits, so the result lies in ``[0, 2**32)``.
    MD5 replaces the built-in ``hash()`` so the value does not vary between
    processes or platforms.
    """
    # presumably a numpy array (it exposes .tobytes()) -- confirm against the data class
    raw_bytes = self.surface_points_copy.data.tobytes()
    # Fingerprint only, not a security use; the flag keeps this working on
    # FIPS-restricted Python builds where a plain hashlib.md5() raises.
    digest = hashlib.md5(raw_bytes, usedforsecurity=False).hexdigest()
    return int(digest[:8], 16)
8082

8183
@computed_field
@property
def serialize_orientations(self) -> int:
    """Reproducible fingerprint of the orientations data for serialization.

    Hashes the raw bytes of ``self.orientations_copy.data`` with MD5 and
    keeps the first 8 hex digits, so the result lies in ``[0, 2**32)``.
    MD5 replaces the built-in ``hash()`` so the value does not vary between
    processes or platforms.
    """
    # presumably a numpy array (it exposes .tobytes()) -- confirm against the data class
    raw_bytes = self.orientations_copy.data.tobytes()
    # Fingerprint only, not a security use; the flag keeps this working on
    # FIPS-restricted Python builds where a plain hashlib.md5() raises.
    digest = hashlib.md5(raw_bytes, usedforsecurity=False).hexdigest()
    return int(digest[:8], 16)
8587

8688
def __init__(self, structural_groups: list[StructuralGroup], color_gen: ColorsGenerator):
8789
self.structural_groups = structural_groups # ? This maybe could be optional

test/test_modules/test_serialize_model.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,19 @@
1+
import json
2+
import numpy as np
3+
14
import os
25
import pprint
36

47
import gempy as gp
58
from gempy.core.data.encoders.converters import loading_model_injection
69
from gempy.core.data.enumerators import ExampleModel
710
from gempy_engine.core.data import InterpolationOptions
8-
from verify_helper import verify_json
11+
from verify_helper import verify_json, verify_json_ignoring_dates
912

1013

1114
def test_generate_horizontal_stratigraphic_model():
1215
model: gp.data.GeoModel = gp.generate_example_model(ExampleModel.HORIZONTAL_STRAT, compute_model=False)
13-
model_json = model.model_dump_json(by_alias=True, indent=4)
16+
model_json = model.model_dump_json(by_alias=True, indent=4, exclude={"*data"})
1417

1518
# Write the JSON to disk
1619
file_path = os.path.join("temp", "horizontal_stratigraphic_model.json")
@@ -34,10 +37,13 @@ def test_generate_horizontal_stratigraphic_model():
3437
assert model_deserialized.__str__() == model.__str__()
3538

3639
# # Validate json against schema
37-
if False:
40+
if True:
3841
# Ensure the 'verify/' directory exists
3942
os.makedirs("verify", exist_ok=True)
40-
verify_json(model_json, name="verify/Horizontal Stratigraphic Model serialization")
43+
verify_model = json.loads(model_json)
44+
verify_model["meta"]["creation_date"] = "<DATE_IGNORED>"
45+
verify_json(json.dumps(verify_model, indent=4), name="verify/Horizontal Stratigraphic Model serialization")
46+
4147

4248

4349
def test_interpolation_options():

test/test_modules/test_serialize_model.test_generate_horizontal_stratigraphic_model.verify/Horizontal Stratigraphic Model serialization.approved.txt

Lines changed: 39 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"meta": {
33
"name": "horizontal",
4-
"creation_date": "2025-05-14T11:11:08.368911",
4+
"creation_date": "<DATE_IGNORED>",
55
"last_modification_date": null,
66
"owner": null
77
},
@@ -20,48 +20,48 @@
2020
100.0,
2121
200.0,
2222
600.0,
23-
191371726,
24-
0.00002
23+
117776925,
24+
2e-05
2525
],
2626
[
2727
500.0,
2828
200.0,
2929
600.0,
30-
191371726,
31-
0.00002
30+
117776925,
31+
2e-05
3232
],
3333
[
3434
900.0,
3535
200.0,
3636
600.0,
37-
191371726,
38-
0.00002
37+
117776925,
38+
2e-05
3939
],
4040
[
4141
100.0,
4242
800.0,
4343
600.0,
44-
191371726,
45-
0.00002
44+
117776925,
45+
2e-05
4646
],
4747
[
4848
500.0,
4949
800.0,
5050
600.0,
51-
191371726,
52-
0.00002
51+
117776925,
52+
2e-05
5353
],
5454
[
5555
900.0,
5656
800.0,
5757
600.0,
58-
191371726,
59-
0.00002
58+
117776925,
59+
2e-05
6060
]
6161
],
6262
"name_id_map": {
63-
"rock1": 646258,
64-
"rock2": 191371726
63+
"rock1": 67239155,
64+
"rock2": 117776925
6565
},
6666
"_model_transform": null
6767
},
@@ -74,20 +74,20 @@
7474
0.0,
7575
0.0,
7676
1.0,
77-
191371726,
77+
117776925,
7878
0.01
7979
]
8080
],
8181
"name_id_map": {
82-
"rock1": 646258,
83-
"rock2": 191371726
82+
"rock1": 67239155,
83+
"rock2": 117776925
8484
},
8585
"_model_transform": null
8686
},
8787
"vertices": null,
8888
"edges": null,
8989
"scalar_field_at_interface": null,
90-
"_id": 191371726
90+
"_id": 117776925
9191
},
9292
{
9393
"name": "rock1",
@@ -99,48 +99,48 @@
9999
100.0,
100100
200.0,
101101
400.0,
102-
646258,
103-
0.00002
102+
67239155,
103+
2e-05
104104
],
105105
[
106106
500.0,
107107
200.0,
108108
400.0,
109-
646258,
110-
0.00002
109+
67239155,
110+
2e-05
111111
],
112112
[
113113
900.0,
114114
200.0,
115115
400.0,
116-
646258,
117-
0.00002
116+
67239155,
117+
2e-05
118118
],
119119
[
120120
100.0,
121121
800.0,
122122
400.0,
123-
646258,
124-
0.00002
123+
67239155,
124+
2e-05
125125
],
126126
[
127127
500.0,
128128
800.0,
129129
400.0,
130-
646258,
131-
0.00002
130+
67239155,
131+
2e-05
132132
],
133133
[
134134
900.0,
135135
800.0,
136136
400.0,
137-
646258,
138-
0.00002
137+
67239155,
138+
2e-05
139139
]
140140
],
141141
"name_id_map": {
142-
"rock1": 646258,
143-
"rock2": 191371726
142+
"rock1": 67239155,
143+
"rock2": 117776925
144144
},
145145
"_model_transform": null
146146
},
@@ -153,20 +153,20 @@
153153
0.0,
154154
0.0,
155155
1.0,
156-
646258,
156+
67239155,
157157
0.01
158158
]
159159
],
160160
"name_id_map": {
161-
"rock1": 646258,
162-
"rock2": 191371726
161+
"rock1": 67239155,
162+
"rock2": 117776925
163163
},
164164
"_model_transform": null
165165
},
166166
"vertices": null,
167167
"edges": null,
168168
"scalar_field_at_interface": null,
169-
"_id": 646258
169+
"_id": 67239155
170170
}
171171
],
172172
"structural_relation": 1,
@@ -249,8 +249,8 @@
249249
},
250250
"is_dirty": true,
251251
"basement_color": "#ffbe00",
252-
"serialize_sp": 3101115521743951095,
253-
"serialize_orientations": 4571791654756834882
252+
"serialize_sp": 3507338795,
253+
"serialize_orientations": 553806131
254254
},
255255
"grid": {
256256
"_octree_grid": null,

test/verify_helper.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,42 @@ def verify_json(item, name: str):
3535
verify(item, options=parameters)
3636

3737

38+
def verify_json_ignoring_dates(item, name: str, date_fields=None):
    """
    Verify JSON content while ignoring specified date fields.

    String values stored under any of ``date_fields`` are replaced, at any
    nesting depth, with the ``"<DATE_IGNORED>"`` placeholder before the
    content is handed to ``verify_json``. The caller's object is never
    modified.

    Args:
        item: The JSON content to verify. Either an already-decoded object
            (dict / list) or a JSON string such as the output of
            ``model_dump_json``. A JSON string is decoded, masked and
            re-encoded with ``indent=4``; a string that is not valid JSON is
            verified unchanged.
        name: Name for the verification
        date_fields: List of field names containing dates to ignore. When
            omitted (or empty), a default list of common date keys is used,
            including the model metadata keys ``creation_date`` and
            ``last_modification_date``.
    """
    import copy
    import json

    # Replace date fields with placeholders
    date_fields = date_fields or [
        "date",
        "created_at",
        "updated_at",
        "timestamp",
        "creation_date",
        "last_modification_date",
    ]

    if isinstance(item, str):
        # Serialized models arrive as JSON text; the recursive masker only
        # walks dicts and lists, so the text has to be decoded first.
        try:
            decoded = json.loads(item)
        except json.JSONDecodeError:
            # Not JSON: there is nothing to mask, verify the text as it is.
            verify_json(item, name)
            return
        _replace_dates_recursive(decoded, date_fields)
        verify_json(json.dumps(decoded, indent=4), name)
        return

    # Deep copy to avoid modifying the original
    item_copy = copy.deepcopy(item)
    _replace_dates_recursive(item_copy, date_fields)

    # Use the existing verify_json function
    verify_json(item_copy, name)
57+
58+
59+
def _replace_dates_recursive(obj, date_fields):
60+
"""Helper function to recursively replace date values."""
61+
if isinstance(obj, dict):
62+
for key, value in obj.items():
63+
if key in date_fields and isinstance(value, str):
64+
obj[key] = "<DATE_IGNORED>"
65+
elif isinstance(value, (dict, list)):
66+
_replace_dates_recursive(value, date_fields)
67+
elif isinstance(obj, list):
68+
for item in obj:
69+
if isinstance(item, (dict, list)):
70+
_replace_dates_recursive(item, date_fields)
71+
72+
73+
3874
def gempy_verify_array(item, name: str, rtol: float = 1e-5, atol: float = 1e-5, ):
3975
# ! You will have to set the path to your diff tool
4076
reporter = GenericDiffReporter.create(

0 commit comments

Comments
 (0)