Skip to content

8.4.2 #528

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Mar 17, 2025
Merged

8.4.2 #528

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 2 additions & 11 deletions deepdiff/base.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,12 @@
from typing import Protocol, Any
from typing import Any
from deepdiff.helper import strings, numbers, SetOrdered


DEFAULT_SIGNIFICANT_DIGITS_WHEN_IGNORE_NUMERIC_TYPES = 12
TYPE_STABILIZATION_MSG = 'Unable to stabilize the Numpy array {} due to {}. Please set ignore_order=False.'


class BaseProtocol(Protocol):
    """Structural type listing the attributes an implementing object must expose.

    Only attribute names and types are declared here; no behaviour is defined.
    """

    # The two objects being compared with each other.
    t1: Any
    t2: Any
    # NOTE(review): presumably the distance cutoff used when pairing items -- confirm.
    cutoff_distance_for_pairs: float
    # NOTE(review): presumably switches numeric distance to a logarithmic scale -- confirm.
    use_log_scale: bool
    # NOTE(review): presumably the threshold used with the log-scale comparison -- confirm.
    log_scale_similarity_threshold: float
    # Name of the result view, stored as a string.
    view: str


class Base(BaseProtocol):
class Base:
numbers = numbers
strings = strings

Expand Down
12 changes: 10 additions & 2 deletions deepdiff/deephash.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from pathlib import Path
from enum import Enum
from deepdiff.helper import (strings, numbers, times, unprocessed, not_hashed, add_to_frozen_set,
convert_item_or_items_into_set_else_none, get_doc,
convert_item_or_items_into_set_else_none, get_doc, ipranges,
convert_item_or_items_into_compiled_regexes_else_none,
get_id, type_is_subclass_of_type_group, type_in_type_group,
number_to_string, datetime_normalize, KEY_TO_VAL_STR,
Expand Down Expand Up @@ -142,7 +142,7 @@ class DeepHash(Base):
__doc__ = doc

def __init__(self,
obj,
obj: Any,
*,
apply_hash=True,
custom_operators: Optional[List[Any]] =None,
Expand Down Expand Up @@ -484,6 +484,11 @@ def _prep_number(self, obj):
number_format_notation=self.number_format_notation)
return KEY_TO_VAL_STR.format(type_, obj)

def _prep_ipranges(self, obj):
    """Build the pre-hash string for an IP range object.

    The object is reduced to its ``str()`` form and tagged with the fixed
    type label ``'iprange'`` via ``KEY_TO_VAL_STR``.

    Args:
        obj: The value to prepare. It is only ever passed through ``str()``.

    Returns:
        ``KEY_TO_VAL_STR`` formatted with ``'iprange'`` and ``str(obj)``.
    """
    type_ = 'iprange'
    obj = str(obj)
    return KEY_TO_VAL_STR.format(type_, obj)

def _prep_datetime(self, obj):
type_ = 'datetime'
obj = datetime_normalize(self.truncate_datetime, obj, default_timezone=self.default_timezone)
Expand Down Expand Up @@ -558,6 +563,9 @@ def _hash(self, obj, parent, parents_ids=EMPTY_FROZENSET):
elif isinstance(obj, numbers): # type: ignore
result = self._prep_number(obj)

elif isinstance(obj, ipranges):
result = self._prep_ipranges(obj)

elif isinstance(obj, MutableMapping):
result, counts = self._prep_dict(obj=obj, parent=parent, parents_ids=parents_ids)

Expand Down
24 changes: 21 additions & 3 deletions deepdiff/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from enum import Enum
from copy import deepcopy
from math import isclose as is_close
from typing import List, Dict, Callable, Union, Any, Pattern, Tuple, Optional, Set, FrozenSet, TYPE_CHECKING
from typing import List, Dict, Callable, Union, Any, Pattern, Tuple, Optional, Set, FrozenSet, TYPE_CHECKING, Protocol
from collections.abc import Mapping, Iterable, Sequence
from collections import defaultdict
from inspect import getmembers
Expand All @@ -27,7 +27,7 @@
np_ndarray, np_floating, get_numpy_ndarray_rows, RepeatedTimer,
TEXT_VIEW, TREE_VIEW, DELTA_VIEW, detailed__dict__, add_root_to_paths,
np, get_truncate_datetime, dict_, CannotCompare, ENUM_INCLUDE_KEYS,
PydanticBaseModel, Opcode, SetOrdered)
PydanticBaseModel, Opcode, SetOrdered, ipranges)
from deepdiff.serialization import SerializationMixin
from deepdiff.distance import DistanceMixin, logarithmic_similarity
from deepdiff.model import (
Expand Down Expand Up @@ -119,7 +119,17 @@ def _report_progress(_stats, progress_logger, duration):
)


class DeepDiff(ResultDict, SerializationMixin, DistanceMixin, Base):
class DeepDiffProtocol(Protocol):
    """Structural type listing the attributes an implementing object must expose.

    Only attribute names and types are declared here; no behaviour is defined.
    """

    # The two objects being compared with each other.
    t1: Any
    t2: Any
    # NOTE(review): presumably the distance cutoff used when pairing items -- confirm.
    cutoff_distance_for_pairs: float
    # NOTE(review): presumably switches numeric distance to a logarithmic scale -- confirm.
    use_log_scale: bool
    # NOTE(review): presumably the threshold used with the log-scale comparison -- confirm.
    log_scale_similarity_threshold: float
    # Name of the result view, stored as a string.
    view: str



class DeepDiff(ResultDict, SerializationMixin, DistanceMixin, DeepDiffProtocol, Base):
__doc__ = doc

CACHE_AUTO_ADJUST_THRESHOLD = 0.25
Expand Down Expand Up @@ -1501,6 +1511,11 @@ def _diff_numbers(self, level, local_tree=None, report_type_change=True):
if t1_s != t2_s:
self._report_result('values_changed', level, local_tree=local_tree)

def _diff_ipranges(self, level, local_tree=None):
    """Diff IP ranges.

    The two sides are compared through their ``str()`` forms. When those
    differ, a single ``'values_changed'`` entry is reported for ``level``;
    otherwise nothing is reported.

    Args:
        level: Object exposing the two compared values as ``t1`` and ``t2``.
        local_tree: Forwarded unchanged to ``_report_result``.
    """
    if str(level.t1) != str(level.t2):
        self._report_result('values_changed', level, local_tree=local_tree)

def _diff_datetime(self, level, local_tree=None):
"""Diff DateTimes"""
level.t1 = datetime_normalize(self.truncate_datetime, level.t1, default_timezone=self.default_timezone)
Expand Down Expand Up @@ -1695,6 +1710,9 @@ def _diff(self, level, parents_ids=frozenset(), _original_type=None, local_tree=
elif isinstance(level.t1, datetime.datetime):
self._diff_datetime(level, local_tree=local_tree)

elif isinstance(level.t1, ipranges):
self._diff_ipranges(level, local_tree=local_tree)

elif isinstance(level.t1, (datetime.date, datetime.timedelta, datetime.time)):
self._diff_time(level, local_tree=local_tree)

Expand Down
46 changes: 34 additions & 12 deletions deepdiff/distance.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,45 @@
import math
import datetime
from deepdiff.base import BaseProtocol
from typing import TYPE_CHECKING, Callable, Protocol, Any
from deepdiff.deephash import DeepHash
from deepdiff.helper import (
DELTA_VIEW, numbers, strings, add_to_frozen_set, not_found, only_numbers, np, np_float64, time_to_seconds,
cartesian_product_numpy, np_ndarray, np_array_factory, get_homogeneous_numpy_compatible_type_of_seq, dict_,
CannotCompare)
from collections.abc import Mapping, Iterable

if TYPE_CHECKING:
from deepdiff.diff import DeepDiffProtocol

DISTANCE_CALCS_NEEDS_CACHE = "Distance calculation can not happen once the cache is purged. Try with _cache='keep'"
class DistanceProtocol(DeepDiffProtocol, Protocol):
    """Structural type used to annotate ``self`` in the DistanceMixin methods.

    It extends ``DeepDiffProtocol`` with the extra attributes and methods
    declared below. All method bodies are placeholders (``...``).
    """
    # NOTE(review): ``DeepDiffProtocol`` is imported only under ``TYPE_CHECKING``;
    # presumably this class lives under the same guard in the real file -- confirm.

    # ``hashes`` is handed to ``DeepHash`` as its ``hashes`` argument by the mixin.
    hashes: dict
    deephash_parameters: dict
    # Quoted so the ``|`` union is never evaluated at runtime; ``typing.Callable``
    # only supports the ``|`` operator on Python 3.10+.
    iterable_compare_func: "Callable | None"
    math_epsilon: float
    cutoff_distance_for_pairs: float

    # NOTE(review): double-underscore names are mangled with the defining class
    # name, so these two declarations mangle differently from the same-named
    # methods defined on DistanceMixin -- confirm the type checker treats them
    # as intended.
    def __get_item_rough_length(self, item, parent: str = "root") -> float:
        ...

    def _to_delta_dict(
        self,
        directed: bool = True,
        report_repetition_required: bool = True,
        always_include_values: bool = False,
    ) -> dict:
        ...

    def __calculate_item_deephash(self, item: Any) -> None:
        ...


class DistanceMixin(BaseProtocol):

def _get_rough_distance(self):
DISTANCE_CALCS_NEEDS_CACHE = "Distance calculation can not happen once the cache is purged. Try with _cache='keep'"


class DistanceMixin:

def _get_rough_distance(self: "DistanceProtocol"):
"""
Gives a numeric value for the distance of t1 and t2 based on how many operations are needed to convert
one to the other.
Expand Down Expand Up @@ -51,7 +74,7 @@ def _get_rough_distance(self):

return diff_length / (t1_len + t2_len)

def __get_item_rough_length(self, item, parent='root'):
def __get_item_rough_length(self: "DistanceProtocol", item, parent='root'):
"""
Get the rough length of an item.
It is used as a part of calculating the rough distance between objects.
Expand All @@ -69,7 +92,7 @@ def __get_item_rough_length(self, item, parent='root'):
length = DeepHash.get_key(self.hashes, key=item, default=None, extract_index=1)
return length

def __calculate_item_deephash(self, item):
def __calculate_item_deephash(self: "DistanceProtocol", item: Any) -> None:
DeepHash(
item,
hashes=self.hashes,
Expand All @@ -79,8 +102,7 @@ def __calculate_item_deephash(self, item):
)

def _precalculate_distance_by_custom_compare_func(
self, hashes_added, hashes_removed, t1_hashtable, t2_hashtable, _original_type):

self: "DistanceProtocol", hashes_added, hashes_removed, t1_hashtable, t2_hashtable, _original_type):
pre_calced_distances = dict_()
for added_hash in hashes_added:
for removed_hash in hashes_removed:
Expand All @@ -99,7 +121,7 @@ def _precalculate_distance_by_custom_compare_func(
return pre_calced_distances

def _precalculate_numpy_arrays_distance(
self, hashes_added, hashes_removed, t1_hashtable, t2_hashtable, _original_type):
self: "DistanceProtocol", hashes_added, hashes_removed, t1_hashtable, t2_hashtable, _original_type):

# We only want to deal with 1D arrays.
if isinstance(t2_hashtable[next(iter(hashes_added))].item, (np_ndarray, list)):
Expand Down Expand Up @@ -203,7 +225,7 @@ def _get_numbers_distance(num1, num2, max_=1, use_log_scale=False, log_scale_sim
return 0
if use_log_scale:
distance = logarithmic_distance(num1, num2)
if distance < logarithmic_distance:
if distance < 0:
return 0
return distance
if not isinstance(num1, float):
Expand Down Expand Up @@ -246,7 +268,7 @@ def numpy_apply_log_keep_sign(array, offset=MATH_LOG_OFFSET):
return signed_log_values


def logarithmic_similarity(a: numbers, b: numbers, threshold: float=0.1):
def logarithmic_similarity(a: numbers, b: numbers, threshold: float=0.1) -> float:
"""
A threshold of 0.1 translates to about 10.5% difference.
A threshold of 0.5 translates to about 65% difference.
Expand All @@ -255,7 +277,7 @@ def logarithmic_similarity(a: numbers, b: numbers, threshold: float=0.1):
return logarithmic_distance(a, b) < threshold


def logarithmic_distance(a: numbers, b: numbers):
def logarithmic_distance(a: numbers, b: numbers) -> float:
# Apply logarithm to the absolute values and consider the sign
a = float(a)
b = float(b)
Expand Down
6 changes: 4 additions & 2 deletions deepdiff/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
import string
import time
import enum
from typing import NamedTuple, Any, List, Optional, Dict, Union, TYPE_CHECKING
import ipaddress
from typing import NamedTuple, Any, List, Optional, Dict, Union, TYPE_CHECKING, Tuple
from ast import literal_eval
from decimal import Decimal, localcontext, InvalidOperation as InvalidDecimalOperation
from itertools import repeat
Expand Down Expand Up @@ -184,9 +185,10 @@ def get_semvar_as_integer(version):
only_complex_number = (complex,) + numpy_complex_numbers
only_numbers = (int, float, complex, Decimal) + numpy_numbers
datetimes = (datetime.datetime, datetime.date, datetime.timedelta, datetime.time)
ipranges = (ipaddress.IPv4Interface, ipaddress.IPv6Interface, ipaddress.IPv4Network, ipaddress.IPv6Network)
uuids = (uuid.UUID, )
times = (datetime.datetime, datetime.time)
numbers = only_numbers + datetimes
numbers: Tuple = only_numbers + datetimes
booleans = (bool, np_bool_)

basic_types = strings + numbers + uuids + booleans + (type(None), )
Expand Down
7 changes: 5 additions & 2 deletions deepdiff/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import logging

from deepdiff.helper import (
strings, numbers, add_to_frozen_set, get_doc, dict_, RE_COMPILED_TYPE
strings, numbers, add_to_frozen_set, get_doc, dict_, RE_COMPILED_TYPE, ipranges
)

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -115,7 +115,7 @@ def __init__(self,
matched_values=self.__set_or_dict(),
unprocessed=[])
self.use_regexp = use_regexp
if not strict_checking and isinstance(item, numbers):
if not strict_checking and (isinstance(item, numbers) or isinstance(item, ipranges)):
item = str(item)
if self.use_regexp:
try:
Expand Down Expand Up @@ -312,6 +312,9 @@ def __search(self, obj, item, parent="root", parents_ids=frozenset()):
elif isinstance(obj, strings) and isinstance(item, numbers):
return

elif isinstance(obj, ipranges):
self.__search_str(str(obj), item, parent)

elif isinstance(obj, numbers):
self.__search_numbers(obj, item, parent)

Expand Down
4 changes: 3 additions & 1 deletion deepdiff/serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
pydantic_base_model_type,
PydanticBaseModel,
NotPresent,
ipranges,
)
from deepdiff.model import DeltaResult

Expand Down Expand Up @@ -112,7 +113,8 @@ class UnsupportedFormatErr(TypeError):
'SetOrdered': SetOrdered,
'namedtuple': collections.namedtuple,
'OrderedDict': collections.OrderedDict,
'Pattern': re.Pattern,
'Pattern': re.Pattern,
'iprange': str,
}


Expand Down
14 changes: 10 additions & 4 deletions deepdiff/summarize.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,10 @@ def calculate_weights(node):
weight = 0
children_weights = {}
for k, v in node.items():
edge_weight = len(k)
try:
edge_weight = len(k)
except TypeError:
edge_weight = 1
child_weight, child_structure = calculate_weights(v)
total_weight = edge_weight + child_weight
weight += total_weight
Expand Down Expand Up @@ -133,6 +136,9 @@ def greedy_tree_summarization_balanced(json_data: JSON, max_weight: int, balance


def summarize(data: JSON, max_length: int = 200, balance_threshold: float = 0.6) -> str:
    """Return a JSON string summarizing ``data``.

    Args:
        data: The structure to summarize.
        max_length: Passed to ``greedy_tree_summarization_balanced`` as its
            ``max_weight`` argument.
        balance_threshold: Passed through to
            ``greedy_tree_summarization_balanced``.

    Returns:
        The ``json_dumps`` output of the summarized structure, or
        ``str(data)`` if summarizing or dumping raises.
    """
    try:
        return json_dumps(
            greedy_tree_summarization_balanced(data, max_length, balance_threshold)
        )
    except Exception:
        # Deliberate best-effort fallback: any failure while summarizing or
        # serializing degrades to the plain string form instead of raising.
        return str(data)
6 changes: 6 additions & 0 deletions tests/test_diff_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -2252,3 +2252,9 @@ def test_affected_root_keys_when_dict_empty(self):

diff2 = DeepDiff({}, {1:1, 2:2})
assert [] == diff2.affected_root_keys

def test_range1(self):
    """Diffing ``range(0, 10)`` against ``range(0, 8)`` reports items 8 and 9 as removed."""
    range1 = range(0, 10)
    range2 = range(0, 8)
    diff = DeepDiff(range1, range2)
    assert {'iterable_item_removed': {'root[8]': 8, 'root[9]': 9}} == diff
35 changes: 33 additions & 2 deletions tests/test_hash.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@
import pytz
import logging
import datetime
import ipaddress
from typing import Union
from pathlib import Path
from collections import namedtuple
from functools import partial
from enum import Enum
from deepdiff import DeepHash
from deepdiff import DeepDiff, DeepHash
from deepdiff.deephash import (
prepare_string_for_hashing, unprocessed,
UNPROCESSED_KEY, BoolObj, HASH_LOOKUP_ERR_MSG, combine_hashes_lists)
Expand Down Expand Up @@ -999,10 +1001,39 @@ def test_combine_hashes_lists(self, items, prefix, expected):
(7, b"First have a cup of potatos. Then \xc3\x28 cup of flour", None, False, UnicodeDecodeError, EXPECTED_MESSAGE3),
])
def test_hash_encodings(self, test_num, item, encodings, ignore_encoding_errors, expected_result, expected_message):
if UnicodeDecodeError == expected_result:
if UnicodeDecodeError == expected_result: # NOQA
with pytest.raises(expected_result) as exc_info:
DeepHash(item, encodings=encodings, ignore_encoding_errors=ignore_encoding_errors)
assert expected_message == str(exc_info.value), f"test_encodings test #{test_num} failed."
else:
result = DeepHash(item, encodings=encodings, ignore_encoding_errors=ignore_encoding_errors)
assert expected_result == result, f"test_encodings test #{test_num} failed."

def test_ip_addresses(self):
    """Objects holding an IP network can be hashed, repr'd and diffed."""

    class ClassWithIp:
        """Holder with a single IP data member.

        Used to demonstrate deepdiff iterating endlessly over IPv6Interface.
        """

        def __init__(self, addr: str):
            self.field: Union[
                ipaddress.IPv4Network,
                ipaddress.IPv6Network,
                ipaddress.IPv4Interface,
                ipaddress.IPv6Interface,
            ] = ipaddress.IPv6Network(addr)

    obj1 = ClassWithIp("2002:db8::/30")
    obj1_hash = DeepHashPrep(obj1)
    repr(obj1_hash)  # shouldn't raise error
    assert r"objClassWithIp:{str:field:iprange:2002:db8::/30}" == obj1_hash[obj1]

    obj2 = ClassWithIp("2001:db8::/32")
    diff = DeepDiff(obj1, obj2)
    assert {
        "values_changed": {
            "root.field": {
                "new_value": ipaddress.IPv6Network("2001:db8::/32"),
                "old_value": ipaddress.IPv6Network("2002:db8::/30"),
            }
        }
    } == diff
Loading