Skip to content

Commit b91fb89

Browse files
Merge pull request #9002 from ThomasWaldmann/chunker-fuzzing-test-1.4
fuzzing test for default chunker
2 parents f8d9f8c + 9485564 commit b91fb89

File tree

1 file changed

+42
-1
lines changed

1 file changed

+42
-1
lines changed

src/borg/testsuite/chunker_pytest.py

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import pytest
66

77
from .chunker import cf
8-
from ..chunker import Chunker, ChunkerFixed, sparsemap, has_seek_hole
8+
from ..chunker import Chunker, ChunkerFixed, sparsemap, has_seek_hole, get_chunker
99
from ..constants import * # NOQA
1010

1111
BS = 4096 # fs block size
@@ -161,3 +161,44 @@ def test_buzhash_chunksize_distribution():
161161
# most chunks should be cut due to buzhash triggering, not due to clipping at min/max size:
162162
assert min_count < 10
163163
assert max_count < 10
164+
165+
166+
@pytest.mark.skipif("BORG_TESTS_SLOW" not in os.environ, reason="slow tests not enabled, use BORG_TESTS_SLOW=1")
@pytest.mark.parametrize("worker", range(os.cpu_count() or 1))
def test_fuzz_chunkify(worker):
    """Fuzz the default chunker with random / all-same / all-zero data of misc. sizes, seed 0 or random int32 seeds."""
    import random

    def random_int32():
        # getrandbits() yields an unsigned 32-bit value; fold it into the signed int32 range.
        value = random.getrandbits(32)
        return value - 2**32 if value >= 2**31 else value

    seed_list = [0] + [random_int32() for _ in range(50)]
    size_list = [random.randint(1, 4 * 1024 * 1024) for _ in range(50)]

    for chunker_seed in seed_list:
        chunker = get_chunker(*CHUNKER_PARAMS, seed=chunker_seed)
        for data_size in size_list:
            # Random data
            payload = os.urandom(data_size)
            with BytesIO(payload) as stream:
                chunks = cf(chunker.chunkify(stream))
            # print so a failing (seed, size) combination can be reproduced manually
            print(chunker_seed, data_size)
            assert b"".join(chunks) == payload

            # All-same data
            payload = b"\x42" * data_size
            with BytesIO(payload) as stream:
                chunks = cf(chunker.chunkify(stream))
            print(chunker_seed, data_size)
            assert b"".join(chunks) == payload

            # All-zero data
            payload = b"\x00" * data_size
            with BytesIO(payload) as stream:
                chunks = cf(chunker.chunkify(stream))
            # cf() gives just the integer sizes of each all-zero chunk (since fmap is not used, they are data)
            print(chunker_seed, data_size)
            assert sum(chunks) == data_size

0 commit comments

Comments
 (0)