|  | 
| 5 | 5 | import pytest | 
| 6 | 6 | 
 | 
| 7 | 7 | from .chunker import cf | 
| 8 |  | -from ..chunker import Chunker, ChunkerFixed, sparsemap, has_seek_hole | 
|  | 8 | +from ..chunker import Chunker, ChunkerFixed, sparsemap, has_seek_hole, get_chunker | 
| 9 | 9 | from ..constants import *  # NOQA | 
| 10 | 10 | 
 | 
| 11 | 11 | BS = 4096  # fs block size | 
| @@ -161,3 +161,44 @@ def test_buzhash_chunksize_distribution(): | 
| 161 | 161 |     # most chunks should be cut due to buzhash triggering, not due to clipping at min/max size: | 
| 162 | 162 |     assert min_count < 10 | 
| 163 | 163 |     assert max_count < 10 | 
|  | 164 | + | 
|  | 165 | + | 
@pytest.mark.skipif("BORG_TESTS_SLOW" not in os.environ, reason="slow tests not enabled, use BORG_TESTS_SLOW=1")
@pytest.mark.parametrize("worker", range(os.cpu_count() or 1))
def test_fuzz_chunkify(worker):
    """Fuzz the chunker with random, all-same and all-zero data of misc. sizes and seeds.

    The ``worker`` parameter only fans the test out once per CPU so a parallel
    test runner can execute the workers concurrently; each worker draws its own
    random seeds and sizes. For every (seed, size) pair, chunking followed by
    joining the chunks must reconstruct the input exactly.
    """
    import random

    def rnd_int32():
        # Fold a random 32-bit unsigned value into the signed int32 range
        # expected as a chunker seed.
        uint = random.getrandbits(32)
        return uint if uint < 2**31 else uint - 2**32

    def chunked(chunker, data):
        # Chunk *data* from an in-memory file and normalize the chunks via cf()
        # (data chunks -> bytes; all-zero chunks -> their integer size).
        with BytesIO(data) as bio:
            return cf(chunker.chunkify(bio))

    # Always include seed 0 (the default / most common case) plus random seeds.
    seeds = [0] + [rnd_int32() for _ in range(50)]
    sizes = [random.randint(1, 4 * 1024 * 1024) for _ in range(50)]

    for seed in seeds:
        chunker = get_chunker(*CHUNKER_PARAMS, seed=seed)
        for size in sizes:
            # Random (incompressible) data must round-trip exactly.
            data = os.urandom(size)
            parts = chunked(chunker, data)
            assert b"".join(parts) == data, f"seed={seed} size={size} (random data)"

            # All-same (non-zero) data must round-trip exactly, too.
            data = b"\x42" * size
            parts = chunked(chunker, data)
            assert b"".join(parts) == data, f"seed={seed} size={size} (all-same data)"

            # All-zero data: parts has just the integer sizes of each all-zero
            # chunk (since fmap is not used, they are data), so the sizes must
            # add up to the input size.
            data = b"\x00" * size
            parts = chunked(chunker, data)
            assert sum(parts) == size, f"seed={seed} size={size} (all-zero data)"
0 commit comments