Skip to content

Commit ae07c96

Browse files
rcannoodDriesSchaumontbentsherman
authored
Add additional test for hasher (#6198) [ci fast]
Signed-off-by: Robrecht Cannoodt <rcannood@gmail.com> Signed-off-by: Ben Sherman <bentshermann@gmail.com> Co-authored-by: DriesSchaumont <5946712+DriesSchaumont@users.noreply.github.com> Co-authored-by: Ben Sherman <bentshermann@gmail.com>
1 parent a0eeed8 commit ae07c96

File tree

2 files changed

+29
-4
lines changed

2 files changed

+29
-4
lines changed

modules/nf-commons/src/main/nextflow/util/HashBuilder.java

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -346,12 +346,15 @@ public FileVisitResult visitFile(Path path, BasicFileAttributes attrs) throws IO
346346
try {
347347
// the file relative base
348348
final String relPath = base.relativize(path).toString();
349-
// compute the file path hash and sum to the result hash
350-
// since the sum is commutative, the traverse order does not matter
351-
sumBytes(resultBytes, hashBytes(relPath, HashMode.STANDARD));
352349
// the file content sha-256 checksum
353350
final String sha256 = sha256Cache.get(path);
354-
sumBytes(resultBytes, hashBytes(sha256, HashMode.STANDARD));
351+
// compute the file path hash and sum to the result hash
352+
// since the sum is commutative, the traverse order does not matter
353+
// compute a hash of the (file path, file hash) pair.
354+
// since the sum is commutative, the resulting hash in `resultBytes` is invariant to the file traversal order.
355+
// however, the file path and file hash do need to be processed together,
356+
// otherwise this introduces an edge case with directories with similar contents with have the same sha (see nextflow-io/nextflow#6198)
357+
sumBytes(resultBytes, hashBytes(Map.entry(relPath, sha256), HashMode.STANDARD));
355358
return FileVisitResult.CONTINUE;
356359
}
357360
catch (ExecutionException t) {

modules/nf-commons/src/test/nextflow/util/HashBuilderTest.groovy

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,4 +131,26 @@ class HashBuilderTest extends Specification {
131131
then:
132132
hash1.hash() == hash2.hash()
133133
}
134+
135+
def 'directories with same content but different structure should yield different hashes'() {
136+
given:
137+
def folder = TestHelper.createInMemTempDir()
138+
folder.resolve('dir1').mkdir()
139+
folder.resolve('dir2').mkdir()
140+
and:
141+
folder.resolve('dir1/foo').text = "I'm foo"
142+
folder.resolve('dir1/bar').text = "I'm bar"
143+
and:
144+
// the content of these files is intentionally swapped
145+
folder.resolve('dir2/foo').text = "I'm bar"
146+
folder.resolve('dir2/bar').text = "I'm foo"
147+
148+
when:
149+
def hash1 = HashBuilder.hashDirSha256(HashBuilder.defaultHasher(), folder.resolve('dir1'), folder.resolve('dir1'))
150+
and:
151+
def hash2 = HashBuilder.hashDirSha256(HashBuilder.defaultHasher(), folder.resolve('dir2'), folder.resolve('dir2'))
152+
153+
then:
154+
hash1.hash() != hash2.hash()
155+
}
134156
}

0 commit comments

Comments
 (0)