Skip to content

Commit 5065bee

Browse files
committed
file_checksum: use Java to compute file checksum rather than system() call
this can be slightly slower, but seems to be a better practice.
1 parent 6847287 commit 5065bee

File tree

6 files changed

+47
-9
lines changed

6 files changed

+47
-9
lines changed

+stdlib/+fileio/file_checksum.m

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
function hash = file_checksum(file, method)
%% file_checksum compute checksum of file
% Reads the file in chunks to avoid excessive RAM use, feeding each chunk
% to a java.security.MessageDigest instance.
%
% Inputs:
%   file:   path to an existing file (validated by mustBeFile)
%   method: digest algorithm name passed to MessageDigest.getInstance:
%           md5, sha1, SHA-256, etc.
%           "sha256" and "SHA256" are accepted as aliases of "SHA-256".
%
% Output:
%   hash: string scalar holding the digest as lowercase hexadecimal
%
% Reference: https://docs.oracle.com/javase/8/docs/api/java/security/MessageDigest.html
arguments
  file (1,1) string {mustBeFile}
  method (1,1) string {mustBeNonzeroLengthText}
end

% Java names the algorithm with a hyphen; map the common spellings to it
if any(method == ["sha256", "SHA256"])
  method = "SHA-256";
end

file_chunk = 10e6;  % arbitrary (bytes) didn't seem to be very sensitive for speed

inst = java.security.MessageDigest.getInstance(method);

fid = fopen(file, 'r');
assert(fid > 0, "could not open " + file)
% close the file even if fread() or the Java call below raises an error
closer = onCleanup(@() fclose(fid));

while ~feof(fid)
  chunk = fread(fid, file_chunk, '*uint8');
  % feof() only becomes true after a read hits end-of-file, so an empty
  % file (or a size that is an exact multiple of file_chunk) yields one
  % empty read. MATLAB passes an empty array to Java as null, which
  % MessageDigest.update rejects, so skip empty chunks.
  if ~isempty(chunk)
    inst.update(chunk)
  end
end

% digest() returns signed Java bytes; reinterpret as unsigned for hex output
hash = typecast(inst.digest, 'uint8');

hash = string(sprintf('%.2x', hash));

end

+stdlib/+test/TestFileImpure.m

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -67,18 +67,20 @@ function test_is_exe_which_fullpath(tc)
6767

6868
function test_hash(tc)
6969
import matlab.unittest.constraints.IsFile
70+
import matlab.unittest.fixtures.TemporaryFolderFixture
7071

71-
fn = tempname;
72+
fixture = tc.applyFixture(TemporaryFolderFixture);
73+
74+
fn = fullfile(fixture.Folder, "hello");
7275
fid = fopen(fn, "w");
7376
tc.assumeGreaterThan(fid, 0);
7477
fprintf(fid, "hello");
7578
fclose(fid);
7679
tc.assumeThat(fn, IsFile)
7780

78-
tc.verifyEqual(stdlib.fileio.md5sum(fn), "5d41402abc4b2a76b9719d911017c592")
79-
tc.verifyEqual(stdlib.fileio.sha256sum(fn), "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824")
81+
tc.verifyEqual(stdlib.fileio.file_checksum(fn, "md5"), "5d41402abc4b2a76b9719d911017c592")
82+
tc.verifyEqual(stdlib.fileio.file_checksum(fn, "sha256"), "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824")
8083

81-
delete(fn)
8284
end
8385

8486
end

+stdlib/md5sum.m

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,6 @@
55
file (1,1) string {mustBeFile}
66
end
77

8-
hash = stdlib.fileio.md5sum(file);
8+
hash = stdlib.fileio.file_checksum(file, "md5");
99

1010
end

+stdlib/sha256sum.m

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,6 @@
55
file (1,1) string {mustBeFile}
66
end
77

8-
hash = stdlib.fileio.sha256sum(file);
8+
hash = stdlib.fileio.file_checksum(file, "SHA-256");
99

1010
end

+stdlib/+fileio/md5sum.m renamed to archive/md5sum.m

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
function hash = md5sum(file)
22
%% md5sum(file)
3+
% DEPRECATED: use file_checksum(file, "md5") instead
34
% compute MD5 hash of file
45
arguments
56
file (1,1) string {mustBeFile}
@@ -12,7 +13,7 @@
1213
elseif ispc
1314
[stat, hash] = system("CertUtil -hashfile " + file + " MD5");
1415
else
15-
error("no sha256sum method for your OS")
16+
error("no method for your OS")
1617
end
1718

1819
assert(stat == 0, hash, "failed to compute md5sum of " + file)

+stdlib/+fileio/sha256sum.m renamed to archive/sha256sum.m

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
function hash = sha256sum(file)
2-
2+
%% sha256sum
3+
% DEPRECATED: use file_checksum(file, "SHA-256") instead
4+
% compute sha256 checksum of file
35
arguments
46
file (1,1) string {mustBeFile}
57
end
@@ -11,7 +13,7 @@
1113
elseif ispc
1214
[stat, hash] = system("CertUtil -hashfile " + file + " SHA256");
1315
else
14-
error("no sha256sum method for your OS")
16+
error("no method for your OS")
1517
end
1618

1719
assert(stat == 0, hash, "failed to compute SHA256 hash of " + file)

0 commit comments

Comments
 (0)