@@ -318,17 +318,18 @@ use std::fs;
 use std::hash::{self, Hasher};
 use std::io::{self, Read};
 use std::path::{Path, PathBuf};
-use std::str;
+use std::str::{self, FromStr};
 use std::sync::{Arc, Mutex};
 use std::time::SystemTime;
 
 use anyhow::{bail, format_err};
 use filetime::FileTime;
-use fxhash::FxHasher;
 use log::{debug, info};
+use md5::{Digest, Md5};
 use serde::de;
 use serde::ser;
 use serde::{Deserialize, Serialize};
+use sha1::Sha1;
 
 use crate::core::compiler::unit_graph::UnitDep;
 use crate::core::Package;
@@ -344,8 +345,14 @@ use super::job::{
 };
 use super::{BuildContext, Context, FileFlavor, Unit};
 
-type FileSize = u32;
-type FileHash = u64;
+// While source files can't currently be > 4GB, bin files could be.
+pub type FileSize = u64;
+
+#[derive(Clone)]
+pub struct FileHash {
+    kind: SourceFileHashAlgorithm,
+    hash: String,
+}
 
 /// Determines if a `unit` is up-to-date, and if not prepares necessary work to
 /// update the persisted fingerprint.
@@ -751,13 +758,22 @@ impl LocalFingerprint {
             LocalFingerprint::RerunIfChanged { output, paths } => {
                 let c: Vec<_> = paths
                     .iter()
-                    .map(|p| (pkg_root.join(p), 0u32, 0u64))
+                    .map(|p| {
+                        (
+                            pkg_root.join(p),
+                            0u64,
+                            FileHash {
+                                kind: SourceFileHashAlgorithm::Md5,
+                                hash: String::new(),
+                            },
+                        )
+                    })
                     .collect();
                 Ok(find_stale_file(
                     config,
                     mtime_cache,
                     &target_root.join(output),
-                    &c,
+                    c.as_slice(),
                 ))
             }
 
@@ -1706,7 +1722,7 @@ fn find_stale_file(
     config: &Config,
     mtime_cache: &mut HashMap<PathBuf, (FileTime, FileSize, FileHash)>,
     reference: &Path,
-    paths: &[(PathBuf, u32, u64)],
+    paths: &[(PathBuf, FileSize, FileHash)],
 ) -> Option<StaleItem> {
     let reference_mtime = match paths::mtime(reference) {
         Ok(mtime) => mtime,
@@ -1716,7 +1732,7 @@ fn find_stale_file(
     for (path, reference_size, reference_hash) in paths {
         let path = &path;
         let (path_mtime, path_size, path_hash) = match mtime_cache.entry(path.to_path_buf()) {
-            Entry::Occupied(o) => *o.get(),
+            Entry::Occupied(o) => o.get().clone(), //FIXME? do we need to clone here?
             Entry::Vacant(v) => {
                 let mtime = match paths::mtime(path) {
                     Ok(mtime) => mtime,
@@ -1725,13 +1741,21 @@ fn find_stale_file(
                 let current_size = if config.cli_unstable().hash_tracking {
                     match std::fs::metadata(path) {
                         // For file difference checking just check the lower bits of file size
-                        Ok(metadata) => metadata.len() as u32,
+                        Ok(metadata) => metadata.len(),
                         Err(..) => return Some(StaleItem::MissingFile(path.to_path_buf())), //todo
                     }
                 } else {
                     0
                 };
-                *v.insert((mtime, current_size, 0u64)) // Hash calculated only if needed later.
+                v.insert((
+                    mtime,
+                    current_size,
+                    FileHash {
+                        kind: SourceFileHashAlgorithm::Md5,
+                        hash: String::new(),
+                    },
+                ))
+                .clone() // Hash calculated only if needed later.
             }
         };
 
@@ -1773,21 +1797,40 @@ fn find_stale_file(
 
         // Same size but mtime is different. Probably there's no change...
         // compute hash and compare to prevent change cascade...
-        if config.cli_unstable().hash_tracking && *reference_hash > 0 {
+        if config.cli_unstable().hash_tracking && reference_hash.hash.len() > 0 {
+            // FIXME? We could fail a little faster by checking for size discrepancies on _any_ file before checking hashes,
+            // but not sure it's worth the additional complexity.
             //FIXME put the result in the mtime_cache rather than hashing each time!
             let mut reader = io::BufReader::new(fs::File::open(&path).unwrap()); //FIXME
-            let mut hasher = FxHasher::default();
-            let mut buffer = [0; 1024];
-            loop {
-                let count = reader.read(&mut buffer).unwrap(); //FIXME
-                if count == 0 {
-                    break;
+
+            let hash = match reference_hash.kind {
+                SourceFileHashAlgorithm::Md5 => {
+                    let mut hasher = Md5::new();
+                    let mut buffer = [0; 1024];
+                    loop {
+                        let count = reader.read(&mut buffer).unwrap(); //FIXME
+                        if count == 0 {
+                            break;
+                        }
+                        hasher.input(&buffer[..count]);
+                    }
+                    format!("{:?}", hasher.result())
                 }
-                hasher.write(&buffer[..count]);
-            }
-            let hash = hasher.finish();
+                SourceFileHashAlgorithm::Sha1 => {
+                    let mut hasher = Sha1::new();
+                    let mut buffer = [0; 1024];
+                    loop {
+                        let count = reader.read(&mut buffer).unwrap(); //FIXME
+                        if count == 0 {
+                            break;
+                        }
+                        hasher.input(&buffer[..count]);
+                    }
+                    format!("{:?}", hasher.result())
+                }
+            };
 
-            if hash == *reference_hash {
+            if hash == reference_hash.hash {
                 continue;
             }
         }
@@ -1807,6 +1850,24 @@ fn find_stale_file(
     None
 }
 
+#[derive(Clone, Copy, Eq, PartialEq)]
+pub enum SourceFileHashAlgorithm {
+    Md5,
+    Sha1,
+}
+
+impl FromStr for SourceFileHashAlgorithm {
+    type Err = ();
+
+    fn from_str(s: &str) -> Result<SourceFileHashAlgorithm, ()> {
+        match s {
+            "md5" => Ok(SourceFileHashAlgorithm::Md5),
+            "sha1" => Ok(SourceFileHashAlgorithm::Sha1),
+            _ => Err(()),
+        }
+    }
+}
+
 enum DepInfoPathType {
     // src/, e.g. src/lib.rs
     PackageRootRelative,
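
Note (not part of the diff): a minimal usage sketch for the FromStr impl added above. Only the names matched in the hunk ("md5" and "sha1") parse successfully; the demo function and the "blake3" input are hypothetical.

fn demo_parse_algo() {
    // FromStr is brought into scope by the use std::str::{self, FromStr}; import added above.
    let algo = SourceFileHashAlgorithm::from_str("sha1").expect("unknown hashing algo");
    // The enum derives PartialEq but not Debug, so plain assert! is used instead of assert_eq!.
    assert!(algo == SourceFileHashAlgorithm::Sha1);
    // Anything other than "md5" or "sha1" is rejected with Err(()).
    assert!(SourceFileHashAlgorithm::from_str("blake3").is_err());
}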
@@ -1914,7 +1975,7 @@ pub fn translate_dep_info(
 pub struct RustcDepInfo {
     /// The list of files that the main target in the dep-info file depends on.
     /// and lower 32bits of size and hash (or 0 if not there).
-    pub files: Vec<(PathBuf, u32, u64)>, //FIXME use Option<NonZeroU32> instead?
+    pub files: Vec<(PathBuf, FileSize, FileHash)>, //FIXME use Option<NonZero> instead?
     /// The list of environment variables we found that the rustc compilation
     /// depends on.
     ///
@@ -1939,21 +2000,34 @@ struct EncodedDepInfo {
 impl EncodedDepInfo {
     fn parse(mut bytes: &[u8]) -> Option<EncodedDepInfo> {
         let bytes = &mut bytes;
-        let nfiles = read_usize(bytes)?;
+        let nfiles = read_usize(bytes).unwrap();
         let mut files = Vec::with_capacity(nfiles as usize);
         for _ in 0..nfiles {
             //FIXME: backward compatibility!!!
-            let size = read_usize(bytes)? as FileSize;
+            let size = read_u64(bytes)? as FileSize;
             //debug!("read size as {}", size);
-            let hash = read_u64(bytes)?;
+            let hash_buf = read_bytes(bytes)?;
+
+            let hash = String::from_utf8(hash_buf.to_vec()).unwrap();
+
             //debug!("read hash as {}", hash);
+            let kind = match read_u8(bytes)? {
+                0 => SourceFileHashAlgorithm::Md5,
+                1 => SourceFileHashAlgorithm::Sha1,
+                _ => return None,
+            };
             let ty = match read_u8(bytes)? {
                 0 => DepInfoPathType::PackageRootRelative,
                 1 => DepInfoPathType::TargetRootRelative,
                 _ => return None,
             };
             let bytes = read_bytes(bytes)?;
-            files.push((size, hash, ty, util::bytes2path(bytes).ok()?));
+            files.push((
+                size,
+                FileHash { kind, hash },
+                ty,
+                util::bytes2path(bytes).ok()?,
+            ));
         }
 
         let nenv = read_usize(bytes)?;
2015
2089
write_usize ( dst, self . files . len ( ) ) ;
2016
2090
for ( size, hash, ty, file) in self . files . iter ( ) {
2017
2091
//debug!("writing depinfo size as {} ", *size as usize);
2018
- write_usize ( dst, * size as usize ) ;
2019
- //debug!("writing depinfo hash as {} ", *hash);
2020
- write_u64 ( dst, * hash) ;
2092
+ write_u64 ( dst, * size) ;
2093
+ //debug!("writing depinfo hash as {} ", hash.hash.len());
2094
+ write_bytes ( dst, hash. hash . as_bytes ( ) ) ;
2095
+ //write(dst, hash.hash);
2096
+ match hash. kind {
2097
+ SourceFileHashAlgorithm :: Md5 => dst. push ( 0 ) ,
2098
+ SourceFileHashAlgorithm :: Sha1 => dst. push ( 1 ) ,
2099
+ }
2021
2100
match ty {
2022
2101
DepInfoPathType :: PackageRootRelative => dst. push ( 0 ) ,
2023
2102
DepInfoPathType :: TargetRootRelative => dst. push ( 1 ) ,
@@ -2094,8 +2173,13 @@ pub fn parse_rustc_dep_info(rustc_dep_info: &Path) -> CargoResult<RustcDepInfo>
                 if ret.files[i].0.to_string_lossy() == file {
                     let parts: Vec<_> = line["# size:".len()..].split(" ").collect();
                     ret.files[i].1 = parts[0].trim().parse()?; //FIXME do we need trims?
-                    let hash = &parts[1]["hash:".len()..].trim();
-                    ret.files[i].2 = hash.parse()?;
+                    let kind_hash: Vec<_> = parts[1].split(":").collect();
+                    let hash = kind_hash[1];
+                    ret.files[i].2 = FileHash {
+                        kind: SourceFileHashAlgorithm::from_str(kind_hash[0])
+                            .expect("unknown hashing algo"),
+                        hash: hash.to_string(),
+                    };
                     break;
                 }
             }
@@ -2117,7 +2201,14 @@ pub fn parse_rustc_dep_info(rustc_dep_info: &Path) -> CargoResult<RustcDepInfo>
                         internal("malformed dep-info format, trailing \\".to_string())
                     })?);
                 }
-                ret.files.push((file.into(), 0, 0));
+                ret.files.push((
+                    file.into(),
+                    0,
+                    FileHash {
+                        kind: SourceFileHashAlgorithm::Md5,
+                        hash: String::new(),
+                    },
+                ));
             }
         } else {
             prev_line = Some(line);
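
Note (not part of the diff): a standalone sketch of the "# size:" dep-info annotation that parse_rustc_dep_info above appears to consume. The exact payload layout (size:<bytes> <algo>:<digest>) is an assumption inferred from the parts[0] / parts[1].split(":") logic in the hunk; the helper and sample line below are illustrative only.

// Illustrative helper mirroring the parsing in the diff; not part of cargo.
fn parse_size_line(line: &str) -> Option<(u64, &str, &str)> {
    let rest = line.strip_prefix("# size:")?;
    let mut parts = rest.split(' ');
    // First field: file size in bytes (parts[0] in the diff).
    let size: u64 = parts.next()?.trim().parse().ok()?;
    // Second field: "<algo>:<digest>" (parts[1] in the diff), split on ':'.
    let (algo, digest) = parts.next()?.split_once(':')?;
    Some((size, algo, digest))
}

fn main() {
    // Hypothetical sample line; the digest shown is the md5 of empty input.
    let line = "# size:2048 md5:d41d8cd98f00b204e9800998ecf8427e";
    assert_eq!(
        parse_size_line(line),
        Some((2048, "md5", "d41d8cd98f00b204e9800998ecf8427e"))
    );
}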