@@ -150,11 +150,16 @@ impl<'a> BlobObject<'a> {
150
150
/// otherwise it will be copied to the blobdir first.
151
151
///
152
152
/// In order to deduplicate files that contain the same data,
153
- /// the file will be named as the hash of the file data.
153
+ /// the file will be named `<hash>.<extension>`, e.g. `ce940175885d7b78f7b7e9f1396611f.jpg`.
154
+ /// The `original_name` param is only used to get the extension.
154
155
///
155
156
/// This is done in a way which avoids race-conditions when multiple files are
156
157
/// concurrently created.
157
- pub fn create_and_deduplicate ( context : & ' a Context , src : & Path ) -> Result < BlobObject < ' a > > {
158
+ pub fn create_and_deduplicate (
159
+ context : & ' a Context ,
160
+ src : & Path ,
161
+ original_name : & str ,
162
+ ) -> Result < BlobObject < ' a > > {
158
163
// `create_and_deduplicate{_from_bytes}()` do blocking I/O, but can still be called
159
164
// from an async context thanks to `block_in_place()`.
160
165
// Tokio's "async" I/O functions are also just thin wrappers around the blocking I/O syscalls,
@@ -180,7 +185,25 @@ impl<'a> BlobObject<'a> {
180
185
src_in_blobdir = & temp_path;
181
186
}
182
187
183
- let blob = BlobObject :: from_hash ( blobdir, file_hash ( src_in_blobdir) ?) ;
188
+ let hash = file_hash ( src_in_blobdir) ?. to_hex ( ) ;
189
+ let hash = hash. as_str ( ) ;
190
+ let hash = hash. get ( 0 ..31 ) . unwrap_or ( hash) ;
191
+ let new_file = if let Some ( extension) = Path :: new ( original_name)
192
+ . extension ( )
193
+ . filter ( |e| e. len ( ) <= 32 )
194
+ {
195
+ format ! (
196
+ "$BLOBDIR/{hash}.{}" ,
197
+ extension. to_string_lossy( ) . to_lowercase( )
198
+ )
199
+ } else {
200
+ format ! ( "$BLOBDIR/{hash}" )
201
+ } ;
202
+
203
+ let blob = BlobObject {
204
+ blobdir,
205
+ name : new_file,
206
+ } ;
184
207
let new_path = blob. to_abs_path ( ) ;
185
208
186
209
// This will also replace an already-existing file.
@@ -194,7 +217,8 @@ impl<'a> BlobObject<'a> {
194
217
195
218
/// Creates a new blob object with the file contents in `data`.
196
219
/// In order to deduplicate files that contain the same data,
197
- /// the file will be renamed to a hash of the file data.
220
+ /// the file will be named `<hash>.<extension>`, e.g. `ce940175885d7b78f7b7e9f1396611f.jpg`.
221
+ /// The `original_name` param is only used to get the extension.
198
222
///
199
223
/// The `data` will be written into the file without race-conditions.
200
224
///
@@ -203,6 +227,7 @@ impl<'a> BlobObject<'a> {
203
227
pub fn create_and_deduplicate_from_bytes (
204
228
context : & ' a Context ,
205
229
data : & [ u8 ] ,
230
+ original_name : & str ,
206
231
) -> Result < BlobObject < ' a > > {
207
232
task:: block_in_place ( || {
208
233
let blobdir = context. get_blobdir ( ) ;
@@ -213,20 +238,10 @@ impl<'a> BlobObject<'a> {
213
238
std:: fs:: write ( & temp_path, data) . context ( "writing new blobfile failed" ) ?;
214
239
} ;
215
240
216
- BlobObject :: create_and_deduplicate ( context, & temp_path)
241
+ BlobObject :: create_and_deduplicate ( context, & temp_path, original_name )
217
242
} )
218
243
}
219
244
220
- fn from_hash ( blobdir : & Path , hash : blake3:: Hash ) -> BlobObject < ' _ > {
221
- let hash = hash. to_hex ( ) ;
222
- let hash = hash. as_str ( ) ;
223
- let hash = hash. get ( 0 ..31 ) . unwrap_or ( hash) ;
224
- BlobObject {
225
- blobdir,
226
- name : format ! ( "$BLOBDIR/{hash}" ) ,
227
- }
228
- }
229
-
230
245
/// Creates a blob from a file, possibly copying it to the blobdir.
231
246
///
232
247
/// If the source file is not a path into the blob directory
@@ -674,7 +689,7 @@ impl<'a> BlobObject<'a> {
674
689
encode_img ( & img, ofmt, & mut encoded) ?;
675
690
}
676
691
677
- self . name = BlobObject :: create_and_deduplicate_from_bytes ( context, & encoded)
692
+ self . name = BlobObject :: create_and_deduplicate_from_bytes ( context, & encoded, & name )
678
693
. context ( "failed to write recoded blob to file" ) ?
679
694
. name ;
680
695
}
@@ -905,8 +920,11 @@ mod tests {
905
920
#[ tokio:: test( flavor = "multi_thread" , worker_threads = 2 ) ]
906
921
async fn test_lowercase_ext ( ) {
907
922
let t = TestContext :: new ( ) . await ;
908
- let blob = BlobObject :: create ( & t, "foo.TXT" , b"hello" ) . await . unwrap ( ) ;
909
- assert_eq ! ( blob. as_name( ) , "$BLOBDIR/foo.txt" ) ;
923
+ let blob = BlobObject :: create_and_deduplicate_from_bytes ( & t, b"hello" , "foo.TXT" ) . unwrap ( ) ;
924
+ assert ! (
925
+ blob. as_name( ) . ends_with( ".txt" ) ,
926
+ "Blob {blob:?} should end with .txt"
927
+ ) ;
910
928
}
911
929
912
930
#[ tokio:: test( flavor = "multi_thread" , worker_threads = 2 ) ]
@@ -980,10 +998,10 @@ mod tests {
980
998
#[ tokio:: test( flavor = "multi_thread" , worker_threads = 2 ) ]
981
999
async fn test_create_long_names ( ) {
982
1000
let t = TestContext :: new ( ) . await ;
983
- let s = "1" . repeat ( 150 ) ;
984
- let blob = BlobObject :: create ( & t, & s , b"data" ) . await . unwrap ( ) ;
1001
+ let s = format ! ( "file.{}" , "a" . repeat( 100 ) ) ;
1002
+ let blob = BlobObject :: create_and_deduplicate_from_bytes ( & t, b"data" , & s ) . unwrap ( ) ;
985
1003
let blobname = blob. as_name ( ) . split ( '/' ) . last ( ) . unwrap ( ) ;
986
- assert ! ( blobname. len( ) < 128 ) ;
1004
+ assert ! ( blobname. len( ) < 70 ) ;
987
1005
}
988
1006
989
1007
#[ tokio:: test( flavor = "multi_thread" , worker_threads = 2 ) ]
@@ -1618,28 +1636,28 @@ mod tests {
1618
1636
1619
1637
let path = t. get_blobdir ( ) . join ( "anyfile.dat" ) ;
1620
1638
fs:: write ( & path, b"bla" ) . await ?;
1621
- let blob = BlobObject :: create_and_deduplicate ( & t, & path) ?;
1622
- assert_eq ! ( blob. name, "$BLOBDIR/ce940175885d7b78f7b7e9f1396611f" ) ;
1639
+ let blob = BlobObject :: create_and_deduplicate ( & t, & path, "anyfile.dat" ) ?;
1640
+ assert_eq ! ( blob. name, "$BLOBDIR/ce940175885d7b78f7b7e9f1396611f.dat " ) ;
1623
1641
assert_eq ! ( path. exists( ) , false ) ;
1624
1642
1625
1643
assert_eq ! ( fs:: read( & blob. to_abs_path( ) ) . await ?, b"bla" ) ;
1626
1644
1627
1645
fs:: write ( & path, b"bla" ) . await ?;
1628
- let blob2 = BlobObject :: create_and_deduplicate ( & t, & path) ?;
1646
+ let blob2 = BlobObject :: create_and_deduplicate ( & t, & path, "anyfile.dat" ) ?;
1629
1647
assert_eq ! ( blob2. name, blob. name) ;
1630
1648
1631
1649
let path_outside_blobdir = t. dir . path ( ) . join ( "anyfile.dat" ) ;
1632
1650
fs:: write ( & path_outside_blobdir, b"bla" ) . await ?;
1633
- let blob3 = BlobObject :: create_and_deduplicate ( & t, & path_outside_blobdir) ?;
1651
+ let blob3 = BlobObject :: create_and_deduplicate ( & t, & path_outside_blobdir, "anyfile.dat" ) ?;
1634
1652
assert ! ( path_outside_blobdir. exists( ) ) ;
1635
1653
assert_eq ! ( blob3. name, blob. name) ;
1636
1654
1637
1655
fs:: write ( & path, b"blabla" ) . await ?;
1638
- let blob4 = BlobObject :: create_and_deduplicate ( & t, & path) ?;
1656
+ let blob4 = BlobObject :: create_and_deduplicate ( & t, & path, "anyfile.dat" ) ?;
1639
1657
assert_ne ! ( blob4. name, blob. name) ;
1640
1658
1641
1659
fs:: remove_dir_all ( t. get_blobdir ( ) ) . await ?;
1642
- let blob5 = BlobObject :: create_and_deduplicate ( & t, & path_outside_blobdir) ?;
1660
+ let blob5 = BlobObject :: create_and_deduplicate ( & t, & path_outside_blobdir, "anyfile.dat" ) ?;
1643
1661
assert_eq ! ( blob5. name, blob. name) ;
1644
1662
1645
1663
Ok ( ( ) )
@@ -1650,7 +1668,7 @@ mod tests {
1650
1668
let t = TestContext :: new ( ) . await ;
1651
1669
1652
1670
fs:: remove_dir ( t. get_blobdir ( ) ) . await ?;
1653
- let blob = BlobObject :: create_and_deduplicate_from_bytes ( & t, b"bla" ) ?;
1671
+ let blob = BlobObject :: create_and_deduplicate_from_bytes ( & t, b"bla" , "file" ) ?;
1654
1672
assert_eq ! ( blob. name, "$BLOBDIR/ce940175885d7b78f7b7e9f1396611f" ) ;
1655
1673
1656
1674
assert_eq ! ( fs:: read( & blob. to_abs_path( ) ) . await ?, b"bla" ) ;
@@ -1662,7 +1680,7 @@ mod tests {
1662
1680
// which we can't mock from our code.
1663
1681
tokio:: time:: sleep ( Duration :: from_millis ( 1100 ) ) . await ;
1664
1682
1665
- let blob2 = BlobObject :: create_and_deduplicate_from_bytes ( & t, b"bla" ) ?;
1683
+ let blob2 = BlobObject :: create_and_deduplicate_from_bytes ( & t, b"bla" , "file" ) ?;
1666
1684
assert_eq ! ( blob2. name, blob. name) ;
1667
1685
1668
1686
let modified2 = blob. to_abs_path ( ) . metadata ( ) ?. modified ( ) ?;
@@ -1675,15 +1693,15 @@ mod tests {
1675
1693
sql:: housekeeping ( & t) . await ?;
1676
1694
assert_eq ! ( blob2. to_abs_path( ) . exists( ) , false ) ;
1677
1695
1678
- let blob3 = BlobObject :: create_and_deduplicate_from_bytes ( & t, b"blabla" ) ?;
1696
+ let blob3 = BlobObject :: create_and_deduplicate_from_bytes ( & t, b"blabla" , "file" ) ?;
1679
1697
assert_ne ! ( blob3. name, blob. name) ;
1680
1698
1681
1699
{
1682
1700
// If something goes wrong and the blob file is overwritten,
1683
1701
// the correct content should be restored:
1684
1702
fs:: write ( blob3. to_abs_path ( ) , b"bloblo" ) . await ?;
1685
1703
1686
- let blob4 = BlobObject :: create_and_deduplicate_from_bytes ( & t, b"blabla" ) ?;
1704
+ let blob4 = BlobObject :: create_and_deduplicate_from_bytes ( & t, b"blabla" , "file" ) ?;
1687
1705
let blob4_content = fs:: read ( blob4. to_abs_path ( ) ) . await ?;
1688
1706
assert_eq ! ( blob4_content, b"blabla" ) ;
1689
1707
}
0 commit comments