@@ -221,6 +221,7 @@ impl core::fmt::Debug for ShortFileName {
221
221
}
222
222
223
223
/// Used to store a Long File Name
224
+ #[ derive( Debug ) ]
224
225
pub struct LfnBuffer < ' a > {
225
226
/// We fill this buffer in from the back
226
227
inner : & ' a mut [ u8 ] ,
@@ -230,6 +231,8 @@ pub struct LfnBuffer<'a> {
230
231
free : usize ,
231
232
/// Did we overflow?
232
233
overflow : bool ,
234
+ /// If a surrogate-pair is split over two directory entries, remember half of it here.
235
+ unpaired_surrogate : Option < u16 > ,
233
236
}
234
237
235
238
impl < ' a > LfnBuffer < ' a > {
@@ -240,19 +243,34 @@ impl<'a> LfnBuffer<'a> {
240
243
inner : storage,
241
244
free : len,
242
245
overflow : false ,
246
+ unpaired_surrogate : None ,
243
247
}
244
248
}
245
249
246
250
/// Empty out this buffer
247
251
pub fn clear ( & mut self ) {
248
252
self . free = self . inner . len ( ) ;
249
253
self . overflow = false ;
254
+ self . unpaired_surrogate = None ;
250
255
}
251
256
252
- /// Push the 13 UCS-2 characters into this string
257
+ /// Push the 13 UTF-16 codepoints into this string.
253
258
///
254
259
/// We assume they are pushed last-chunk-first, as you would find
255
260
/// them on disk.
261
+ ///
262
+ /// Any chunk starting with a half of a surrogate pair has that saved for the next call.
263
+ ///
264
+ /// ```text
265
+ /// [de00, 002e, 0074, 0078, 0074, 0000, ffff, ffff, ffff, ffff, ffff, ffff, ffff]
266
+ /// [0041, 0042, 0030, 0031, 0032, 0033, 0034, 0035, 0036, 0037, 0038, 0039, d83d]
267
+ ///
268
+ /// Would map to
269
+ ///
270
+ /// 0041 0042 0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 1f600 002e 0074 0078 0074, or
271
+ ///
272
+ /// "AB0123456789😀.txt"
273
+ /// ```
256
274
pub fn push ( & mut self , buffer : & [ u16 ; 13 ] ) {
257
275
// find the first null, if any
258
276
let null_idx = buffer
@@ -261,25 +279,70 @@ impl<'a> LfnBuffer<'a> {
261
279
. unwrap_or ( buffer. len ( ) ) ;
262
280
// take all the wide chars, up to the null (or go to the end)
263
281
let buffer = & buffer[ 0 ..null_idx] ;
264
- for ch in buffer. iter ( ) . rev ( ) {
265
- let ch = char:: from_u32 ( * ch as u32 ) . unwrap_or ( '?' ) ;
282
+
283
+ // This next part will convert the 16-bit values into chars, noting that
284
+ // chars outside the Basic Multilingual Plane will require two 16-bit
285
+ // values to encode (see UTF-16 Surrogate Pairs).
286
+ //
287
+ // We cache the decoded chars into this array so we can iterate them
288
+ // backwards. It's 60 bytes, but it'll have to do.
289
+ let mut char_vec: heapless:: Vec < char , 13 > = heapless:: Vec :: new ( ) ;
290
+ // Now do the decode, including the unpaired surrogate (if any) from
291
+ // last time (maybe it has a pair now!)
292
+ let mut is_first = true ;
293
+ for ch in char:: decode_utf16 (
294
+ buffer
295
+ . iter ( )
296
+ . cloned ( )
297
+ . chain ( self . unpaired_surrogate . take ( ) . iter ( ) . cloned ( ) ) ,
298
+ ) {
299
+ match ch {
300
+ Ok ( ch) => {
301
+ char_vec. push ( ch) . expect ( "Vec was full!?" ) ;
302
+ }
303
+ Err ( e) => {
304
+ // OK, so we found half a surrogate pair and nothing to go
305
+ // with it. Was this the first codepoint in the chunk?
306
+ if is_first {
307
+ // it was - the other half is probably in the next chunk
308
+ // so save this for next time
309
+ trace ! ( "LFN saved {:?}" , e. unpaired_surrogate( ) ) ;
310
+ self . unpaired_surrogate = Some ( e. unpaired_surrogate ( ) ) ;
311
+ } else {
312
+ // it wasn't - can't deal with it these mid-sequence, so
313
+ // replace it
314
+ trace ! ( "LFN replaced {:?}" , e. unpaired_surrogate( ) ) ;
315
+ char_vec. push ( '\u{fffd}' ) . expect ( "Vec was full?!" ) ;
316
+ }
317
+ }
318
+ }
319
+ is_first = false ;
320
+ }
321
+
322
+ for ch in char_vec. iter ( ) . rev ( ) {
266
323
trace ! ( "LFN push {:?}" , ch) ;
267
- let mut ch_bytes = [ 0u8 ; 4 ] ;
268
- // a buffer of length 4 is always enough
269
- let ch_str = ch. encode_utf8 ( & mut ch_bytes) ;
270
- if self . free < ch_str. len ( ) {
324
+ // a buffer of length 4 is enough to encode any char
325
+ let mut encoded_ch = [ 0u8 ; 4 ] ;
326
+ let encoded_ch = ch. encode_utf8 ( & mut encoded_ch) ;
327
+ if self . free < encoded_ch. len ( ) {
328
+ // the LFN buffer they gave us was not long enough. Note for
329
+ // later, so we don't show them garbage.
271
330
self . overflow = true ;
272
331
return ;
273
332
}
274
- // store the encoded character in the buffer, working backwards
275
- for b in ch_str. bytes ( ) . rev ( ) {
333
+ // Store the encoded char in the buffer, working backwards. We
334
+ // already checked there was enough space.
335
+ for b in encoded_ch. bytes ( ) . rev ( ) {
276
336
self . free -= 1 ;
277
337
self . inner [ self . free ] = b;
278
338
}
279
339
}
280
340
}
281
341
282
342
/// View this LFN buffer as a string-slice
343
+ ///
344
+ /// If the buffer overflowed while parsing the LFN, or if this buffer is
345
+ /// empty, you get an empty string.
283
346
pub fn as_str ( & self ) -> & str {
284
347
if self . overflow {
285
348
""
@@ -418,6 +481,22 @@ mod test {
418
481
] ) ;
419
482
assert_eq ! ( buf. as_str( ) , "ABCDEFGHIJKLM0123∂" ) ;
420
483
}
484
+
485
#[test]
fn two_piece_split_surrogate() {
    // Chunk holding the end of the name. It opens with the low half (de00)
    // of a surrogate pair and is terminated by a null plus ffff padding.
    const TAIL_CHUNK: [u16; 13] = [
        0xde00, 0x002e, 0x0074, 0x0078, 0x0074, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
        0xffff, 0xffff,
    ];
    // Chunk holding the start of the name. It begins with a complete pair
    // and ends with the high half (d83d) that matches the tail's first unit.
    const HEAD_CHUNK: [u16; 13] = [
        0xd83d, 0xde00, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038,
        0x0039, 0xd83d,
    ];

    let mut storage = [0u8; 64];
    let mut buf: LfnBuffer = LfnBuffer::new(&mut storage);

    // Chunks are fed last-first, so the tail goes in before the head.
    buf.push(&TAIL_CHUNK);
    buf.push(&HEAD_CHUNK);

    assert_eq!(buf.as_str(), "😀0123456789😀.txt");
}
421
500
}
422
501
423
502
// ****************************************************************************
0 commit comments