
Commit c2c893d

ref(dif): Replace ChunkedDifMatch with generic ChunkedObject
So that the code for chunk-uploading DIFs can be reused for other file types as well, replace the `ChunkedDifMatch` struct with a similar, generic type called `ChunkedObject`. The code path is not fully generic yet, but it is a step in the right direction. ref #2195
1 parent 1395de8 commit c2c893d
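
To illustrate the reuse this change is aiming for, here is a minimal sketch (the `ProguardMapping` wrapper and `chunk_mapping` helper are hypothetical and not part of this commit): because the new `Chunked<T>` type introduced below only requires `T: AsRef<[u8]>`, any artifact that can expose its raw bytes can share the same chunking and checksumming path.

use anyhow::Result;

use crate::utils::chunks::Chunked;

/// Hypothetical non-DIF artifact that owns its raw bytes (not part of this commit).
struct ProguardMapping(Vec<u8>);

impl AsRef<[u8]> for ProguardMapping {
    fn as_ref(&self) -> &[u8] {
        &self.0
    }
}

/// Chunks and checksums a mapping with the same code path used for DIFs.
fn chunk_mapping(bytes: Vec<u8>, chunk_size: usize) -> Result<Chunked<ProguardMapping>> {
    Chunked::from(ProguardMapping(bytes), chunk_size)
}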

File tree: 3 files changed (+129, -108 lines)


src/utils/chunks.rs renamed to src/utils/chunks/mod.rs

Lines changed: 4 additions & 0 deletions
@@ -5,6 +5,10 @@
 //!
 //! See `BatchedSliceExt::batches` for more information.
 
+mod types;
+
+pub use types::{Chunked, MissingObjectsInfo};
+
 use std::sync::Arc;
 use std::time::Duration;
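
Note that the new `types` module stays private while its items are re-exported, so call sites keep importing from `crate::utils::chunks` rather than from the submodule (illustrative import only):

use crate::utils::chunks::{Chunked, MissingObjectsInfo};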

src/utils/chunks/types.rs

Lines changed: 71 additions & 0 deletions
@@ -0,0 +1,71 @@
+//! Contains data types used in the chunk upload process.
+
+use anyhow::Result;
+use sha1_smol::Digest;
+
+use crate::utils::chunks::Chunk;
+use crate::utils::fs;
+
+/// Information returned by `assemble_difs` containing flat lists of incomplete
+/// objects and their missing chunks.
+pub type MissingObjectsInfo<'m, T> = (Vec<&'m Chunked<T>>, Vec<Chunk<'m>>);
+
+/// Chunked arbitrary data with computed SHA1 checksums.
+pub struct Chunked<T> {
+    /// Original object
+    object: T,
+
+    /// SHA1 checksum of the entire object
+    checksum: Digest,
+
+    /// SHA1 checksums of all chunks
+    chunks: Vec<Digest>,
+
+    /// Size of a single chunk
+    chunk_size: usize,
+}
+
+impl<T> Chunked<T> {
+    /// Returns the SHA1 checksum of the entire object.
+    pub fn checksum(&self) -> Digest {
+        self.checksum
+    }
+
+    /// Returns the original object.
+    pub fn object(&self) -> &T {
+        &self.object
+    }
+
+    /// Returns the SHA1 checksums of each chunk, in order.
+    pub fn chunk_hashes(&self) -> &[Digest] {
+        &self.chunks
+    }
+}
+
+impl<T> Chunked<T>
+where
+    T: AsRef<[u8]>,
+{
+    /// Creates a new `ChunkedObject` from the given object, using
+    /// the given chunk size.
+    pub fn from(object: T, chunk_size: usize) -> Result<Self> {
+        let (checksum, chunks) = fs::get_sha1_checksums(object.as_ref(), chunk_size)?;
+        Ok(Self {
+            object,
+            checksum,
+            chunks,
+            chunk_size,
+        })
+    }
+
+    /// Returns an iterator over all chunks of the object.
+    /// The iterator yields `Chunk` objects, which contain the chunk's
+    /// SHA1 checksum and a byte slice pointing to the chunk's data.
+    pub fn iter_chunks(&self) -> impl Iterator<Item = Chunk<'_>> {
+        self.object
+            .as_ref()
+            .chunks(self.chunk_size)
+            .zip(self.chunk_hashes().iter())
+            .map(|(data, checksum)| Chunk((*checksum, data)))
+    }
+}
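
A usage sketch of the new type (the buffer and chunk size below are placeholders, not values taken from this commit): `Vec<u8>` already implements `AsRef<[u8]>`, so plain byte buffers can be chunked directly.

use anyhow::Result;

use crate::utils::chunks::{Chunk, Chunked};

fn print_chunks() -> Result<()> {
    // Placeholder buffer; in practice this would be the contents of a DIF or
    // another uploadable artifact.
    let data = vec![0u8; 5 * 1024 * 1024];
    let chunked = Chunked::from(data, 2 * 1024 * 1024)?; // 2 MiB chunks

    println!("object checksum: {}", chunked.checksum());
    for Chunk((checksum, bytes)) in chunked.iter_chunks() {
        println!("chunk {}: {} bytes", checksum, bytes.len());
    }
    Ok(())
}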

src/utils/dif_upload.rs

Lines changed: 54 additions & 108 deletions
@@ -12,7 +12,6 @@ use std::mem::transmute;
 use std::ops::Deref;
 use std::path::{Component, Path, PathBuf};
 use std::process::Command;
-use std::slice::{Chunks, Iter};
 use std::str;
 use std::thread;
 use std::time::{Duration, Instant};
@@ -39,10 +38,11 @@ use crate::api::{
 use crate::config::Config;
 use crate::constants::{DEFAULT_MAX_DIF_SIZE, DEFAULT_MAX_WAIT};
 use crate::utils::chunks::{
-    upload_chunks, BatchedSliceExt, Chunk, ItemSize, ASSEMBLE_POLL_INTERVAL,
+    upload_chunks, BatchedSliceExt, Chunk, Chunked, ItemSize, MissingObjectsInfo,
+    ASSEMBLE_POLL_INTERVAL,
 };
 use crate::utils::dif::ObjectDifFeatures;
-use crate::utils::fs::{get_sha1_checksum, get_sha1_checksums, TempDir, TempFile};
+use crate::utils::fs::{get_sha1_checksum, TempDir, TempFile};
 use crate::utils::progress::{ProgressBar, ProgressStyle};
 use crate::utils::ui::{copy_with_progress, make_byte_progress_bar};

@@ -52,25 +52,6 @@ pub use crate::api::DebugInfoFile;
 /// Fallback maximum number of chunks in a batch for the legacy upload.
 static MAX_CHUNKS: u64 = 64;
 
-/// An iterator over chunks of data in a `ChunkedDifMatch` object.
-///
-/// This struct is returned by `ChunkedDifMatch::chunks`.
-struct DifChunks<'a> {
-    checksums: Iter<'a, Digest>,
-    iter: Chunks<'a, u8>,
-}
-
-impl<'a> Iterator for DifChunks<'a> {
-    type Item = Chunk<'a>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        match (self.checksums.next(), self.iter.next()) {
-            (Some(checksum), Some(data)) => Some(Chunk((*checksum, data))),
-            (_, _) => None,
-        }
-    }
-}
-
 /// A Debug Information File.
 ///
 /// This is primarily used to store inside the [`DifMatch`] so does not contain any
@@ -286,6 +267,40 @@ impl fmt::Debug for DifMatch<'_> {
     }
 }
 
+impl AsRef<[u8]> for DifMatch<'_> {
+    fn as_ref(&self) -> &[u8] {
+        self.data()
+    }
+}
+
+trait ToAssemble {
+    fn to_assemble(&self, with_debug_id: bool) -> (Digest, ChunkedDifRequest<'_>);
+}
+
+impl ToAssemble for Chunked<DifMatch<'_>> {
+    /// Creates a tuple which can be collected into a `ChunkedDifRequest`.
+    // Some(...) for debug_id can only be done if the ChunkedUploadCapability::Pdbs is
+    // present, which is kind of a protocol bug. Not supplying it means more recent
+    // sentry-cli versions keep working with ancient versions of sentry by not
+    // triggering this protocol bug in most common situations.
+    // See: https://github.com/getsentry/sentry-cli/issues/980
+    // See: https://github.com/getsentry/sentry-cli/issues/1056
+    fn to_assemble(&self, with_debug_id: bool) -> (Digest, ChunkedDifRequest<'_>) {
+        (
+            self.checksum(),
+            ChunkedDifRequest {
+                name: self.object().file_name(),
+                debug_id: if with_debug_id {
+                    self.object().debug_id
+                } else {
+                    None
+                },
+                chunks: self.chunk_hashes(),
+            },
+        )
+    }
+}
+
 /// A `DifMatch` with computed SHA1 checksum.
 #[derive(Debug)]
 struct HashedDifMatch<'data> {
@@ -320,72 +335,6 @@ impl ItemSize for HashedDifMatch<'_> {
     }
 }
 
-/// A chunked `DifMatch` with computed SHA1 checksums.
-#[derive(Debug)]
-struct ChunkedDifMatch<'data> {
-    inner: HashedDifMatch<'data>,
-    chunks: Vec<Digest>,
-    chunk_size: u64,
-}
-
-impl<'data> ChunkedDifMatch<'data> {
-    /// Slices the DIF into chunks of `chunk_size` bytes each, and computes SHA1
-    /// checksums for every chunk as well as the entire DIF.
-    pub fn from(inner: DifMatch<'data>, chunk_size: u64) -> Result<Self> {
-        let (checksum, chunks) = get_sha1_checksums(inner.data(), chunk_size as usize)?;
-        Ok(ChunkedDifMatch {
-            inner: HashedDifMatch { inner, checksum },
-            chunks,
-            chunk_size,
-        })
-    }
-
-    /// Returns an iterator over all chunk checksums.
-    pub fn checksums(&self) -> Iter<'_, Digest> {
-        self.chunks.iter()
-    }
-
-    /// Returns an iterator over all `DifChunk`s.
-    pub fn chunks(&self) -> DifChunks<'_> {
-        DifChunks {
-            checksums: self.checksums(),
-            iter: self.data().chunks(self.chunk_size as usize),
-        }
-    }
-
-    /// Creates a tuple which can be collected into a `ChunkedDifRequest`.
-    // Some(...) for debug_id can only be done if the ChunkedUploadCapability::Pdbs is
-    // present, which is kind of a protocol bug. Not supplying it means more recent
-    // sentry-cli versions keep working with ancient versions of sentry by not
-    // triggering this protocol bug in most common situations.
-    // See: https://github.com/getsentry/sentry-cli/issues/980
-    // See: https://github.com/getsentry/sentry-cli/issues/1056
-    pub fn to_assemble(&self, with_debug_id: bool) -> (Digest, ChunkedDifRequest<'_>) {
-        (
-            self.checksum(),
-            ChunkedDifRequest {
-                name: self.file_name(),
-                debug_id: if with_debug_id { self.debug_id } else { None },
-                chunks: &self.chunks,
-            },
-        )
-    }
-}
-
-impl<'data> Deref for ChunkedDifMatch<'data> {
-    type Target = HashedDifMatch<'data>;
-
-    fn deref(&self) -> &Self::Target {
-        &self.inner
-    }
-}
-
-impl ItemSize for ChunkedDifMatch<'_> {
-    fn size(&self) -> u64 {
-        self.deref().size()
-    }
-}
-
 type ZipFileArchive = ZipArchive<BufReader<File>>;
 
 /// A handle to the source of a potential `DifMatch` used inside `search_difs`.
@@ -469,10 +418,6 @@ impl DifSource<'_> {
     }
 }
 
-/// Information returned by `assemble_difs` containing flat lists of incomplete
-/// DIFs and their missing chunks.
-type MissingDifsInfo<'data, 'm> = (Vec<&'m ChunkedDifMatch<'data>>, Vec<Chunk<'m>>);
-
 /// Verifies that the given path contains a ZIP file and opens it.
 fn try_open_zip<P>(path: P) -> Result<Option<ZipFileArchive>>
 where
@@ -1268,9 +1213,9 @@ fn create_il2cpp_mappings<'a>(difs: &[DifMatch<'a>]) -> Result<Vec<DifMatch<'a>>
 /// The returned value contains separate vectors for incomplete DIFs and
 /// missing chunks for convenience.
 fn try_assemble_difs<'data, 'm>(
-    difs: &'m [ChunkedDifMatch<'data>],
+    difs: &'m [Chunked<DifMatch<'data>>],
     options: &DifUpload,
-) -> Result<MissingDifsInfo<'data, 'm>> {
+) -> Result<MissingObjectsInfo<'m, DifMatch<'data>>> {
     let api = Api::current();
     let request = difs
         .iter()
@@ -1288,7 +1233,7 @@ fn try_assemble_difs<'data, 'm>(
     // nicer.
     let difs_by_checksum = difs
         .iter()
-        .map(|m| (m.checksum, m))
+        .map(|m| (m.checksum(), m))
         .collect::<BTreeMap<_, _>>();
 
     let mut difs = Vec::new();
@@ -1317,7 +1262,7 @@ fn try_assemble_difs<'data, 'm>(
             // will have to call `try_assemble_difs` again after uploading
             // them.
             let mut missing_chunks = chunked_match
-                .chunks()
+                .iter_chunks()
                 .filter(|&Chunk((c, _))| file_response.missing_chunks.contains(&c))
                 .peekable();
 

@@ -1346,11 +1291,11 @@ fn try_assemble_difs<'data, 'm>(
 /// `chunk_options`.
 ///
 /// This function blocks until all chunks have been uploaded.
-fn upload_missing_chunks(
-    missing_info: &MissingDifsInfo<'_, '_>,
+fn upload_missing_chunks<T>(
+    missing_info: &MissingObjectsInfo<'_, T>,
     chunk_options: &ChunkUploadOptions,
 ) -> Result<()> {
-    let (difs, chunks) = missing_info;
+    let (objects, chunks) = missing_info;
 
     // Chunks might be empty if errors occurred in a previous upload. We do
     // not need to render a progress bar or perform an upload in this case.
@@ -1362,17 +1307,17 @@ fn upload_missing_chunks(
         "{} Uploading {} missing debug information file{}...\
          \n{{wide_bar}} {{bytes}}/{{total_bytes}} ({{eta}})",
         style(">").dim(),
-        style(difs.len().to_string()).yellow(),
-        if difs.len() == 1 { "" } else { "s" }
+        style(objects.len().to_string()).yellow(),
+        if objects.len() == 1 { "" } else { "s" }
     ));
 
     upload_chunks(chunks, chunk_options, progress_style)?;
 
     println!(
         "{} Uploaded {} missing debug information {}",
         style(">").dim(),
-        style(difs.len().to_string()).yellow(),
-        match difs.len() {
+        style(objects.len().to_string()).yellow(),
+        match objects.len() {
             1 => "file",
             _ => "files",
         }
@@ -1408,7 +1353,7 @@ fn render_detail(detail: &Option<String>, fallback: Option<&str>) {
 /// This function assumes that all chunks have been uploaded successfully. If there are still
 /// missing chunks in the assemble response, this likely indicates a bug in the server.
 fn poll_dif_assemble(
-    difs: &[&ChunkedDifMatch<'_>],
+    difs: &[&Chunked<DifMatch<'_>>],
     options: &DifUpload,
 ) -> Result<(Vec<DebugInfoFile>, bool)> {
     let progress_style = ProgressStyle::default_bar().template(
@@ -1490,7 +1435,7 @@ fn poll_dif_assemble(
                 .to_owned()
         });
 
-    let difs_by_checksum: BTreeMap<_, _> = difs.iter().map(|m| (m.checksum, m)).collect();
+    let difs_by_checksum: BTreeMap<_, _> = difs.iter().map(|m| (m.checksum(), m)).collect();
 
     for &(checksum, ref success) in &successes {
         // Silently skip all OK entries without a "dif" record since the server
@@ -1512,6 +1457,7 @@ fn poll_dif_assemble(
             // If we skip waiting for the server to finish processing, there
             // are pending entries. We only expect results that have been
             // uploaded in the first place, so we can skip everything else.
+            let dif = dif.object();
             let kind = match dif.dif.get() {
                 ParsedDif::Object(ref object) => match object.kind() {
                     symbolic::debuginfo::ObjectKind::None => String::new(),
@@ -1550,7 +1496,7 @@ fn poll_dif_assemble(
             .ok_or_else(|| format_err!("Server returned unexpected checksum"))?;
         errored.push((dif, error));
     }
-    errored.sort_by_key(|x| x.0.file_name());
+    errored.sort_by_key(|x| x.0.object().file_name());
 
     let has_errors = !errored.is_empty();
     for (dif, error) in errored {
@@ -1560,7 +1506,7 @@ fn poll_dif_assemble(
             _ => Some("An unknown error occurred"),
         };
 
-        println!("  {:>7} {}", style("ERROR").red(), dif.file_name());
+        println!("  {:>7} {}", style("ERROR").red(), dif.object().file_name());
         render_detail(&error.detail, fallback);
     }

@@ -1600,7 +1546,7 @@ fn upload_difs_chunked(
 
     // Calculate checksums and chunks
     let chunked = prepare_difs(processed, |m| {
-        ChunkedDifMatch::from(m, chunk_options.chunk_size)
+        Chunked::from(m, chunk_options.chunk_size as usize)
     })?;
 
     // Upload missing chunks to the server and remember incomplete difs
