Skip to content

Commit e6e8bf3

Browse files
test(chunk-upload): Add a test for uploading multiple debug files (#2274)
This test ensures that the correct chunks are sent to the server when multiple debug files are being uploaded. Note that our chunk uploading code does not guarantee that the chunks appear in any particular order within the request. Only the invariant that all chunks get uploaded (in any arbitrary order) is guaranteed. Because of this, we need to parse the request body into an unordered set of all chunks, and compare this against the set of chunks we expect to receive. ref #2194
1 parent 04cdedf commit e6e8bf3

File tree

9 files changed

+207
-2
lines changed

9 files changed

+207
-2
lines changed
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
This directory contains a set of three macOS Rust executables, compiled as a debug build, and thus containing debug information.
2+
We use these executables to test the chunk upload functionality of the Sentry CLI.
Binary file not shown.

tests/integration/debug_files/upload.rs

Lines changed: 96 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use std::{fs, str};
44

55
use regex::bytes::Regex;
66

7-
use crate::integration::{AssertCommand, MockEndpointBuilder, TestManager};
7+
use crate::integration::{chunk_upload, AssertCommand, MockEndpointBuilder, TestManager};
88

99
/// This regex is used to extract the boundary from the content-type header.
1010
/// We need to match the boundary, since it changes with each request.
@@ -285,3 +285,98 @@ fn ensure_correct_chunk_upload() {
285285
.with_default_token()
286286
.run_and_assert(AssertCommand::Success);
287287
}
288+
/// Verifies that uploading multiple debug files sends the expected chunks.
///
/// The uploaded request body and the recorded fixture are both split into
/// unordered sets of chunks before being compared, so the order in which the
/// chunks appear inside the request does not influence the outcome.
#[test]
fn chunk_upload_multiple_files() {
    // Boundary used in the expected request file. It was randomly generated
    // when the expected request was recorded.
    const RECORDED_BOUNDARY: &str = "------------------------b26LKrHFvpOPfwMoDhYNY8";

    // Body returned by the first assemble call: each file is reported as
    // "not_found" and lists its own chunk as missing.
    const ASSEMBLE_NOT_FOUND: &str = r#"{
        "6e217f035ed538d4d6c14129baad5cb52e680e74": {
            "state": "not_found",
            "missingChunks": ["6e217f035ed538d4d6c14129baad5cb52e680e74"]
        },
        "500848b7815119669a292f2ae1f44af11d7aa2d3": {
            "state": "not_found",
            "missingChunks": ["500848b7815119669a292f2ae1f44af11d7aa2d3"]
        },
        "fc27d95861d56fe16a2b66150e31652b76e8c678": {
            "state": "not_found",
            "missingChunks": ["fc27d95861d56fe16a2b66150e31652b76e8c678"]
        }
    }"#;

    // Body returned by every later assemble call: each file is reported as
    // "created" with no missing chunks.
    const ASSEMBLE_CREATED: &str = r#"{
        "6e217f035ed538d4d6c14129baad5cb52e680e74": {
            "state": "created",
            "missingChunks": []
        },
        "500848b7815119669a292f2ae1f44af11d7aa2d3": {
            "state": "created",
            "missingChunks": []
        },
        "fc27d95861d56fe16a2b66150e31652b76e8c678": {
            "state": "created",
            "missingChunks": []
        }
    }"#;

    let recorded_body = fs::read(
        "tests/integration/_expected_requests/debug_files/upload/chunk_upload_multiple_files.bin",
    )
    .expect("expected chunk body file should be present");

    // Flipped to `false` by the first assemble request, which lets the mock
    // answer differently on the first call than on later calls.
    let assemble_not_called_yet = AtomicBool::new(true);

    let options_endpoint =
        MockEndpointBuilder::new("GET", "/api/0/organizations/wat-org/chunk-upload/")
            .with_response_file("debug_files/get-chunk-upload.json");

    let upload_endpoint =
        MockEndpointBuilder::new("POST", "/api/0/organizations/wat-org/chunk-upload/")
            .with_response_fn(move |request| {
                let boundary = chunk_upload::boundary_from_request(request)
                    .expect("content-type header should be a valid multipart/form-data header");

                let body = request.body().expect("body should be readable");

                let uploaded = chunk_upload::split_chunk_body(body, boundary)
                    .expect("body should be a valid multipart/form-data body");

                let expected = chunk_upload::split_chunk_body(&recorded_body, RECORDED_BOUNDARY)
                    .expect("expected chunk body is a valid multipart/form-data body");

                // A plain `assert!` is used on purpose: on failure, `assert_eq!`
                // would print both chunk sets, which is too long to be useful.
                assert!(
                    uploaded == expected,
                    "Uploaded chunks differ from the expected chunks"
                );

                // The upload response body is left empty.
                vec![]
            });

    let assemble_endpoint = MockEndpointBuilder::new(
        "POST",
        "/api/0/projects/wat-org/wat-project/files/difs/assemble/",
    )
    .with_header_matcher("content-type", "application/json")
    .with_response_fn(move |_| {
        let response = if assemble_not_called_yet.swap(false, Ordering::Relaxed) {
            ASSEMBLE_NOT_FOUND
        } else {
            ASSEMBLE_CREATED
        };

        response.into()
    })
    .expect(2);

    TestManager::new()
        .mock_endpoint(options_endpoint)
        .mock_endpoint(upload_endpoint)
        .mock_endpoint(assemble_endpoint)
        .assert_cmd(vec![
            "debug-files",
            "upload",
            "tests/integration/_fixtures/debug_files/upload/chunk_upload_multiple_files",
        ])
        .with_default_token()
        .run_and_assert(AssertCommand::Success);
}

tests/integration/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ use std::io;
3131
use std::path::Path;
3232

3333
use test_utils::MockEndpointBuilder;
34-
use test_utils::{env, AssertCommand, ChunkOptions, ServerBehavior, TestManager};
34+
use test_utils::{chunk_upload, env, AssertCommand, ChunkOptions, ServerBehavior, TestManager};
3535

3636
pub const UTC_DATE_FORMAT: &str = r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}.\d{6,9}Z";
3737
const VERSION: &str = env!("CARGO_PKG_VERSION");
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
//! Utilities for chunk upload tests.
2+
use std::collections::HashSet;
3+
use std::error::Error;
4+
use std::str;
5+
use std::sync::LazyLock;
6+
7+
use mockito::Request;
8+
use regex::bytes::Regex;
9+
10+
/// This regex is used to extract the boundary from the content-type header.
11+
/// We need to match the boundary, since it changes with each request.
12+
/// The regex matches the format as specified in
13+
/// https://www.w3.org/Protocols/rfc1341/7_2_Multipart.html.
14+
static CONTENT_TYPE_REGEX: LazyLock<Regex> = LazyLock::new(|| {
15+
Regex::new(
16+
r#"^multipart\/form-data; boundary=(?<boundary>[\w'\(\)+,\-\.\/:=? ]{0,69}[\w'\(\)+,\-\.\/:=?])$"#
17+
)
18+
.expect("Regex is valid")
19+
});
20+
/// A trait which abstracts over accessing headers from a mock request.
///
/// Routing header access through this trait allows future compatibility in
/// case we switch to a different mock library.
pub trait HeaderContainer {
    /// Returns the raw bytes of the values stored under `header_name`.
    ///
    /// `boundary_from_request` treats the length of the returned vector as the
    /// number of times the header is present on the request.
    fn header(&self, header_name: &str) -> Vec<&[u8]>;
}
26+
27+
impl HeaderContainer for Request {
28+
fn header(&self, header_name: &str) -> Vec<&[u8]> {
29+
self.header(header_name)
30+
.iter()
31+
.map(|h| h.as_bytes())
32+
.collect()
33+
}
34+
}
35+
36+
/// Split a multipart/form-data body into its constituent chunks.
37+
/// The chunks are returned as a set, since chunk uploading code
38+
/// does not guarantee any specific order of the chunks in the body.
39+
/// We only want to check the invariant that each expected chunk is
40+
/// in the body, not the order of the chunks.
41+
pub fn split_chunk_body<'b>(
42+
body: &'b [u8],
43+
boundary: &str,
44+
) -> Result<HashSet<&'b [u8]>, Box<dyn Error>> {
45+
let escaped_boundary = regex::escape(boundary);
46+
47+
let inner_body = entire_body_regex(&escaped_boundary)
48+
.captures(body)
49+
.ok_or("body does not match multipart form regex")?
50+
.name("body")
51+
.expect("the regex has a \"body\" capture group which should always match")
52+
.as_bytes();
53+
54+
// Using HashSet does have the small disadvantage that we don't
55+
// preserve the count of any duplicate chunks, so our tests will
56+
// fail to detect when the same chunk is included multiple times
57+
// (this would be a bug). But, this way, we don't need to keep
58+
// track of counts of chunks.
59+
Ok(boundary_regex(&escaped_boundary)
60+
.split(inner_body)
61+
.collect())
62+
}
63+
64+
/// Extract the boundary from a multipart/form-data request content-type header.
65+
/// Returns an error if the content-type header is not present exactly once,
66+
/// if the content-type does not match the multipart/form-data regex, or if the
67+
/// boundary is not valid UTF-8.
68+
pub fn boundary_from_request(request: &impl HeaderContainer) -> Result<&str, Box<dyn Error>> {
69+
let content_type_headers = request.header("content-type");
70+
71+
if content_type_headers.len() != 1 {
72+
return Err(format!(
73+
"content-type header should be present exactly once, found {} times",
74+
content_type_headers.len()
75+
)
76+
.into());
77+
}
78+
79+
let content_type = content_type_headers[0];
80+
81+
let boundary = CONTENT_TYPE_REGEX
82+
.captures(content_type)
83+
.ok_or("content-type does not match multipart/form-data regex")?
84+
.name("boundary")
85+
.expect("if the regex matches, the boundary should match as well.")
86+
.as_bytes();
87+
88+
Ok(str::from_utf8(boundary)?)
89+
}
90+
91+
/// Given the regex-escaped boundary of a multipart form, return a regex which
92+
/// should match the entire body of the form. The regex includes a named capture
93+
/// group for the body (named "body"), which includes everything from the first starting
94+
/// boundary to the final ending boundary (non-inclusive of the boundaries).
95+
/// May panic if the boundary is not regex-escaped.
96+
fn entire_body_regex(regex_escaped_boundary: &str) -> Regex {
97+
Regex::new(&format!(
98+
r#"^--{regex_escaped_boundary}(?<body>(?s-u:.*?))--{regex_escaped_boundary}--\s*$"#
99+
))
100+
.expect("This regex should be valid")
101+
}
102+
103+
/// Given the regex-escaped boundary of a multipart form, return a regex which
104+
/// matches the start of a section of the form.
105+
fn boundary_regex(regex_escaped_boundary: &str) -> Regex {
106+
Regex::new(&format!(r#"--{regex_escaped_boundary}"#)).expect("This regex should be valid")
107+
}

tests/integration/test_utils/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
//! A collection of utilities for integration tests.
22
3+
pub mod chunk_upload;
34
pub mod env;
45

56
mod mock_common_endpoints;

0 commit comments

Comments
 (0)