Skip to content

Commit a0801ec

Browse files
authored
re-use http checksums on retry attempts (#4200)
## Description
A change was needed for the S3 Flexible Checksum SEP to add guidance for reusing checksums during retry attempts to prevent data corruption. When a request fails after checksum calculation is complete, SDKs must save and reuse the checksum for retry attempts rather than recalculating it. This prevents inconsistencies when payload content might change between retries, ensuring data durability in S3.

* Adds a simple cache to the checksum crate that will favor cached checksums from prior attempts if set
* Adds new integration tests to verify retry behavior and re-use of checksums

NOTE: If a user were to actually replace the file contents between attempts with content of a different length, the Rust SDK would still use the original content length set on the `ByteStream`. That length is fixed early, when the `ByteStream` is created, by taking the user-provided content length or calculating it from the file. We never attempt to recalculate it, and I see no great way of doing so. The result is a client-side failure about a stream length mismatch, as opposed to sending the request to the server with the original checksum.

## Checklist
<!--- If a checkbox below is not applicable, then please DELETE it rather than leaving it unchecked -->
- [x] For changes to the AWS SDK, generated SDK code, or SDK runtime crates, I have created a changelog entry Markdown file in the `.changelog` directory, specifying "aws-sdk-rust" in the `applies_to` key.

----
_By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice._
1 parent fd9ab82 commit a0801ec

File tree

13 files changed

+578
-467
lines changed

13 files changed

+578
-467
lines changed

.changelog/1751482946.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
---
2+
applies_to:
3+
- client
4+
- aws-sdk-rust
5+
authors:
6+
- aajtodd
7+
references: []
8+
breaking: false
9+
new_feature: false
10+
bug_fix: false
11+
---
12+
re-use checksums on retry attempts for enhanced durability

aws/rust-runtime/Cargo.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

aws/rust-runtime/aws-config/Cargo.lock

Lines changed: 5 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

aws/rust-runtime/aws-inlineable/src/http_request_checksum.rs

Lines changed: 95 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,11 @@
77

88
//! Interceptor for handling Smithy `@httpChecksum` request checksumming with AWS SigV4
99
10+
use crate::presigning::PresigningMarker;
1011
use aws_runtime::auth::PayloadSigningOverride;
1112
use aws_runtime::content_encoding::header_value::AWS_CHUNKED;
1213
use aws_runtime::content_encoding::{AwsChunkedBody, AwsChunkedBodyOptions};
14+
use aws_smithy_checksums::body::ChecksumCache;
1315
use aws_smithy_checksums::ChecksumAlgorithm;
1416
use aws_smithy_checksums::{body::calculate, http::HttpChecksum};
1517
use aws_smithy_runtime::client::sdk_feature::SmithySdkFeature;
@@ -28,10 +30,11 @@ use aws_smithy_types::error::operation::BuildError;
2830
use http::HeaderValue;
2931
use http_body::Body;
3032
use std::str::FromStr;
33+
use std::sync::atomic::AtomicBool;
34+
use std::sync::atomic::Ordering;
35+
use std::sync::Arc;
3136
use std::{fmt, mem};
3237

33-
use crate::presigning::PresigningMarker;
34-
3538
/// Errors related to constructing checksum-validated HTTP requests
3639
#[derive(Debug)]
3740
pub(crate) enum Error {
@@ -64,6 +67,8 @@ struct RequestChecksumInterceptorState {
6467
checksum_algorithm: Option<String>,
6568
/// This value is set in the model on the `httpChecksum` trait
6669
request_checksum_required: bool,
70+
calculate_checksum: Arc<AtomicBool>,
71+
checksum_cache: ChecksumCache,
6772
}
6873
impl Storable for RequestChecksumInterceptorState {
6974
type Storer = StoreReplace<Self>;
@@ -150,15 +155,15 @@ where
150155
layer.store_put(RequestChecksumInterceptorState {
151156
checksum_algorithm,
152157
request_checksum_required,
158+
checksum_cache: ChecksumCache::new(),
159+
calculate_checksum: Arc::new(AtomicBool::new(false)),
153160
});
154161
cfg.push_layer(layer);
155162

156163
Ok(())
157164
}
158165

159-
/// Calculate a checksum and modify the request to include the checksum as a header
160-
/// (for in-memory request bodies) or a trailer (for streaming request bodies).
161-
/// Streaming bodies must be sized or this will return an error.
166+
/// Set up state for calculating the checksum and setting user-agent (UA) features
162167
fn modify_before_retry_loop(
163168
&self,
164169
context: &mut BeforeTransmitInterceptorContextMut<'_>,
@@ -207,14 +212,17 @@ where
207212
_ => true,
208213
};
209214

210-
// Calculate the checksum if necessary
215+
// If a checksum override is set in the ConfigBag we use that instead (currently only used by S3Express)
216+
// If we have made it this far without a checksum being set we set the default (currently Crc32)
217+
let checksum_algorithm =
218+
incorporate_custom_default(checksum_algorithm, cfg).unwrap_or_default();
219+
211220
if calculate_checksum {
212-
// If a checksum override is set in the ConfigBag we use that instead (currently only used by S3Express)
213-
// If we have made it this far without a checksum being set we set the default (currently Crc32)
214-
let checksum_algorithm =
215-
incorporate_custom_default(checksum_algorithm, cfg).unwrap_or_default();
221+
state.calculate_checksum.store(true, Ordering::Release);
216222

217223
// Set the user-agent metric for the selected checksum algorithm
224+
// NOTE: We have to do this in modify_before_retry_loop since UA interceptor also runs
225+
// in modify_before_signing but is registered before this interceptor (client level vs operation level).
218226
match checksum_algorithm {
219227
ChecksumAlgorithm::Crc32 => {
220228
cfg.interceptor_state()
@@ -241,12 +249,46 @@ where
241249
.store_append(SmithySdkFeature::FlexibleChecksumsReqSha256);
242250
}
243251
unsupported => tracing::warn!(
244-
more_info = "Unsupported value of ChecksumAlgorithm detected when setting user-agent metrics",
245-
unsupported = ?unsupported),
252+
more_info = "Unsupported value of ChecksumAlgorithm detected when setting user-agent metrics",
253+
unsupported = ?unsupported),
246254
}
255+
}
256+
257+
Ok(())
258+
}
259+
260+
/// Calculate a checksum and modify the request to include the checksum as a header
261+
/// (for in-memory request bodies) or a trailer (for streaming request bodies).
262+
/// Streaming bodies must be sized or this will return an error.
263+
fn modify_before_signing(
264+
&self,
265+
context: &mut BeforeTransmitInterceptorContextMut<'_>,
266+
_runtime_components: &RuntimeComponents,
267+
cfg: &mut ConfigBag,
268+
) -> Result<(), BoxError> {
269+
let state = cfg
270+
.load::<RequestChecksumInterceptorState>()
271+
.expect("set in `read_before_serialization`");
272+
273+
let checksum_cache = state.checksum_cache.clone();
274+
275+
let checksum_algorithm = state
276+
.checksum_algorithm
277+
.clone()
278+
.map(|s| ChecksumAlgorithm::from_str(s.as_str()))
279+
.transpose()?;
280+
281+
let calculate_checksum = state.calculate_checksum.load(Ordering::SeqCst);
282+
283+
// Calculate the checksum if necessary
284+
if calculate_checksum {
285+
// If a checksum override is set in the ConfigBag we use that instead (currently only used by S3Express)
286+
// If we have made it this far without a checksum being set we set the default (currently Crc32)
287+
let checksum_algorithm =
288+
incorporate_custom_default(checksum_algorithm, cfg).unwrap_or_default();
247289

248290
let request = context.request_mut();
249-
add_checksum_for_request_body(request, checksum_algorithm, cfg)?;
291+
add_checksum_for_request_body(request, checksum_algorithm, checksum_cache, cfg)?;
250292
}
251293

252294
Ok(())
@@ -295,6 +337,7 @@ fn incorporate_custom_default(
295337
fn add_checksum_for_request_body(
296338
request: &mut HttpRequest,
297339
checksum_algorithm: ChecksumAlgorithm,
340+
checksum_cache: ChecksumCache,
298341
cfg: &mut ConfigBag,
299342
) -> Result<(), BoxError> {
300343
match request.body().bytes() {
@@ -308,17 +351,34 @@ fn add_checksum_for_request_body(
308351
tracing::debug!("applying {checksum_algorithm:?} of the request body as a header");
309352
checksum.update(data);
310353

311-
request
312-
.headers_mut()
313-
.insert(checksum.header_name(), checksum.header_value());
354+
let calculated_headers = checksum.headers();
355+
let checksum_headers = if let Some(cached_headers) = checksum_cache.get() {
356+
if cached_headers != calculated_headers {
357+
tracing::warn!(cached = ?cached_headers, calculated = ?calculated_headers, "calculated checksum differs from cached checksum!");
358+
}
359+
cached_headers
360+
} else {
361+
checksum_cache.set(calculated_headers.clone());
362+
calculated_headers
363+
};
364+
365+
for (hdr_name, hdr_value) in checksum_headers.iter() {
366+
request
367+
.headers_mut()
368+
.insert(hdr_name.clone(), hdr_value.clone());
369+
}
314370
}
315371
}
316372
// Body is streaming: wrap the body so it will emit a checksum as a trailer.
317373
None => {
318374
tracing::debug!("applying {checksum_algorithm:?} of the request body as a trailer");
319375
cfg.interceptor_state()
320376
.store_put(PayloadSigningOverride::StreamingUnsignedPayloadTrailer);
321-
wrap_streaming_request_body_in_checksum_calculating_body(request, checksum_algorithm)?;
377+
wrap_streaming_request_body_in_checksum_calculating_body(
378+
request,
379+
checksum_algorithm,
380+
checksum_cache.clone(),
381+
)?;
322382
}
323383
}
324384
Ok(())
@@ -327,6 +387,7 @@ fn add_checksum_for_request_body(
327387
fn wrap_streaming_request_body_in_checksum_calculating_body(
328388
request: &mut HttpRequest,
329389
checksum_algorithm: ChecksumAlgorithm,
390+
checksum_cache: ChecksumCache,
330391
) -> Result<(), BuildError> {
331392
let checksum = checksum_algorithm.into_impl();
332393

@@ -347,7 +408,8 @@ fn wrap_streaming_request_body_in_checksum_calculating_body(
347408
body.map(move |body| {
348409
let checksum = checksum_algorithm.into_impl();
349410
let trailer_len = HttpChecksum::size(checksum.as_ref());
350-
let body = calculate::ChecksumBody::new(body, checksum);
411+
let body =
412+
calculate::ChecksumBody::new(body, checksum).with_cache(checksum_cache.clone());
351413
let aws_chunked_body_options =
352414
AwsChunkedBodyOptions::new(original_body_size, vec![trailer_len]);
353415

@@ -394,6 +456,7 @@ fn wrap_streaming_request_body_in_checksum_calculating_body(
394456
#[cfg(test)]
395457
mod tests {
396458
use crate::http_request_checksum::wrap_streaming_request_body_in_checksum_calculating_body;
459+
use aws_smithy_checksums::body::ChecksumCache;
397460
use aws_smithy_checksums::ChecksumAlgorithm;
398461
use aws_smithy_runtime_api::client::orchestrator::HttpRequest;
399462
use aws_smithy_types::base64;
@@ -417,8 +480,13 @@ mod tests {
417480
assert!(request.body().try_clone().is_some());
418481

419482
let checksum_algorithm: ChecksumAlgorithm = "crc32".parse().unwrap();
420-
wrap_streaming_request_body_in_checksum_calculating_body(&mut request, checksum_algorithm)
421-
.unwrap();
483+
let checksum_cache = ChecksumCache::new();
484+
wrap_streaming_request_body_in_checksum_calculating_body(
485+
&mut request,
486+
checksum_algorithm,
487+
checksum_cache,
488+
)
489+
.unwrap();
422490

423491
// ensure wrapped SdkBody is retryable
424492
let mut body = request.body().try_clone().expect("body is retryable");
@@ -463,8 +531,13 @@ mod tests {
463531
// ensure original SdkBody is retryable
464532
assert!(request.body().try_clone().is_some());
465533

466-
wrap_streaming_request_body_in_checksum_calculating_body(&mut request, checksum_algorithm)
467-
.unwrap();
534+
let checksum_cache = ChecksumCache::new();
535+
wrap_streaming_request_body_in_checksum_calculating_body(
536+
&mut request,
537+
checksum_algorithm,
538+
checksum_cache,
539+
)
540+
.unwrap();
468541

469542
// ensure wrapped SdkBody is retryable
470543
let mut body = request.body().try_clone().expect("body is retryable");

aws/sdk-codegen/src/main/kotlin/software/amazon/smithy/rustsdk/IntegrationTestDependencies.kt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import software.amazon.smithy.rust.codegen.core.rustlang.CargoDependency.Compani
2121
import software.amazon.smithy.rust.codegen.core.rustlang.CargoDependency.Companion.Hound
2222
import software.amazon.smithy.rust.codegen.core.rustlang.CargoDependency.Companion.Http1x
2323
import software.amazon.smithy.rust.codegen.core.rustlang.CargoDependency.Companion.HttpBody1x
24+
import software.amazon.smithy.rust.codegen.core.rustlang.CargoDependency.Companion.HttpBodyUtil
2425
import software.amazon.smithy.rust.codegen.core.rustlang.CargoDependency.Companion.SerdeJson
2526
import software.amazon.smithy.rust.codegen.core.rustlang.CargoDependency.Companion.Smol
2627
import software.amazon.smithy.rust.codegen.core.rustlang.CargoDependency.Companion.TempFile
@@ -186,6 +187,7 @@ class S3TestDependencies(private val runtimeConfig: RuntimeConfig) : LibRsCustom
186187
addDependency(FuturesUtil.toDevDependency())
187188
addDependency(HdrHistogram)
188189
addDependency(HttpBody1x.toDevDependency().copy(optional = false))
190+
addDependency(HttpBodyUtil.toDevDependency().copy(optional = false))
189191
addDependency(Smol)
190192
addDependency(TempFile)
191193
addDependency(TracingAppender)

aws/sdk/integration-tests/s3/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ futures-util = { version = "0.3.29", default-features = false, features = ["allo
3636
hdrhistogram = "7.5.2"
3737
http-1x = { package = "http", version = "1" }
3838
http-body-1x = { package = "http-body", version = "1" }
39+
http-body-util = "0.1.3"
3940
hyper = { version = "0.14.26", features = ["stream"] }
4041
pretty_assertions = "1.3"
4142
serde_json = "1"

0 commit comments

Comments
 (0)