Skip to content

Commit acb693f

Browse files
authored
IMDS retries (#3975)
## Motivation and Context <!--- Why is this change required? What problem does it solve? --> <!--- If it fixes an open issue, please link to the issue here --> Addressing awslabs/aws-sdk-rust#1233 ## Description <!--- Describe your changes in detail --> Add ability to configure the retry classifier on the IMDS client. ## Testing <!--- Please describe in detail how you tested your changes --> <!--- Include details of your testing environment, and the tests you ran to --> <!--- see how your change affects other areas of the code, etc. --> Added new unit test to ensure that a user-defined retry classifier is being used. ## Checklist <!--- If a checkbox below is not applicable, then please DELETE it rather than leaving it unchecked --> - [x] For changes to the smithy-rs codegen or runtime crates, I have created a changelog entry Markdown file in the `.changelog` directory, specifying "client," "server," or both in the `applies_to` key. ---- _By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice._
1 parent 4ad631a commit acb693f

File tree

4 files changed

+170
-9
lines changed

4 files changed

+170
-9
lines changed

.changelog/imds-retries.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
---
2+
applies_to: ["client"]
3+
authors: ["landonxjames"]
4+
references: ["aws-sdk-rust#1233"]
5+
breaking: false
6+
new_feature: true
7+
bug_fix: false
8+
---
9+
10+
Allow IMDS clients to be configured with a user-provided `SharedRetryClassifier`.

aws/rust-runtime/aws-config/Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

aws/rust-runtime/aws-config/external-types.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ allowed_external_types = [
3232
"aws_smithy_runtime_api::client::identity::ResolveIdentity",
3333
"aws_smithy_runtime_api::client::orchestrator::HttpResponse",
3434
"aws_smithy_runtime_api::client::result::SdkError",
35+
"aws_smithy_runtime_api::client::retries::classifiers::ClassifyRetry",
36+
"aws_smithy_runtime_api::client::retries::classifiers::SharedRetryClassifier",
3537
"aws_smithy_runtime_api::client::stalled_stream_protection::StalledStreamProtectionConfig",
3638
"aws_smithy_types::body::SdkBody",
3739
"aws_smithy_types::checksum_config::RequestChecksumCalculation",

aws/rust-runtime/aws-config/src/imds/client.rs

Lines changed: 157 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ const DEFAULT_TOKEN_TTL: Duration = Duration::from_secs(21_600);
5252
const DEFAULT_ATTEMPTS: u32 = 4;
5353
const DEFAULT_CONNECT_TIMEOUT: Duration = Duration::from_secs(1);
5454
const DEFAULT_READ_TIMEOUT: Duration = Duration::from_secs(1);
55+
const DEFAULT_OPERATION_TIMEOUT: Duration = Duration::from_secs(30);
56+
const DEFAULT_OPERATION_ATTEMPT_TIMEOUT: Duration = Duration::from_secs(10);
5557

5658
fn user_agent() -> AwsUserAgent {
5759
AwsUserAgent::new_from_environment(Env::real(), ApiMetadata::new("imds", PKG_VERSION))
@@ -238,6 +240,7 @@ impl ImdsCommonRuntimePlugin {
238240
config: &ProviderConfig,
239241
endpoint_resolver: ImdsEndpointResolver,
240242
retry_config: RetryConfig,
243+
retry_classifier: SharedRetryClassifier,
241244
timeout_config: TimeoutConfig,
242245
) -> Self {
243246
let mut layer = Layer::new("ImdsCommonRuntimePlugin");
@@ -254,7 +257,7 @@ impl ImdsCommonRuntimePlugin {
254257
.with_http_client(config.http_client())
255258
.with_endpoint_resolver(Some(endpoint_resolver))
256259
.with_interceptor(UserAgentInterceptor::new())
257-
.with_retry_classifier(SharedRetryClassifier::new(ImdsResponseRetryClassifier))
260+
.with_retry_classifier(retry_classifier)
258261
.with_retry_strategy(Some(StandardRetryStrategy::new()))
259262
.with_time_source(Some(config.time_source()))
260263
.with_sleep_impl(config.sleep_impl()),
@@ -322,7 +325,10 @@ pub struct Builder {
322325
token_ttl: Option<Duration>,
323326
connect_timeout: Option<Duration>,
324327
read_timeout: Option<Duration>,
328+
operation_timeout: Option<Duration>,
329+
operation_attempt_timeout: Option<Duration>,
325330
config: Option<ProviderConfig>,
331+
retry_classifier: Option<SharedRetryClassifier>,
326332
}
327333

328334
impl Builder {
@@ -398,6 +404,32 @@ impl Builder {
398404
self
399405
}
400406

407+
/// Override the operation timeout for IMDS
408+
///
409+
/// This value defaults to 30 seconds
410+
pub fn operation_timeout(mut self, timeout: Duration) -> Self {
411+
self.operation_timeout = Some(timeout);
412+
self
413+
}
414+
415+
/// Override the operation attempt timeout for IMDS
416+
///
417+
/// This value defaults to 10 seconds
418+
pub fn operation_attempt_timeout(mut self, timeout: Duration) -> Self {
419+
self.operation_attempt_timeout = Some(timeout);
420+
self
421+
}
422+
423+
/// Override the retry classifier for IMDS
424+
///
425+
/// This defaults to only retrying on server errors and 401s. The [ImdsResponseRetryClassifier] in this
426+
/// module offers some configuration options and can be wrapped by [SharedRetryClassifier::new()] for use
427+
/// here or you can create your own fully customized [SharedRetryClassifier].
428+
pub fn retry_classifier(mut self, retry_classifier: SharedRetryClassifier) -> Self {
429+
self.retry_classifier = Some(retry_classifier);
430+
self
431+
}
432+
401433
/* TODO(https://github.com/awslabs/aws-sdk-rust/issues/339): Support customizing the port explicitly */
402434
/*
403435
pub fn port(mut self, port: u32) -> Self {
@@ -411,6 +443,11 @@ impl Builder {
411443
let timeout_config = TimeoutConfig::builder()
412444
.connect_timeout(self.connect_timeout.unwrap_or(DEFAULT_CONNECT_TIMEOUT))
413445
.read_timeout(self.read_timeout.unwrap_or(DEFAULT_READ_TIMEOUT))
446+
.operation_attempt_timeout(
447+
self.operation_attempt_timeout
448+
.unwrap_or(DEFAULT_OPERATION_ATTEMPT_TIMEOUT),
449+
)
450+
.operation_timeout(self.operation_timeout.unwrap_or(DEFAULT_OPERATION_TIMEOUT))
414451
.build();
415452
let endpoint_source = self
416453
.endpoint
@@ -421,10 +458,14 @@ impl Builder {
421458
};
422459
let retry_config = RetryConfig::standard()
423460
.with_max_attempts(self.max_attempts.unwrap_or(DEFAULT_ATTEMPTS));
461+
let retry_classifier = self.retry_classifier.unwrap_or(SharedRetryClassifier::new(
462+
ImdsResponseRetryClassifier::default(),
463+
));
424464
let common_plugin = SharedRuntimePlugin::new(ImdsCommonRuntimePlugin::new(
425465
&config,
426466
endpoint_resolver,
427467
retry_config,
468+
retry_classifier,
428469
timeout_config,
429470
));
430471
let operation = Operation::builder()
@@ -549,8 +590,20 @@ impl ResolveEndpoint for ImdsEndpointResolver {
549590
/// - 403 (IMDS disabled): **Not Retryable**
550591
/// - 404 (Not found): **Not Retryable**
551592
/// - >=500 (server error): **Retryable**
552-
#[derive(Clone, Debug)]
553-
struct ImdsResponseRetryClassifier;
593+
/// - Timeouts: Not retried by default, but this is configurable via [Self::with_retry_connect_timeouts()]
594+
#[derive(Clone, Debug, Default)]
595+
#[non_exhaustive]
596+
pub struct ImdsResponseRetryClassifier {
597+
retry_connect_timeouts: bool,
598+
}
599+
600+
impl ImdsResponseRetryClassifier {
601+
/// Indicate whether the IMDS client should retry on connection timeouts
602+
pub fn with_retry_connect_timeouts(mut self, retry_connect_timeouts: bool) -> Self {
603+
self.retry_connect_timeouts = retry_connect_timeouts;
604+
self
605+
}
606+
}
554607

555608
impl ClassifyRetry for ImdsResponseRetryClassifier {
556609
fn name(&self) -> &'static str {
@@ -567,7 +620,10 @@ impl ClassifyRetry for ImdsResponseRetryClassifier {
567620
// This catch-all includes successful responses that fail to parse. These should not be retried.
568621
_ => RetryAction::NoActionIndicated,
569622
}
623+
} else if self.retry_connect_timeouts {
624+
RetryAction::server_error()
570625
} else {
626+
// This is the default behavior.
571627
// Don't retry timeouts for IMDS, or else it will take ~30 seconds for the default
572628
// credentials provider chain to fail to provide credentials.
573629
// Also don't retry non-responses.
@@ -593,7 +649,9 @@ pub(crate) mod test {
593649
HttpRequest, HttpResponse, OrchestratorError,
594650
};
595651
use aws_smithy_runtime_api::client::result::ConnectorError;
596-
use aws_smithy_runtime_api::client::retries::classifiers::{ClassifyRetry, RetryAction};
652+
use aws_smithy_runtime_api::client::retries::classifiers::{
653+
ClassifyRetry, RetryAction, SharedRetryClassifier,
654+
};
597655
use aws_smithy_types::body::SdkBody;
598656
use aws_smithy_types::error::display::DisplayErrorContext;
599657
use aws_types::os_shim_internal::{Env, Fs};
@@ -603,6 +661,7 @@ pub(crate) mod test {
603661
use std::collections::HashMap;
604662
use std::error::Error;
605663
use std::io;
664+
use std::time::SystemTime;
606665
use std::time::{Duration, UNIX_EPOCH};
607666
use tracing_test::traced_test;
608667

@@ -933,7 +992,7 @@ pub(crate) mod test {
933992
let mut ctx = InterceptorContext::new(Input::doesnt_matter());
934993
ctx.set_output_or_error(Ok(Output::doesnt_matter()));
935994
ctx.set_response(imds_response("").map(|_| SdkBody::empty()));
936-
let classifier = ImdsResponseRetryClassifier;
995+
let classifier = ImdsResponseRetryClassifier::default();
937996
assert_eq!(
938997
RetryAction::NoActionIndicated,
939998
classifier.classify_retry(&ctx)
@@ -950,6 +1009,65 @@ pub(crate) mod test {
9501009
);
9511010
}
9521011

1012+
/// User provided retry classifier works
1013+
#[tokio::test]
1014+
async fn user_provided_retry_classifier() {
1015+
#[derive(Clone, Debug)]
1016+
struct UserProvidedRetryClassifier;
1017+
1018+
impl ClassifyRetry for UserProvidedRetryClassifier {
1019+
fn name(&self) -> &'static str {
1020+
"UserProvidedRetryClassifier"
1021+
}
1022+
1023+
// Don't retry anything
1024+
fn classify_retry(&self, _ctx: &InterceptorContext) -> RetryAction {
1025+
RetryAction::RetryForbidden
1026+
}
1027+
}
1028+
1029+
let events = vec![
1030+
ReplayEvent::new(
1031+
token_request("http://169.254.169.254", 21600),
1032+
token_response(0, TOKEN_A),
1033+
),
1034+
ReplayEvent::new(
1035+
imds_request("http://169.254.169.254/latest/metadata", TOKEN_A),
1036+
http::Response::builder()
1037+
.status(401)
1038+
.body(SdkBody::empty())
1039+
.unwrap(),
1040+
),
1041+
ReplayEvent::new(
1042+
token_request("http://169.254.169.254", 21600),
1043+
token_response(21600, TOKEN_B),
1044+
),
1045+
ReplayEvent::new(
1046+
imds_request("http://169.254.169.254/latest/metadata", TOKEN_B),
1047+
imds_response("ok"),
1048+
),
1049+
];
1050+
let http_client = StaticReplayClient::new(events);
1051+
1052+
let imds_client = super::Client::builder()
1053+
.configure(
1054+
&ProviderConfig::no_configuration()
1055+
.with_sleep_impl(InstantSleep::unlogged())
1056+
.with_http_client(http_client.clone()),
1057+
)
1058+
.retry_classifier(SharedRetryClassifier::new(UserProvidedRetryClassifier))
1059+
.build();
1060+
1061+
let res = imds_client
1062+
.get("/latest/metadata")
1063+
.await
1064+
.expect_err("Client should error");
1065+
1066+
// Assert that the operation errored on the initial 401 and did not retry and get
1067+
// the 200 (since the user provided retry classifier never retries)
1068+
assert_full_error_contains!(res, "401");
1069+
}
1070+
9531071
// since tokens are sent as headers, the tokens need to be valid header values
9541072
#[tokio::test]
9551073
async fn invalid_token() {
@@ -989,9 +1107,6 @@ pub(crate) mod test {
9891107
#[cfg(feature = "rustls")]
9901108
async fn one_second_connect_timeout() {
9911109
use crate::imds::client::ImdsError;
992-
use aws_smithy_types::error::display::DisplayErrorContext;
993-
use std::time::SystemTime;
994-
9951110
let client = Client::builder()
9961111
// 240.* can never be resolved
9971112
.endpoint("http://240.0.0.0")
@@ -1023,6 +1138,40 @@ pub(crate) mod test {
10231138
);
10241139
}
10251140

1141+
/// Retry classifier properly retries timeouts when configured to (meaning the request only fails once the 1s operation timeout elapses)
1142+
#[tokio::test]
1143+
async fn retry_connect_timeouts() {
1144+
let http_client = StaticReplayClient::new(vec![]);
1145+
let imds_client = super::Client::builder()
1146+
.retry_classifier(SharedRetryClassifier::new(
1147+
ImdsResponseRetryClassifier::default().with_retry_connect_timeouts(true),
1148+
))
1149+
.configure(&ProviderConfig::no_configuration().with_http_client(http_client.clone()))
1150+
.operation_timeout(Duration::from_secs(1))
1151+
.endpoint("http://240.0.0.0")
1152+
.expect("valid uri")
1153+
.build();
1154+
1155+
let now = SystemTime::now();
1156+
let _res = imds_client
1157+
.get("/latest/metadata")
1158+
.await
1159+
.expect_err("240.0.0.0 will never resolve");
1160+
let time_elapsed: Duration = now.elapsed().unwrap();
1161+
1162+
assert!(
1163+
time_elapsed > Duration::from_secs(1),
1164+
"time_elapsed should be greater than 1s but was {:?}",
1165+
time_elapsed
1166+
);
1167+
1168+
assert!(
1169+
time_elapsed < Duration::from_secs(2),
1170+
"time_elapsed should be less than 2s but was {:?}",
1171+
time_elapsed
1172+
);
1173+
}
1174+
10261175
#[derive(Debug, Deserialize)]
10271176
struct ImdsConfigTest {
10281177
env: HashMap<String, String>,

0 commit comments

Comments
 (0)