Skip to content

Commit 5816423

Browse files
authored
Ensure next-item queries look at gaps in both directions (#8215)
- We modified the next-item queries in #6551 to reduce memory consumption. In previous versions, we constructed the entire candidate range of next items and joined that with the target table, taking the first entry that doesn't exist in the table. But CRDB materializes all subqueries eagerly, thus the memory problems. The modified version used a self-join, between the list of allocated items and those items incremented by 1. This limited memory consumption to be linear in the number of _allocated_ items, rather than the full candidate range, which is generally much smaller. However, this new query isn't exactly the same as the original: it only looks at gaps _larger_ than allocated items, whereas the original query inspected the entire range. This commit reworks this self-join query to inspect gaps both above and below the target items. In doing so, I've also restructured the query to be much simpler and more direct, and I think much more efficient. All subqueries in it are limited to at most 1 element, which should consume constant rather than linear memory. - Fixes #8208.
1 parent 726e9b6 commit 5816423

File tree

3 files changed

+479
-127
lines changed

3 files changed

+479
-127
lines changed

nexus/db-queries/src/db/queries/network_interface.rs

Lines changed: 199 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ pub enum InsertError {
100100
/// interface that is associated with another VPC.
101101
ResourceSpansMultipleVpcs(Uuid),
102102
/// There are no available IP addresses in the requested subnet
103-
NoAvailableIpAddresses,
103+
NoAvailableIpAddresses { name: String, id: Uuid },
104104
/// An explicitly-requested IP address is already in use
105105
IpAddressNotAvailable(std::net::IpAddr),
106106
/// An explicity-requested MAC address is already in use
@@ -170,10 +170,11 @@ impl InsertError {
170170
) => {
171171
unimplemented!("probe network interface")
172172
}
173-
InsertError::NoAvailableIpAddresses => {
174-
external::Error::invalid_request(
175-
"No available IP addresses for interface",
176-
)
173+
InsertError::NoAvailableIpAddresses { name, id } => {
174+
external::Error::invalid_request(format!(
175+
"No available IP addresses for interface in \
176+
subnet '{name}' with ID '{id}'"
177+
))
177178
}
178179
InsertError::ResourceSpansMultipleVpcs(_) => {
179180
external::Error::invalid_request(concat!(
@@ -326,7 +327,10 @@ fn decode_database_error(
326327
DatabaseErrorKind::NotNullViolation,
327328
info,
328329
) if info.message() == IP_EXHAUSTION_ERROR_MESSAGE => {
329-
InsertError::NoAvailableIpAddresses
330+
InsertError::NoAvailableIpAddresses {
331+
name: interface.subnet.identity.name.to_string(),
332+
id: interface.subnet.identity.id,
333+
}
330334
}
331335

332336
// This catches the error intentionally introduced by the
@@ -2078,6 +2082,22 @@ mod tests {
20782082
)
20792083
.await
20802084
}
2085+
2086+
async fn delete_instance_nics(&self, instance_id: Uuid) {
2087+
let (.., authz_instance) =
2088+
LookupPath::new(self.opctx(), self.datastore())
2089+
.instance_id(instance_id)
2090+
.lookup_for(authz::Action::Modify)
2091+
.await
2092+
.expect("Failed to lookup instance");
2093+
self.datastore()
2094+
.instance_delete_all_network_interfaces(
2095+
self.opctx(),
2096+
&authz_instance,
2097+
)
2098+
.await
2099+
.expect("Failed to delete NICs");
2100+
}
20812101
}
20822102

20832103
#[tokio::test]
@@ -2810,7 +2830,7 @@ mod tests {
28102830
.instance_create_network_interface_raw(context.opctx(), interface)
28112831
.await;
28122832
assert!(
2813-
matches!(result, Err(InsertError::NoAvailableIpAddresses)),
2833+
matches!(result, Err(InsertError::NoAvailableIpAddresses { .. })),
28142834
"Address exhaustion should be detected and handled, found {:?}",
28152835
result,
28162836
);
@@ -2954,6 +2974,178 @@ mod tests {
29542974
context.success().await;
29552975
}
29562976

2977+
// Regression for https://github.com/oxidecomputer/omicron/issues/8208
2978+
#[tokio::test]
2979+
async fn allocation_and_deallocation_takes_next_smallest_address() {
2980+
let context = TestContext::new(
2981+
"allocation_and_deallocation_takes_next_smallest_address",
2982+
1,
2983+
)
2984+
.await;
2985+
2986+
// Create three instances, each with an interface.
2987+
const N_INSTANCES: usize = 3;
2988+
let mut instances = Vec::with_capacity(N_INSTANCES);
2989+
for _ in 0..N_INSTANCES {
2990+
let instance = context.create_stopped_instance().await;
2991+
let instance_id = InstanceUuid::from_untyped_uuid(instance.id());
2992+
let interface = IncompleteNetworkInterface::new_instance(
2993+
Uuid::new_v4(),
2994+
instance_id,
2995+
context.net1.subnets[0].clone(),
2996+
IdentityMetadataCreateParams {
2997+
name: "interface-c".parse().unwrap(),
2998+
description: String::from("description"),
2999+
},
3000+
None,
3001+
)
3002+
.unwrap();
3003+
let intf = context
3004+
.datastore()
3005+
.instance_create_network_interface_raw(
3006+
context.opctx(),
3007+
interface,
3008+
)
3009+
.await
3010+
.expect("Failed to insert interface");
3011+
instances.push((instance, intf));
3012+
}
3013+
3014+
// Delete the NIC on the first instance.
3015+
let original_ip = instances[0].1.ip.ip();
3016+
context.delete_instance_nics(instances[0].0.id()).await;
3017+
3018+
// And recreate it, ensuring we get the same IP address again.
3019+
let interface = IncompleteNetworkInterface::new_instance(
3020+
Uuid::new_v4(),
3021+
InstanceUuid::from_untyped_uuid(instances[0].0.id()),
3022+
context.net1.subnets[0].clone(),
3023+
IdentityMetadataCreateParams {
3024+
name: "interface-c".parse().unwrap(),
3025+
description: String::from("description"),
3026+
},
3027+
None,
3028+
)
3029+
.unwrap();
3030+
let intf = context
3031+
.datastore()
3032+
.instance_create_network_interface_raw(context.opctx(), interface)
3033+
.await
3034+
.expect("Failed to insert interface");
3035+
instances[0].1 = intf;
3036+
assert_eq!(
3037+
instances[0].1.ip.ip(),
3038+
original_ip,
3039+
"Should have recreated the first available IP address again"
3040+
);
3041+
3042+
// Now delete the NICs from the first and second instances.
3043+
for (inst, _) in instances[..2].iter() {
3044+
context.delete_instance_nics(inst.id()).await;
3045+
}
3046+
3047+
// Create a new one, and ensure we've taken the _second_ address. The
3048+
// allocation query looks at the first gap upwards and the first gap
3049+
// downwards from any allocated items.
3050+
let interface = IncompleteNetworkInterface::new_instance(
3051+
Uuid::new_v4(),
3052+
InstanceUuid::from_untyped_uuid(instances[0].0.id()),
3053+
context.net1.subnets[0].clone(),
3054+
IdentityMetadataCreateParams {
3055+
name: "interface-c".parse().unwrap(),
3056+
description: String::from("description"),
3057+
},
3058+
None,
3059+
)
3060+
.unwrap();
3061+
let intf = context
3062+
.datastore()
3063+
.instance_create_network_interface_raw(context.opctx(), interface)
3064+
.await
3065+
.expect("Failed to insert interface");
3066+
assert_eq!(
3067+
intf.ip.ip(),
3068+
instances[1].1.ip.ip(),
3069+
"Should have used the second address",
3070+
);
3071+
3072+
context.success().await;
3073+
}
3074+
3075+
// Regression for https://github.com/oxidecomputer/omicron/issues/8208
3076+
#[tokio::test]
3077+
async fn allocation_after_explicit_ip_address_takes_next_smallest_address()
3078+
{
3079+
let context = TestContext::new(
3080+
"allocation_after_explicit_ip_address_takes_next_smallest_address",
3081+
1,
3082+
)
3083+
.await;
3084+
3085+
// Create one instance, with a specific address.
3086+
let instance = context.create_stopped_instance().await;
3087+
let instance_id = InstanceUuid::from_untyped_uuid(instance.id());
3088+
const NTH: usize = 8;
3089+
let addr =
3090+
context.net2.subnets[0].ipv4_block.nth(NTH).unwrap_or_else(|| {
3091+
panic!("Should have been able to get the {NTH}-th address")
3092+
});
3093+
let interface = IncompleteNetworkInterface::new_instance(
3094+
Uuid::new_v4(),
3095+
instance_id,
3096+
context.net2.subnets[0].clone(),
3097+
IdentityMetadataCreateParams {
3098+
name: "interface-c".parse().unwrap(),
3099+
description: String::from("description"),
3100+
},
3101+
Some(IpAddr::V4(addr)),
3102+
)
3103+
.unwrap();
3104+
let _ = context
3105+
.datastore()
3106+
.instance_create_network_interface_raw(context.opctx(), interface)
3107+
.await
3108+
.expect("Failed to insert interface");
3109+
3110+
// Now create another one, attaching an automatic address.
3111+
let instance2 = context.create_stopped_instance().await;
3112+
let instance_id = InstanceUuid::from_untyped_uuid(instance2.id());
3113+
let interface = IncompleteNetworkInterface::new_instance(
3114+
Uuid::new_v4(),
3115+
instance_id,
3116+
context.net2.subnets[0].clone(),
3117+
IdentityMetadataCreateParams {
3118+
name: "interface-c".parse().unwrap(),
3119+
description: String::from("description"),
3120+
},
3121+
None,
3122+
)
3123+
.unwrap();
3124+
3125+
// The new address should be 1 less than the previous one.
3126+
let interface2 = context
3127+
.datastore()
3128+
.instance_create_network_interface_raw(context.opctx(), interface)
3129+
.await
3130+
.expect("Failed to insert interface");
3131+
assert_eq!(
3132+
IpAddr::V4(
3133+
context
3134+
.net2
3135+
.subnets[0]
3136+
.ipv4_block
3137+
.nth(NTH - 1)
3138+
.unwrap_or_else(|| {
3139+
panic!("Should have been able to get the {NTH}-1-th address")
3140+
})
3141+
),
3142+
interface2.ip.ip(),
3143+
"Should have allocated 1 less than the smallest existing address"
3144+
);
3145+
3146+
context.success().await;
3147+
}
3148+
29573149
#[test]
29583150
fn test_first_available_address() {
29593151
let subnet = "172.30.0.0/28".parse().unwrap();

0 commit comments

Comments
 (0)