Skip to content

Commit 8f65850

Browse files
committed
Use NTP admin API instead of sled agent, remove sled agent time_sync API
1 parent 8ad59a1 commit 8f65850

File tree

15 files changed

+86
-232
lines changed

15 files changed

+86
-232
lines changed

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

dev-tools/ls-apis/tests/api_dependencies.out

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ Nexus Internal API (client: nexus-client)
7171
consumed by: propolis-server (propolis/bin/propolis-server) via 3 paths
7272

7373
NTP Admin (client: ntp-admin-client)
74+
consumed by: omicron-sled-agent (omicron/sled-agent) via 2 paths
7475

7576
External API (client: oxide-client)
7677

openapi/sled-agent.json

Lines changed: 0 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1464,29 +1464,6 @@
14641464
}
14651465
}
14661466
},
1467-
"/timesync": {
1468-
"get": {
1469-
"operationId": "timesync_get",
1470-
"responses": {
1471-
"200": {
1472-
"description": "successful operation",
1473-
"content": {
1474-
"application/json": {
1475-
"schema": {
1476-
"$ref": "#/components/schemas/TimeSync"
1477-
}
1478-
}
1479-
}
1480-
},
1481-
"4XX": {
1482-
"$ref": "#/components/responses/Error"
1483-
},
1484-
"5XX": {
1485-
"$ref": "#/components/responses/Error"
1486-
}
1487-
}
1488-
}
1489-
},
14901467
"/v2p": {
14911468
"get": {
14921469
"summary": "List v2p mappings present on sled",
@@ -7326,50 +7303,6 @@
73267303
"uplinks"
73277304
]
73287305
},
7329-
"TimeSync": {
7330-
"type": "object",
7331-
"properties": {
7332-
"correction": {
7333-
"description": "The current offset between the NTP clock and system clock.",
7334-
"type": "number",
7335-
"format": "double"
7336-
},
7337-
"ip_addr": {
7338-
"description": "The NTP reference IP address.",
7339-
"type": "string",
7340-
"format": "ip"
7341-
},
7342-
"ref_id": {
7343-
"description": "The NTP reference ID.",
7344-
"type": "integer",
7345-
"format": "uint32",
7346-
"minimum": 0
7347-
},
7348-
"ref_time": {
7349-
"description": "The NTP reference time (i.e. what chrony thinks the current time is, not necessarily the current system time).",
7350-
"type": "number",
7351-
"format": "double"
7352-
},
7353-
"stratum": {
7354-
"description": "The NTP stratum (our upstream's stratum plus one).",
7355-
"type": "integer",
7356-
"format": "uint8",
7357-
"minimum": 0
7358-
},
7359-
"sync": {
7360-
"description": "The synchronization state of the sled, true when the system clock and the NTP clock are in sync (to within a small window).",
7361-
"type": "boolean"
7362-
}
7363-
},
7364-
"required": [
7365-
"correction",
7366-
"ip_addr",
7367-
"ref_id",
7368-
"ref_time",
7369-
"stratum",
7370-
"sync"
7371-
]
7372-
},
73737306
"TxEqConfig": {
73747307
"description": "Per-port tx-eq overrides. This can be used to fine-tune the transceiver equalization settings to improve signal integrity.",
73757308
"type": "object",

sled-agent/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ nexus-client.workspace = true
5959
nexus-config.workspace = true
6060
nexus-sled-agent-shared.workspace = true
6161
nexus-types.workspace = true
62+
ntp-admin-client.workspace = true
6263
omicron-common.workspace = true
6364
omicron-ddm-admin-client.workspace = true
6465
omicron-uuid-kinds.workspace = true

sled-agent/api/src/lib.rs

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@ use sled_agent_types::{
4242
VmmPutStateResponse, VmmUnregisterResponse,
4343
},
4444
sled::AddSledRequest,
45-
time_sync::TimeSync,
4645
zone_bundle::{
4746
BundleUtilization, CleanupContext, CleanupCount, PriorityOrder,
4847
ZoneBundleId, ZoneBundleMetadata,
@@ -477,14 +476,6 @@ pub trait SledAgentApi {
477476
rqctx: RequestContext<Self::Context>,
478477
) -> Result<HttpResponseOk<Vec<VirtualNetworkInterfaceHost>>, HttpError>;
479478

480-
#[endpoint {
481-
method = GET,
482-
path = "/timesync",
483-
}]
484-
async fn timesync_get(
485-
rqctx: RequestContext<Self::Context>,
486-
) -> Result<HttpResponseOk<TimeSync>, HttpError>;
487-
488479
#[endpoint {
489480
method = POST,
490481
path = "/switch-ports",

sled-agent/config-reconciler/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ illumos-utils.workspace = true
2626
installinator-common.workspace = true
2727
key-manager.workspace = true
2828
nexus-sled-agent-shared.workspace = true
29+
ntp-admin-client.workspace = true
2930
omicron-common.workspace = true
3031
omicron-uuid-kinds.workspace = true
3132
serde.workspace = true

sled-agent/config-reconciler/src/reconciler_task.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -503,7 +503,7 @@ impl ReconcilerTask {
503503

504504
// Collect the current timesync status (needed to start any new zones,
505505
// and also we want to report it as part of each reconciler result).
506-
let timesync_status = self.zones.check_timesync().await;
506+
let timesync_status = self.zones.check_timesync(&self.log).await;
507507

508508
// Call back into sled-agent and let it do any work that needs to happen
509509
// once time is sync'd (e.g., rewrite `uptime`).

sled-agent/config-reconciler/src/reconciler_task/zones.rs

Lines changed: 29 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,9 @@ use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryResult;
2424
use nexus_sled_agent_shared::inventory::OmicronZoneConfig;
2525
use nexus_sled_agent_shared::inventory::OmicronZoneImageSource;
2626
use nexus_sled_agent_shared::inventory::OmicronZoneType;
27+
use ntp_admin_client::types::TimeSync;
2728
use omicron_common::address::Ipv6Subnet;
2829
use omicron_uuid_kinds::OmicronZoneUuid;
29-
use sled_agent_types::time_sync::TimeSync;
3030
use sled_agent_types::zone_bundle::ZoneBundleCause;
3131
use sled_agent_types::zone_images::ResolverStatus;
3232
use sled_storage::config::MountConfig;
@@ -36,10 +36,7 @@ use slog::warn;
3636
use slog_error_chain::InlineErrorChain;
3737
use std::borrow::Cow;
3838
use std::collections::BTreeMap;
39-
use std::net::IpAddr;
40-
use std::net::Ipv6Addr;
4139
use std::num::NonZeroUsize;
42-
use std::str::FromStr as _;
4340
use std::sync::Arc;
4441

4542
use super::OmicronDatasets;
@@ -70,6 +67,8 @@ pub enum TimeSyncError {
7067
NoRunningNtpZone,
7168
#[error("multiple running NTP zones - this should never happen!")]
7269
MultipleRunningNtpZones,
70+
#[error("failed to communicate with NTP admin server")]
71+
NtpAdmin(#[from] ntp_admin_client::Error<ntp_admin_client::types::Error>),
7372
#[error("failed to execute chronyc within NTP zone")]
7473
ExecuteChronyc(#[source] RunCommandError),
7574
#[error(
@@ -465,10 +464,10 @@ impl OmicronZones {
465464
}
466465

467466
/// Check the timesync status from a running NTP zone (if it exists)
468-
pub(super) async fn check_timesync(&self) -> TimeSyncStatus {
467+
pub(super) async fn check_timesync(&self, log: &Logger) -> TimeSyncStatus {
469468
match &self.timesync_config {
470469
TimeSyncConfig::Normal => {
471-
match self.timesync_status_from_ntp_zone().await {
470+
match self.timesync_status_from_ntp_zone(log).await {
472471
Ok(timesync) => TimeSyncStatus::TimeSync(timesync),
473472
Err(err) => {
474473
TimeSyncStatus::FailedToGetSyncStatus(Arc::new(err))
@@ -481,83 +480,45 @@ impl OmicronZones {
481480

482481
async fn timesync_status_from_ntp_zone(
483482
&self,
483+
log: &Logger,
484484
) -> Result<TimeSync, TimeSyncError> {
485485
// Get the one and only running NTP zone, or return an error.
486-
let mut running_ntp_zones = self.zones.iter().filter_map(|z| {
486+
let mut ntp_admin_addresses = self.zones.iter().filter_map(|z| {
487487
if !z.config.zone_type.is_ntp() {
488488
return None;
489489
}
490+
// TODO(https://github.com/oxidecomputer/omicron/issues/6796):
491+
//
492+
// We could avoid hard-coding the port here if the zone was fully
493+
// specified to include both NTP and Admin server addresses.
494+
let mut addr = match z.config.zone_type {
495+
OmicronZoneType::BoundaryNtp { address, .. } => address,
496+
OmicronZoneType::InternalNtp { address, .. } => address,
497+
_ => return None,
498+
};
499+
addr.set_port(omicron_common::address::NTP_ADMIN_PORT);
490500

491501
match &z.state {
492-
ZoneState::Running(running_zone) => Some(running_zone),
502+
ZoneState::Running(_) => Some(addr),
493503
ZoneState::PartiallyShutDown { .. }
494504
| ZoneState::FailedToStart(_) => None,
495505
}
496506
});
497-
let running_ntp_zone =
498-
running_ntp_zones.next().ok_or(TimeSyncError::NoRunningNtpZone)?;
499-
if running_ntp_zones.next().is_some() {
507+
let ntp_admin_address = ntp_admin_addresses
508+
.next()
509+
.ok_or(TimeSyncError::NoRunningNtpZone)?;
510+
if ntp_admin_addresses.next().is_some() {
500511
return Err(TimeSyncError::MultipleRunningNtpZones);
501512
}
502513

503-
// XXXNTP - This could be replaced with a direct connection to the
504-
// daemon using a patched version of the chrony_candm crate to allow
505-
// a custom server socket path. From the GZ, it should be possible to
506-
// connect to the UNIX socket at
507-
// format!("{}/var/run/chrony/chronyd.sock", ntp_zone.root())
508-
509-
let stdout = running_ntp_zone
510-
.run_cmd(&["/usr/bin/chronyc", "-c", "tracking"])
511-
.map_err(TimeSyncError::ExecuteChronyc)?;
512-
513-
let v: Vec<&str> = stdout.split(',').collect();
514-
515-
if v.len() < 10 {
516-
return Err(TimeSyncError::FailedToParse {
517-
reason: "too few fields",
518-
stdout,
519-
});
520-
}
521-
522-
let Ok(ref_id) = u32::from_str_radix(v[0], 16) else {
523-
return Err(TimeSyncError::FailedToParse {
524-
reason: "bad ref_id",
525-
stdout,
526-
});
527-
};
528-
let ip_addr =
529-
IpAddr::from_str(v[1]).unwrap_or(Ipv6Addr::UNSPECIFIED.into());
530-
let Ok(stratum) = u8::from_str(v[2]) else {
531-
return Err(TimeSyncError::FailedToParse {
532-
reason: "bad stratum",
533-
stdout,
534-
});
535-
};
536-
let Ok(ref_time) = f64::from_str(v[3]) else {
537-
return Err(TimeSyncError::FailedToParse {
538-
reason: "bad ref_time",
539-
stdout,
540-
});
541-
};
542-
let Ok(correction) = f64::from_str(v[4]) else {
543-
return Err(TimeSyncError::FailedToParse {
544-
reason: "bad correction",
545-
stdout,
546-
});
547-
};
548-
549-
// Per `chronyc waitsync`'s implementation, if either the
550-
// reference IP address is not unspecified or the reference
551-
// ID is not 0 or 0x7f7f0101, we are synchronized to a peer.
552-
let peer_sync =
553-
!ip_addr.is_unspecified() || (ref_id != 0 && ref_id != 0x7f7f0101);
514+
let client = ntp_admin_client::Client::new(
515+
&format!("http://{ntp_admin_address}"),
516+
log.clone(),
517+
);
554518

555-
let sync = stratum < 10
556-
&& ref_time > 1234567890.0
557-
&& peer_sync
558-
&& correction.abs() <= 0.05;
519+
let timesync = client.timesync().await?.into_inner();
559520

560-
Ok(TimeSync { sync, ref_id, ip_addr, stratum, ref_time, correction })
521+
Ok(timesync)
561522
}
562523
}
563524

@@ -1111,6 +1072,7 @@ mod tests {
11111072
use sled_agent_types::zone_images::ZoneManifestStatus;
11121073
use std::collections::BTreeSet;
11131074
use std::collections::VecDeque;
1075+
use std::net::Ipv6Addr;
11141076
use std::sync::Mutex;
11151077
use tufaceous_artifact::ArtifactHash;
11161078

sled-agent/src/http_entrypoints.rs

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ use sled_agent_types::instance::{
3636
VmmPutStateResponse, VmmUnregisterResponse,
3737
};
3838
use sled_agent_types::sled::AddSledRequest;
39-
use sled_agent_types::time_sync::TimeSync;
4039
use sled_agent_types::zone_bundle::{
4140
BundleUtilization, CleanupContext, CleanupCount, CleanupPeriod,
4241
StorageLimit, ZoneBundleId, ZoneBundleMetadata,
@@ -708,13 +707,6 @@ impl SledAgentApi for SledAgentImpl {
708707
Ok(HttpResponseOk(vnics))
709708
}
710709

711-
async fn timesync_get(
712-
rqctx: RequestContext<Self::Context>,
713-
) -> Result<HttpResponseOk<TimeSync>, HttpError> {
714-
let sa = rqctx.context();
715-
Ok(HttpResponseOk(sa.timesync_get().await.map_err(|e| Error::from(e))?))
716-
}
717-
718710
async fn uplink_ensure(
719711
rqctx: RequestContext<Self::Context>,
720712
body: TypedBody<SwitchPorts>,

0 commit comments

Comments
 (0)