Skip to content

Trying to collect timesync status in inventory #8603

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: ntp-admin-use
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions nexus/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ nexus-mgs-updates.workspace = true
nexus-networking.workspace = true
nexus-saga-recovery.workspace = true
nexus-test-interface.workspace = true
ntp-admin-client.workspace = true
num-integer.workspace = true
omicron-cockroach-metrics.workspace = true
openssl.workspace = true
Expand Down
36 changes: 34 additions & 2 deletions nexus/db-model/src/inventory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ use nexus_db_schema::schema::{
inv_last_reconciliation_disk_result,
inv_last_reconciliation_orphaned_dataset,
inv_last_reconciliation_zone_result, inv_mupdate_override_non_boot,
inv_nvme_disk_firmware, inv_omicron_sled_config,
inv_ntp_timesync, inv_nvme_disk_firmware, inv_omicron_sled_config,
inv_omicron_sled_config_dataset, inv_omicron_sled_config_disk,
inv_omicron_sled_config_zone, inv_omicron_sled_config_zone_nic,
inv_physical_disk, inv_root_of_trust, inv_root_of_trust_page,
Expand All @@ -61,7 +61,7 @@ use nexus_sled_agent_shared::inventory::{
};
use nexus_types::inventory::{
BaseboardId, Caboose, CockroachStatus, Collection, NvmeFirmware,
PowerState, RotPage, RotSlot,
PowerState, RotPage, RotSlot, TimeSync,
};
use omicron_common::api::external;
use omicron_common::api::internal::shared::NetworkInterface;
Expand Down Expand Up @@ -2788,6 +2788,38 @@ impl TryFrom<InvCockroachStatus> for CockroachStatus {
}
}

#[derive(Queryable, Clone, Debug, Selectable, Insertable)]
#[diesel(table_name = inv_ntp_timesync)]
pub struct InvNtpTimesync {
pub inv_collection_id: DbTypedUuid<CollectionKind>,
pub ip: ipv6::Ipv6Addr,
pub port: SqlU16,
pub synced: bool,
}

impl InvNtpTimesync {
pub fn new(
inv_collection_id: CollectionUuid,
timesync: &TimeSync,
) -> Result<Self, anyhow::Error> {
Ok(Self {
inv_collection_id: inv_collection_id.into(),
ip: timesync.addr.ip().into(),
port: timesync.addr.port().into(),
synced: timesync.synced,
})
}
}

impl From<InvNtpTimesync> for nexus_types::inventory::TimeSync {
fn from(value: InvNtpTimesync) -> Self {
Self {
addr: SocketAddrV6::new(*value.ip, *value.port, 0, 0),
synced: value.synced,
}
}
}

#[cfg(test)]
mod test {
use nexus_types::inventory::NvmeFirmware;
Expand Down
52 changes: 52 additions & 0 deletions nexus/db-queries/src/db/datastore/inventory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ use nexus_db_model::InvLastReconciliationDatasetResult;
use nexus_db_model::InvLastReconciliationDiskResult;
use nexus_db_model::InvLastReconciliationOrphanedDataset;
use nexus_db_model::InvLastReconciliationZoneResult;
use nexus_db_model::InvNtpTimesync;
use nexus_db_model::InvNvmeDiskFirmware;
use nexus_db_model::InvOmicronSledConfig;
use nexus_db_model::InvOmicronSledConfigDataset;
Expand Down Expand Up @@ -93,6 +94,7 @@ use nexus_types::inventory::CockroachStatus;
use nexus_types::inventory::Collection;
use nexus_types::inventory::PhysicalDiskFirmware;
use nexus_types::inventory::SledAgent;
use nexus_types::inventory::TimeSync;
use omicron_cockroach_metrics::NodeId as CockroachNodeId;
use omicron_common::api::external::Error;
use omicron_common::api::external::InternalContext;
Expand Down Expand Up @@ -385,6 +387,13 @@ impl DataStore {
.collect::<Result<Vec<_>, _>>()
.map_err(|e| Error::internal_error(&e.to_string()))?;

let inv_ntp_timesync_records: Vec<InvNtpTimesync> = collection
.ntp_timesync
.iter()
.map(|timesync| InvNtpTimesync::new(collection_id, timesync))
.collect::<Result<Vec<_>, _>>()
.map_err(|e| Error::internal_error(&e.to_string()))?;

// This implementation inserts all records associated with the
// collection in one transaction. This is primarily for simplicity. It
// means we don't have to worry about other readers seeing a
Expand Down Expand Up @@ -1424,6 +1433,14 @@ impl DataStore {
.await?;
}

// Insert the NTP info we've observed
if !inv_ntp_timesync_records.is_empty() {
use nexus_db_schema::schema::inv_ntp_timesync::dsl;
diesel::insert_into(dsl::inv_ntp_timesync)
.values(inv_ntp_timesync_records)
.execute_async(&conn)
.await?;
}

// Finally, insert the list of errors.
{
Expand Down Expand Up @@ -1714,6 +1731,7 @@ impl DataStore {
nerrors: usize,
nclickhouse_keeper_membership: usize,
ncockroach_status: usize,
nntp_timesync: usize,
}

let NumRowsDeleted {
Expand Down Expand Up @@ -1744,6 +1762,7 @@ impl DataStore {
nerrors,
nclickhouse_keeper_membership,
ncockroach_status,
nntp_timesync,
} =
self.transaction_retry_wrapper("inventory_delete_collection")
.transaction(&conn, |conn| async move {
Expand Down Expand Up @@ -2000,6 +2019,17 @@ impl DataStore {
.execute_async(&conn)
.await?
};
// Remove rows for NTP timesync
let nntp_timesync = {
use nexus_db_schema::schema::inv_ntp_timesync::dsl;
diesel::delete(
dsl::inv_ntp_timesync.filter(
dsl::inv_collection_id.eq(db_collection_id),
),
)
.execute_async(&conn)
.await?
};

Ok(NumRowsDeleted {
ncollections,
Expand Down Expand Up @@ -2029,6 +2059,7 @@ impl DataStore {
nerrors,
nclickhouse_keeper_membership,
ncockroach_status,
nntp_timesync,
})
})
.await
Expand Down Expand Up @@ -2069,6 +2100,7 @@ impl DataStore {
"nerrors" => nerrors,
"nclickhouse_keeper_membership" => nclickhouse_keeper_membership,
"ncockroach_status" => ncockroach_status,
"nntp_timesync" => nntp_timesync,
);

Ok(())
Expand Down Expand Up @@ -3435,6 +3467,25 @@ impl DataStore {
.collect::<Result<BTreeMap<_, _>, Error>>()?
};

// Load TimeSync statuses
let ntp_timesync: IdOrdMap<TimeSync> = {
use nexus_db_schema::schema::inv_ntp_timesync::dsl;

let records: Vec<InvNtpTimesync> = dsl::inv_ntp_timesync
.filter(dsl::inv_collection_id.eq(db_id))
.select(InvNtpTimesync::as_select())
.load_async(&*conn)
.await
.map_err(|e| {
public_error_from_diesel(e, ErrorHandler::Server)
})?;

records
.into_iter()
.map(|record| TimeSync::from(record))
.collect::<IdOrdMap<_>>()
};

// Finally, build up the sled-agent map using the sled agent and
// omicron zone rows. A for loop is easier to understand than into_iter
// + filter_map + return Result + collect.
Expand Down Expand Up @@ -3681,6 +3732,7 @@ impl DataStore {
sled_agents,
clickhouse_keeper_cluster_membership,
cockroach_status,
ntp_timesync,
})
}
}
Expand Down
9 changes: 9 additions & 0 deletions nexus/db-schema/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1899,6 +1899,15 @@ table! {
}
}

table! {
inv_ntp_timesync (inv_collection_id, ip, port) {
inv_collection_id -> Uuid,
ip -> Inet,
port -> Int4,
synced -> Bool,
}
}

/* blueprints */

table! {
Expand Down
2 changes: 2 additions & 0 deletions nexus/inventory/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ id-map.workspace = true
iddqd.workspace = true
nexus-sled-agent-shared.workspace = true
nexus-types.workspace = true
ntp-admin-client.workspace = true
omicron-common.workspace = true
omicron-uuid-kinds.workspace = true
reqwest.workspace = true
Expand All @@ -31,6 +32,7 @@ sled-agent-client.workspace = true
sled-agent-types.workspace = true
sled-agent-zone-images-examples.workspace = true
slog.workspace = true
slog-error-chain.workspace = true
strum.workspace = true
swrite.workspace = true
thiserror.workspace = true
Expand Down
19 changes: 19 additions & 0 deletions nexus/inventory/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ use nexus_types::inventory::RotPageWhich;
use nexus_types::inventory::RotState;
use nexus_types::inventory::ServiceProcessor;
use nexus_types::inventory::SledAgent;
use nexus_types::inventory::TimeSync;
use nexus_types::inventory::Zpool;
use omicron_cockroach_metrics::CockroachMetric;
use omicron_cockroach_metrics::NodeId;
Expand Down Expand Up @@ -120,6 +121,7 @@ pub struct CollectionBuilder {
clickhouse_keeper_cluster_membership:
BTreeSet<ClickhouseKeeperClusterMembership>,
cockroach_status: BTreeMap<NodeId, CockroachStatus>,
ntp_timesync: IdOrdMap<TimeSync>,
// CollectionBuilderRng is taken by value, rather than passed in as a
// mutable ref, to encourage a tree-like structure where each RNG is
// generally independent.
Expand Down Expand Up @@ -150,6 +152,7 @@ impl CollectionBuilder {
sleds: IdOrdMap::new(),
clickhouse_keeper_cluster_membership: BTreeSet::new(),
cockroach_status: BTreeMap::new(),
ntp_timesync: IdOrdMap::new(),
rng: CollectionBuilderRng::from_entropy(),
}
}
Expand All @@ -173,6 +176,7 @@ impl CollectionBuilder {
clickhouse_keeper_cluster_membership: self
.clickhouse_keeper_cluster_membership,
cockroach_status: self.cockroach_status,
ntp_timesync: self.ntp_timesync,
}
}

Expand Down Expand Up @@ -550,6 +554,19 @@ impl CollectionBuilder {
self.clickhouse_keeper_cluster_membership.insert(membership);
}

/// Record information about timesync
pub fn found_ntp_timesync(
&mut self,
timesync: TimeSync,
) -> Result<(), anyhow::Error> {
self.ntp_timesync
.insert_unique(timesync)
.map_err(|err| err.into_owned())
.with_context(|| {
format!("NTP service reported time multiple times")
})
}

/// Record metrics from a CockroachDB node
pub fn found_cockroach_metrics(
&mut self,
Expand Down Expand Up @@ -621,6 +638,8 @@ mod test {
assert!(collection.cabooses_found.is_empty());
assert!(collection.rot_pages_found.is_empty());
assert!(collection.clickhouse_keeper_cluster_membership.is_empty());
assert!(collection.cockroach_status.is_empty());
assert!(collection.ntp_timesync.is_empty());
}

// Simple test of a single, fairly typical collection that contains just
Expand Down
Loading
Loading