|
| 1 | +// Copyright 2024-Present Datadog, Inc. https://www.datadoghq.com/ |
| 2 | +// SPDX-License-Identifier: Apache-2.0 |
| 3 | + |
| 4 | +//! This file contains code for fetching and sharing the info from the Datadog Agent. |
| 5 | +//! It will keep one fetcher per Endpoint. The SidecarServer is expected to keep the AgentInfoGuard |
| 6 | +//! alive for the lifetime of the session. |
| 7 | +//! The fetcher will remain alive for a short while after all guards have been dropped. |
| 8 | +//! It writes the raw agent response to shared memory at a fixed per-endpoint location, to be |
| 9 | +//! consumed be tracers. |
| 10 | +
|
| 11 | +use crate::one_way_shared_memory::{open_named_shm, OneWayShmReader, OneWayShmWriter}; |
| 12 | +use crate::primary_sidecar_identifier; |
| 13 | +use base64::prelude::BASE64_URL_SAFE_NO_PAD; |
| 14 | +use base64::Engine; |
| 15 | +use data_pipeline::agent_info::schema::AgentInfoStruct; |
| 16 | +use data_pipeline::agent_info::{fetch_info_with_state, FetchInfoStatus}; |
| 17 | +use datadog_ipc::platform::NamedShmHandle; |
| 18 | +use ddcommon::Endpoint; |
| 19 | +use futures::future::Shared; |
| 20 | +use futures::FutureExt; |
| 21 | +use http::uri::PathAndQuery; |
| 22 | +use manual_future::ManualFuture; |
| 23 | +use std::ffi::CString; |
| 24 | +use std::hash::{Hash, Hasher}; |
| 25 | +use std::sync::{Arc, Mutex}; |
| 26 | +use std::time::{Duration, Instant}; |
| 27 | +use tokio::time::sleep; |
| 28 | +use tracing::{error, warn}; |
| 29 | +use zwohash::{HashMap, ZwoHasher}; |
| 30 | + |
| 31 | +#[derive(Default, Clone)] |
| 32 | +pub struct AgentInfos(Arc<Mutex<HashMap<Endpoint, AgentInfoFetcher>>>); |
| 33 | + |
| 34 | +impl AgentInfos { |
| 35 | + /// Ensures a fetcher for the endpoints agent info and keeps it alive for at least as long as |
| 36 | + /// the returned guard exists. |
| 37 | + pub fn query_for(&self, endpoint: Endpoint) -> AgentInfoGuard { |
| 38 | + let mut infos_guard = self.0.lock().unwrap(); |
| 39 | + if let Some(info) = infos_guard.get_mut(&endpoint) { |
| 40 | + info.rc += 1; |
| 41 | + } else { |
| 42 | + infos_guard.insert( |
| 43 | + endpoint.clone(), |
| 44 | + AgentInfoFetcher::new(self.clone(), endpoint.clone()), |
| 45 | + ); |
| 46 | + } |
| 47 | + |
| 48 | + AgentInfoGuard { |
| 49 | + infos: self.clone(), |
| 50 | + endpoint, |
| 51 | + } |
| 52 | + } |
| 53 | +} |
| 54 | + |
| 55 | +pub struct AgentInfoGuard { |
| 56 | + infos: AgentInfos, |
| 57 | + endpoint: Endpoint, |
| 58 | +} |
| 59 | + |
| 60 | +impl AgentInfoGuard { |
| 61 | + pub fn get(&self) -> Shared<ManualFuture<AgentInfoStruct>> { |
| 62 | + let infos_guard = self.infos.0.lock().unwrap(); |
| 63 | + let infos = infos_guard.get(&self.endpoint).unwrap(); |
| 64 | + infos.infos.clone() |
| 65 | + } |
| 66 | +} |
| 67 | + |
| 68 | +impl Drop for AgentInfoGuard { |
| 69 | + fn drop(&mut self) { |
| 70 | + let mut infos_guard = self.infos.0.lock().unwrap(); |
| 71 | + let info = infos_guard.get_mut(&self.endpoint).unwrap(); |
| 72 | + info.last_update = Instant::now(); |
| 73 | + info.rc -= 1; |
| 74 | + } |
| 75 | +} |
| 76 | + |
| 77 | +pub struct AgentInfoFetcher { |
| 78 | + /// Once the last_update is too old, we'll stop the fetcher. |
| 79 | + last_update: Instant, |
| 80 | + /// Will be kept alive forever if rc > 0. |
| 81 | + rc: u32, |
| 82 | + /// The initial fetch is an unresolved future (to be able to await on it), subsequent fetches |
| 83 | + /// are simply directly replacing this with a resolved future. |
| 84 | + infos: Shared<ManualFuture<AgentInfoStruct>>, |
| 85 | +} |
| 86 | + |
| 87 | +impl AgentInfoFetcher { |
| 88 | + fn new(agent_infos: AgentInfos, endpoint: Endpoint) -> AgentInfoFetcher { |
| 89 | + let (future, completer) = ManualFuture::new(); |
| 90 | + tokio::spawn(async move { |
| 91 | + let mut state: Option<String> = None; |
| 92 | + let mut writer = None; |
| 93 | + let mut completer = Some(completer); |
| 94 | + let mut fetch_endpoint = endpoint.clone(); |
| 95 | + let mut parts = fetch_endpoint.url.into_parts(); |
| 96 | + parts.path_and_query = Some(PathAndQuery::from_static("/info")); |
| 97 | + fetch_endpoint.url = hyper::Uri::from_parts(parts).unwrap(); |
| 98 | + loop { |
| 99 | + let fetched = fetch_info_with_state(&fetch_endpoint, state.as_deref()).await; |
| 100 | + let mut complete_fut = None; |
| 101 | + { |
| 102 | + let mut infos_guard = agent_infos.0.lock().unwrap(); |
| 103 | + let infos = infos_guard.get_mut(&endpoint).unwrap(); |
| 104 | + if infos.rc == 0 && infos.last_update.elapsed().as_secs() > 60 { |
| 105 | + break; |
| 106 | + } |
| 107 | + match fetched { |
| 108 | + Ok(FetchInfoStatus::SameState) => {} |
| 109 | + Ok(FetchInfoStatus::NewState(status)) => { |
| 110 | + state = Some(status.state_hash); |
| 111 | + if writer.is_none() { |
| 112 | + writer = match OneWayShmWriter::<NamedShmHandle>::new(info_path( |
| 113 | + &endpoint, |
| 114 | + )) { |
| 115 | + Ok(writer) => Some(writer), |
| 116 | + Err(e) => { |
| 117 | + error!("Failed acquiring an agent info writer: {e:?}"); |
| 118 | + None |
| 119 | + } |
| 120 | + }; |
| 121 | + } |
| 122 | + if let Some(ref writer) = writer { |
| 123 | + writer.write(&serde_json::to_vec(&status.info).unwrap()) |
| 124 | + } |
| 125 | + if let Some(completer) = completer { |
| 126 | + complete_fut = Some(completer.complete(status.info)); |
| 127 | + } else { |
| 128 | + infos.infos = ManualFuture::new_completed(status.info).shared(); |
| 129 | + } |
| 130 | + completer = None; |
| 131 | + } |
| 132 | + Err(e) => { |
| 133 | + // We'll just return the old values as long as the endpoint is |
| 134 | + // unreachable. |
| 135 | + warn!( |
| 136 | + "The agent info for {} could not be fetched: {}", |
| 137 | + fetch_endpoint.url, e |
| 138 | + ); |
| 139 | + } |
| 140 | + } |
| 141 | + } |
| 142 | + if let Some(complete_fut) = complete_fut.take() { |
| 143 | + complete_fut.await; |
| 144 | + } |
| 145 | + sleep(Duration::from_secs(60)).await; |
| 146 | + } |
| 147 | + agent_infos.0.lock().unwrap().remove(&endpoint); |
| 148 | + }); |
| 149 | + |
| 150 | + AgentInfoFetcher { |
| 151 | + last_update: Instant::now(), |
| 152 | + rc: 1, |
| 153 | + infos: future.shared(), |
| 154 | + } |
| 155 | + } |
| 156 | +} |
| 157 | + |
| 158 | +fn info_path(endpoint: &Endpoint) -> CString { |
| 159 | + let mut hasher = ZwoHasher::default(); |
| 160 | + endpoint.hash(&mut hasher); |
| 161 | + let mut path = format!( |
| 162 | + "/ddinf{}-{}", |
| 163 | + primary_sidecar_identifier(), |
| 164 | + BASE64_URL_SAFE_NO_PAD.encode(hasher.finish().to_ne_bytes()), |
| 165 | + ); |
| 166 | + // datadog agent info, on macos we're restricted to 31 chars |
| 167 | + path.truncate(31); // should not be larger than 31 chars, but be sure. |
| 168 | + CString::new(path).unwrap() |
| 169 | +} |
| 170 | + |
| 171 | +pub struct AgentInfoReader { |
| 172 | + reader: OneWayShmReader<NamedShmHandle, CString>, |
| 173 | + info: Option<AgentInfoStruct>, |
| 174 | +} |
| 175 | + |
| 176 | +impl AgentInfoReader { |
| 177 | + pub fn new(endpoint: &Endpoint) -> AgentInfoReader { |
| 178 | + let path = info_path(endpoint); |
| 179 | + AgentInfoReader { |
| 180 | + reader: OneWayShmReader::new(open_named_shm(&path).ok(), path), |
| 181 | + info: None, |
| 182 | + } |
| 183 | + } |
| 184 | + |
| 185 | + pub fn read(&mut self) -> (bool, &Option<AgentInfoStruct>) { |
| 186 | + let (updated, data) = self.reader.read(); |
| 187 | + if updated { |
| 188 | + match serde_json::from_slice(data) { |
| 189 | + Ok(info) => self.info = Some(info), |
| 190 | + Err(e) => error!("Failed deserializing the agent info: {e:?}"), |
| 191 | + } |
| 192 | + } |
| 193 | + (updated, &self.info) |
| 194 | + } |
| 195 | +} |
| 196 | + |
| 197 | +#[cfg(test)] |
| 198 | +mod tests { |
| 199 | + use super::*; |
| 200 | + use httpmock::prelude::*; |
| 201 | + |
| 202 | + const TEST_INFO: &str = r#"{ |
| 203 | + "config": { |
| 204 | + "default_env": "testenv" |
| 205 | + } |
| 206 | + }"#; |
| 207 | + |
| 208 | + const TEST_INFO_HASH: &str = "8c732aba385d605b010cd5bd12c03fef402eaefce989f0055aa4c7e92fe30077"; |
| 209 | + |
| 210 | + #[cfg_attr(miri, ignore)] |
| 211 | + #[tokio::test] |
| 212 | + async fn test_fetch_info_without_state() { |
| 213 | + let server = MockServer::start(); |
| 214 | + let mock = server |
| 215 | + .mock_async(|when, then| { |
| 216 | + when.path("/info"); |
| 217 | + then.status(200) |
| 218 | + .header("content-type", "application/json") |
| 219 | + .header("datadog-agent-state", TEST_INFO_HASH) |
| 220 | + .body(TEST_INFO); |
| 221 | + }) |
| 222 | + .await; |
| 223 | + let endpoint = Endpoint::from_url(server.url("/").parse().unwrap()); |
| 224 | + let agent_infos = AgentInfos::default(); |
| 225 | + |
| 226 | + let mut reader = AgentInfoReader::new(&endpoint); |
| 227 | + assert_eq!(reader.read(), (false, &None)); |
| 228 | + |
| 229 | + let info = agent_infos.query_for(endpoint).get().await; |
| 230 | + mock.assert(); |
| 231 | + assert_eq!( |
| 232 | + info.config.unwrap().default_env, |
| 233 | + Some("testenv".to_string()) |
| 234 | + ); |
| 235 | + |
| 236 | + let (updated, info) = reader.read(); |
| 237 | + assert!(updated); |
| 238 | + assert_eq!( |
| 239 | + info.as_ref().unwrap().config.as_ref().unwrap().default_env, |
| 240 | + Some("testenv".to_string()) |
| 241 | + ); |
| 242 | + } |
| 243 | +} |
0 commit comments