diff --git a/Cargo.lock b/Cargo.lock index 31803476..c8727cd4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -24,6 +24,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77c3a9648d43b9cd48db467b3f87fdd6e146bcc88ab0180006cef2179fe11d01" dependencies = [ "cfg-if", + "getrandom", "once_cell", "version_check", "zerocopy", @@ -113,6 +114,12 @@ version = "1.0.81" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0952808a6c2afd1aa8947271f3a60f1a6763c7b912d210184c5149b5cf147247" +[[package]] +name = "arrayvec" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" + [[package]] name = "ascii-canvas" version = "3.0.0" @@ -576,6 +583,12 @@ version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" +[[package]] +name = "bytemuck" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d6d68c57235a3a081186990eca2867354726650f42f7516ca50c28d6281fd15" + [[package]] name = "byteorder" version = "1.5.0" @@ -734,6 +747,15 @@ version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" +[[package]] +name = "cpp_demangle" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e8227005286ec39567949b33df9896bcadfa6051bccca2488129f108ca23119" +dependencies = [ + "cfg-if", +] + [[package]] name = "cpufeatures" version = "0.2.12" @@ -821,6 +843,15 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7e962a19be5cfc3f3bf6dd8f61eb50107f356ad6270fbb3ed41476571db78be5" +[[package]] +name = "debugid" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef552e6f588e446098f6ba40d89ac146c8c7b64aade83c051ee00bb5d2bc18d" +dependencies = [ + "uuid", +] + [[package]] name = "der-parser" version = "8.2.0" @@ -933,6 +964,7 @@ dependencies = [ "openssl", "opentelemetry", "opentelemetry-jaeger", + "pprof", "prometheus", "prost-wkt-types", "rcgen", @@ -1197,6 +1229,18 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" +[[package]] +name = "findshlibs" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40b9e59cd0f7e0806cca4be089683ecb6434e602038df21fe6bf6711b2f07f64" +dependencies = [ + "cc", + "lazy_static", + "libc", + "winapi", +] + [[package]] name = "fixedbitset" version = "0.4.2" @@ -1894,6 +1938,24 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "inferno" +version = "0.11.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "321f0f839cd44a4686e9504b0a62b4d69a50b62072144c71c68f5873c167b8d9" +dependencies = [ + "ahash", + "indexmap 2.2.1", + "is-terminal", + "itoa", + "log", + "num-format", + "once_cell", + "quick-xml", + "rgb", + "str_stack", +] + [[package]] name = "instant" version = "0.1.12" @@ -1932,6 +1994,17 @@ version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" +[[package]] +name = "is-terminal" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys 0.52.0", +] + [[package]] name = "itertools" version = "0.10.5" @@ -2181,6 +2254,15 @@ version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" +[[package]] +name = "memmap2" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe751422e4a8caa417e13c3ea66452215d7d63e19e604f4980461212f3ae1322" +dependencies = [ + "libc", +] + [[package]] name = "mime" version = "0.3.17" @@ -2296,6 +2378,17 @@ version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" +[[package]] +name = "nix" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b" +dependencies = [ + "bitflags 1.3.2", + "cfg-if", + "libc", +] + [[package]] name = "nom" version = "7.1.3" @@ -2336,6 +2429,16 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-format" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a652d9771a63711fd3c3deb670acfbe5c30a4072e664d7a3bf5a9e1056ac72c3" +dependencies = [ + "arrayvec", + "itoa", +] + [[package]] name = "num-integer" version = "0.1.45" @@ -2705,6 +2808,29 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" +[[package]] +name = "pprof" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef5c97c51bd34c7e742402e216abdeb44d415fbe6ae41d56b114723e953711cb" +dependencies = [ + "backtrace", + "cfg-if", + "findshlibs", + "inferno", + "libc", + "log", + "nix", + "once_cell", + "parking_lot", + "protobuf", + "protobuf-codegen-pure", + "smallvec", + "symbolic-demangle", + "tempfile", + "thiserror", +] + [[package]] name = "ppv-lite86" version = "0.2.17" @@ -2913,6 +3039,34 @@ version = "2.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94" +[[package]] +name = "protobuf-codegen" +version = "2.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "033460afb75cf755fcfc16dfaed20b86468082a2ea24e05ac35ab4a099a017d6" +dependencies = [ + "protobuf", +] + +[[package]] +name = "protobuf-codegen-pure" +version = "2.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95a29399fc94bcd3eeaa951c715f7bea69409b2445356b00519740bcd6ddd865" +dependencies = [ + "protobuf", + "protobuf-codegen", +] + +[[package]] +name = "quick-xml" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f50b1c63b38611e7d4d7f68b82d3ad0cc71a2ad2e7f61fc10f1328d917c93cd" +dependencies = [ + "memchr", +] + [[package]] name = "quote" version = "1.0.35" @@ -3147,6 +3301,15 @@ dependencies = [ "winreg", ] +[[package]] +name = "rgb" +version = "0.8.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05aaa8004b64fd573fc9d002f4e632d51ad4f026c2b5ba95fcb6c2f32c2c47d8" +dependencies = [ + "bytemuck", +] + [[package]] name = "ring" version = "0.16.20" @@ -3574,6 +3737,18 @@ version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + +[[package]] +name = "str_stack" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9091b6114800a5f2141aee1d1b9d6ca3592ac062dc5decb3764ec5895a47b4eb" + [[package]] name = "string_cache" version = "0.8.7" @@ -3599,6 +3774,29 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" +[[package]] +name = "symbolic-common" +version = "12.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cccfffbc6bb3bb2d3a26cd2077f4d055f6808d266f9d4d158797a4c60510dfe" +dependencies = [ + "debugid", + "memmap2", + "stable_deref_trait", + "uuid", +] + +[[package]] +name = "symbolic-demangle" +version = "12.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76a99812da4020a67e76c4eb41f08c87364c14170495ff780f30dd519c221a68" +dependencies = [ + "cpp_demangle", + "rustc-demangle", + "symbolic-common", +] + [[package]] name = "syn" version = "1.0.109" diff --git a/Cargo.toml b/Cargo.toml index b72f3a95..20646edb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -74,5 +74,8 @@ anyhow = "1.0.81" toml_edit = "0.22.8" toml = "0.8.12" +[profile.release] +debug = true + [profile.bench] debug = true diff --git a/dragonfly-client-config/src/dfdaemon.rs b/dragonfly-client-config/src/dfdaemon.rs index 7adc56d3..f06e6277 100644 --- a/dragonfly-client-config/src/dfdaemon.rs +++ b/dragonfly-client-config/src/dfdaemon.rs @@ -96,6 +96,12 @@ fn default_metrics_server_port() -> u16 { 4002 } +// default_stats_server_port is the default port of the stats server. +#[inline] +fn default_stats_server_port() -> u16 { + 4004 +} + // default_download_rate_limit is the default rate limit of the download speed in bps(bytes per second). #[inline] fn default_download_rate_limit() -> u64 { @@ -851,6 +857,36 @@ pub struct Metrics { pub server: MetricsServer, } +// StatsServer is the stats server configuration for dfdaemon. +#[derive(Debug, Clone, Validate, Deserialize)] +#[serde(default, rename_all = "camelCase")] +pub struct StatsServer { + // ip is the listen ip of the stats server. + pub ip: Option, + + // port is the port to the stats server. + #[serde(default = "default_stats_server_port")] + pub port: u16, +} + +// StatsServer implements Default. +impl Default for StatsServer { + fn default() -> Self { + Self { + ip: None, + port: default_stats_server_port(), + } + } +} + +// Stats is the stats configuration for dfdaemon. +#[derive(Debug, Clone, Default, Validate, Deserialize)] +#[serde(default, rename_all = "camelCase")] +pub struct Stats { + // server is the stats server configuration for dfdaemon. + pub server: StatsServer, +} + // Tracing is the tracing configuration for dfdaemon. #[derive(Debug, Clone, Default, Validate, Deserialize)] #[serde(default, rename_all = "camelCase")] @@ -919,6 +955,10 @@ pub struct Config { #[validate] pub metrics: Metrics, + // stats is the stats configuration for dfdaemon. + #[validate] + pub stats: Stats, + // tracing is the tracing configuration for dfdaemon. #[validate] pub tracing: Tracing, @@ -983,6 +1023,15 @@ impl Config { } } + // Convert stats server listen ip. + if self.stats.server.ip.is_none() { + self.stats.server.ip = if self.network.enable_ipv6 { + Some(Ipv6Addr::UNSPECIFIED.into()) + } else { + Some(Ipv4Addr::UNSPECIFIED.into()) + } + } + // Convert proxy server listen ip. if self.proxy.server.ip.is_none() { self.proxy.server.ip = if self.network.enable_ipv6 { diff --git a/dragonfly-client/Cargo.toml b/dragonfly-client/Cargo.toml index 2a0cc6be..334c0dc1 100644 --- a/dragonfly-client/Cargo.toml +++ b/dragonfly-client/Cargo.toml @@ -59,6 +59,7 @@ tracing-appender = "0.2.3" tracing-opentelemetry = "0.18.0" opentelemetry = { version = "0.18.0", default-features = false, features = ["trace", "rt-tokio"] } opentelemetry-jaeger = { version = "0.17.0", features = ["rt-tokio"] } +pprof = { version = "0.13", features = ["flamegraph", "protobuf-codec"] } lazy_static = "1.4" prometheus = "0.13.3" tonic-health = "0.9.2" diff --git a/dragonfly-client/src/bin/dfdaemon/main.rs b/dragonfly-client/src/bin/dfdaemon/main.rs index f3ee4e11..fdf30d95 100644 --- a/dragonfly-client/src/bin/dfdaemon/main.rs +++ b/dragonfly-client/src/bin/dfdaemon/main.rs @@ -26,6 +26,7 @@ use dragonfly_client::health::Health; use dragonfly_client::metrics::Metrics; use dragonfly_client::proxy::Proxy; use dragonfly_client::shutdown; +use dragonfly_client::stats::Stats; use dragonfly_client::task::Task; use dragonfly_client::tracing::init_tracing; use dragonfly_client_backend::BackendFactory; @@ -200,6 +201,13 @@ async fn main() -> Result<(), anyhow::Error> { shutdown_complete_tx.clone(), ); + // Initialize stats server. + let stats = Stats::new( + SocketAddr::new(config.stats.server.ip.unwrap(), config.stats.server.port), + shutdown.clone(), + shutdown_complete_tx.clone(), + ); + // Initialize proxy server. let proxy = Proxy::new( config.clone(), @@ -272,6 +280,10 @@ async fn main() -> Result<(), anyhow::Error> { info!("metrics server exited"); }, + _ = tokio::spawn(async move { stats.run().await }) => { + info!("stats server exited"); + }, + _ = tokio::spawn(async move { proxy.run().await }) => { info!("proxy server exited"); }, diff --git a/dragonfly-client/src/lib.rs b/dragonfly-client/src/lib.rs index cbb8e5d5..2ba85e6a 100644 --- a/dragonfly-client/src/lib.rs +++ b/dragonfly-client/src/lib.rs @@ -22,5 +22,6 @@ pub mod health; pub mod metrics; pub mod proxy; pub mod shutdown; +pub mod stats; pub mod task; pub mod tracing; diff --git a/dragonfly-client/src/stats/mod.rs b/dragonfly-client/src/stats/mod.rs new file mode 100644 index 00000000..c849c372 --- /dev/null +++ b/dragonfly-client/src/stats/mod.rs @@ -0,0 +1,146 @@ +/* + * Copyright 2024 The Dragonfly Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +use crate::shutdown; +use pprof::protos::Message; +use pprof::ProfilerGuard; +use serde::{Deserialize, Serialize}; +use std::net::SocketAddr; +use std::time::Duration; +use tokio::sync::mpsc; +use tracing::{error, info}; +use warp::{Filter, Rejection, Reply}; + +// DEFAULT_PROFILER_SECONDS is the default seconds to start profiling. +const DEFAULT_PROFILER_SECONDS: u64 = 10; + +// DEFAULT_PROFILER_FREQUENCY is the default frequency to start profiling. +const DEFAULT_PROFILER_FREQUENCY: i32 = 1000; + +// PProfQueryParams is the query params to start profiling. +#[derive(Deserialize, Serialize)] +#[serde(default)] +pub struct PProfQueryParams { + // seconds is the seconds to start profiling. + pub seconds: u64, + + // frequency is the frequency to start profiling. + pub frequency: i32, +} + +// PProfQueryParams implements the default. +impl Default for PProfQueryParams { + fn default() -> Self { + Self { + seconds: DEFAULT_PROFILER_SECONDS, + frequency: DEFAULT_PROFILER_FREQUENCY, + } + } +} + +// Stats is the stats server. +#[derive(Debug)] +pub struct Stats { + // addr is the address of the stats server. + addr: SocketAddr, + + // shutdown is used to shutdown the stats server. + shutdown: shutdown::Shutdown, + + // _shutdown_complete is used to notify the stats server is shutdown. + _shutdown_complete: mpsc::UnboundedSender<()>, +} + +// Stats implements the stats server. +impl Stats { + // new creates a new Stats. + pub fn new( + addr: SocketAddr, + shutdown: shutdown::Shutdown, + shutdown_complete_tx: mpsc::UnboundedSender<()>, + ) -> Self { + Self { + addr, + shutdown, + _shutdown_complete: shutdown_complete_tx, + } + } + + // run starts the stats server. + pub async fn run(&self) { + // Clone the shutdown channel. + let mut shutdown = self.shutdown.clone(); + + // Create the stats route. + let stats_route = warp::path!("debug" / "pprof" / "profile") + .and(warp::get()) + .and(warp::query::()) + .and_then(Self::stats_handler); + + // Start the stats server and wait for it to finish. + info!("stats server listening on {}", self.addr); + tokio::select! { + _ = warp::serve(stats_route).run(self.addr) => { + // Stats server ended. + info!("stats server ended"); + } + _ = shutdown.recv() => { + // Stats server shutting down with signals. + info!("stats server shutting down"); + } + } + } + + // stats_handler handles the stats request. + async fn stats_handler(query_params: PProfQueryParams) -> Result { + info!( + "start profiling for {} seconds with {} frequency", + query_params.seconds, query_params.frequency + ); + let guard = match ProfilerGuard::new(query_params.frequency) { + Ok(guard) => guard, + Err(err) => { + error!("failed to create profiler guard: {}", err); + return Err(warp::reject::reject()); + } + }; + + tokio::time::sleep(Duration::from_secs(query_params.seconds)).await; + let report = match guard.report().build() { + Ok(report) => report, + Err(err) => { + error!("failed to build profiler report: {}", err); + return Err(warp::reject::reject()); + } + }; + + let profile = match report.pprof() { + Ok(profile) => profile, + Err(err) => { + error!("failed to get pprof profile: {}", err); + return Err(warp::reject::reject()); + } + }; + + let mut body: Vec = Vec::new(); + profile.write_to_vec(&mut body).map_err(|err| { + error!("failed to write pprof profile: {}", err); + warp::reject::reject() + })?; + + Ok(body) + } +}