From a49fe1e2fdda732539f12ccf318f20d00e64f1cc Mon Sep 17 00:00:00 2001 From: Gaius Date: Mon, 26 May 2025 16:24:21 +0800 Subject: [PATCH] feat(tracing): migrate to OpenTelemetry OTLP and enrich trace metadata Signed-off-by: Gaius --- Cargo.lock | 199 ++++++++------------- Cargo.toml | 16 +- dragonfly-client-init/src/bin/main.rs | 1 + dragonfly-client/Cargo.toml | 10 +- dragonfly-client/src/bin/dfcache/export.rs | 1 + dragonfly-client/src/bin/dfcache/import.rs | 1 + dragonfly-client/src/bin/dfcache/stat.rs | 1 + dragonfly-client/src/bin/dfdaemon/main.rs | 4 +- dragonfly-client/src/bin/dfget/main.rs | 1 + dragonfly-client/src/tracing/mod.rs | 60 +++++-- 10 files changed, 143 insertions(+), 151 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 28c5ab6e..95f65d12 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -824,19 +824,6 @@ dependencies = [ "typenum", ] -[[package]] -name = "dashmap" -version = "5.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" -dependencies = [ - "cfg-if", - "hashbrown 0.14.3", - "lock_api", - "once_cell", - "parking_lot_core 0.9.9", -] - [[package]] name = "data-encoding" version = "2.5.0" @@ -953,7 +940,7 @@ dependencies = [ [[package]] name = "dragonfly-client" -version = "0.2.30" +version = "0.2.31" dependencies = [ "anyhow", "bytes", @@ -983,7 +970,8 @@ dependencies = [ "lru", "openssl", "opentelemetry", - "opentelemetry-jaeger", + "opentelemetry-otlp", + "opentelemetry_sdk", "path-absolutize", "percent-encoding", "pprof", @@ -1011,7 +999,6 @@ dependencies = [ "tower", "tracing", "tracing-appender", - "tracing-log 0.2.0", "tracing-opentelemetry", "tracing-subscriber", "url", @@ -1022,7 +1009,7 @@ dependencies = [ [[package]] name = "dragonfly-client-backend" -version = "0.2.30" +version = "0.2.31" dependencies = [ "dragonfly-api", "dragonfly-client-core", @@ -1053,7 +1040,7 @@ dependencies = [ [[package]] name = "dragonfly-client-config" -version = "0.2.30" +version = "0.2.31" dependencies = [ "bytesize", "bytesize-serde", @@ -1081,7 +1068,7 @@ dependencies = [ [[package]] name = "dragonfly-client-core" -version = "0.2.30" +version = "0.2.31" dependencies = [ "headers 0.4.0", "hyper 1.6.0", @@ -1099,7 +1086,7 @@ dependencies = [ [[package]] name = "dragonfly-client-init" -version = "0.2.30" +version = "0.2.31" dependencies = [ "anyhow", "clap", @@ -1117,7 +1104,7 @@ dependencies = [ [[package]] name = "dragonfly-client-storage" -version = "0.2.30" +version = "0.2.31" dependencies = [ "bincode", "bytes", @@ -1145,7 +1132,7 @@ dependencies = [ [[package]] name = "dragonfly-client-util" -version = "0.2.30" +version = "0.2.31" dependencies = [ "base64 0.22.1", "bytesize", @@ -1560,7 +1547,7 @@ dependencies = [ [[package]] name = "hdfs" -version = "0.2.30" +version = "0.2.31" dependencies = [ "dragonfly-client-backend", "dragonfly-client-core", @@ -2115,12 +2102,6 @@ dependencies = [ "web-sys", ] -[[package]] -name = "integer-encoding" -version = "3.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" - [[package]] name = "inventory" version = "0.3.15" @@ -2832,85 +2813,61 @@ dependencies = [ [[package]] name = "opentelemetry" -version = "0.18.0" +version = "0.29.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69d6c3d7288a106c0a363e4b0e8d308058d56902adefb16f4936f417ffef086e" +checksum = "9e87237e2775f74896f9ad219d26a2081751187eb7c9f5c58dde20a23b95d16c" dependencies = [ - "opentelemetry_api", - "opentelemetry_sdk", + "futures-core", + "futures-sink", + "js-sys", + "pin-project-lite", + "thiserror 2.0.12", + "tracing", ] [[package]] -name = "opentelemetry-jaeger" -version = "0.17.0" +name = "opentelemetry-otlp" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e785d273968748578931e4dc3b4f5ec86b26e09d9e0d66b55adda7fce742f7a" +checksum = "d899720fe06916ccba71c01d04ecd77312734e2de3467fd30d9d580c8ce85656" dependencies = [ - "async-trait", - "futures", - "futures-executor", - "once_cell", + "futures-core", + "http 1.3.1", "opentelemetry", - "opentelemetry-semantic-conventions", - "thiserror 1.0.69", - "thrift", + "opentelemetry-proto", + "opentelemetry_sdk", + "prost 0.13.5", + "thiserror 2.0.12", "tokio", + "tonic", ] [[package]] -name = "opentelemetry-semantic-conventions" -version = "0.10.0" +name = "opentelemetry-proto" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b02e0230abb0ab6636d18e2ba8fa02903ea63772281340ccac18e0af3ec9eeb" +checksum = "8c40da242381435e18570d5b9d50aca2a4f4f4d8e146231adb4e7768023309b3" dependencies = [ "opentelemetry", -] - -[[package]] -name = "opentelemetry_api" -version = "0.18.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c24f96e21e7acc813c7a8394ee94978929db2bcc46cf6b5014fc612bf7760c22" -dependencies = [ - "fnv", - "futures-channel", - "futures-util", - "indexmap 1.9.3", - "js-sys", - "once_cell", - "pin-project-lite", - "thiserror 1.0.69", + "opentelemetry_sdk", + "prost 0.13.5", + "tonic", ] [[package]] name = "opentelemetry_sdk" -version = "0.18.0" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ca41c4933371b61c2a2f214bf16931499af4ec90543604ec828f7a625c09113" +checksum = "afdefb21d1d47394abc1ba6c57363ab141be19e27cc70d0e422b7f303e4d290b" dependencies = [ - "async-trait", - "crossbeam-channel", - "dashmap", - "fnv", "futures-channel", "futures-executor", "futures-util", - "once_cell", - "opentelemetry_api", + "glob", + "opentelemetry", "percent-encoding", - "rand 0.8.5", - "thiserror 1.0.69", - "tokio", - "tokio-stream", -] - -[[package]] -name = "ordered-float" -version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3305af35278dd29f46fcdd139e0b1fbfae2153f0e5928b39b035542dd31e37b7" -dependencies = [ - "num-traits", + "rand 0.9.1", + "thiserror 2.0.12", ] [[package]] @@ -3617,10 +3574,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", - "rand_chacha", + "rand_chacha 0.3.1", "rand_core 0.6.4", ] +[[package]] +name = "rand" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97" +dependencies = [ + "rand_chacha 0.9.0", + "rand_core 0.9.3", +] + [[package]] name = "rand_chacha" version = "0.3.1" @@ -3631,6 +3598,16 @@ dependencies = [ "rand_core 0.6.4", ] +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core 0.9.3", +] + [[package]] name = "rand_core" version = "0.3.1" @@ -3655,6 +3632,15 @@ dependencies = [ "getrandom 0.2.12", ] +[[package]] +name = "rand_core" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +dependencies = [ + "getrandom 0.3.1", +] + [[package]] name = "rayon" version = "1.10.0" @@ -4695,28 +4681,6 @@ dependencies = [ "once_cell", ] -[[package]] -name = "threadpool" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d050e60b33d41c19108b32cea32164033a9013fe3b46cbd4457559bfbf77afaa" -dependencies = [ - "num_cpus", -] - -[[package]] -name = "thrift" -version = "0.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09678c4cdbb4eed72e18b7c2af1329c69825ed16fcbac62d083fc3e2b0590ff0" -dependencies = [ - "byteorder", - "integer-encoding", - "log", - "ordered-float", - "threadpool", -] - [[package]] name = "tikv-jemalloc-ctl" version = "0.5.4" @@ -5113,17 +5077,6 @@ dependencies = [ "valuable", ] -[[package]] -name = "tracing-log" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f751112709b4e791d8ce53e32c4ed2d353565a795ce84da2285393f41557bdf2" -dependencies = [ - "log", - "once_cell", - "tracing-core", -] - [[package]] name = "tracing-log" version = "0.2.0" @@ -5137,16 +5090,20 @@ dependencies = [ [[package]] name = "tracing-opentelemetry" -version = "0.18.0" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21ebb87a95ea13271332df069020513ab70bdb5637ca42d6e492dc3bbbad48de" +checksum = "fd8e764bd6f5813fd8bebc3117875190c5b0415be8f7f8059bffb6ecd979c444" dependencies = [ + "js-sys", "once_cell", "opentelemetry", + "opentelemetry_sdk", + "smallvec", "tracing", "tracing-core", - "tracing-log 0.1.4", + "tracing-log", "tracing-subscriber", + "web-time", ] [[package]] @@ -5166,7 +5123,7 @@ dependencies = [ "time", "tracing", "tracing-core", - "tracing-log 0.2.0", + "tracing-log", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 18becb0a..6e529dbe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ members = [ ] [workspace.package] -version = "0.2.30" +version = "0.2.31" authors = ["The Dragonfly Developers"] homepage = "https://d7y.io/" repository = "https://github.com/dragonflyoss/client.git" @@ -22,13 +22,13 @@ readme = "README.md" edition = "2021" [workspace.dependencies] -dragonfly-client = { path = "dragonfly-client", version = "0.2.30" } -dragonfly-client-core = { path = "dragonfly-client-core", version = "0.2.30" } -dragonfly-client-config = { path = "dragonfly-client-config", version = "0.2.30" } -dragonfly-client-storage = { path = "dragonfly-client-storage", version = "0.2.30" } -dragonfly-client-backend = { path = "dragonfly-client-backend", version = "0.2.30" } -dragonfly-client-util = { path = "dragonfly-client-util", version = "0.2.30" } -dragonfly-client-init = { path = "dragonfly-client-init", version = "0.2.30" } +dragonfly-client = { path = "dragonfly-client", version = "0.2.31" } +dragonfly-client-core = { path = "dragonfly-client-core", version = "0.2.31" } +dragonfly-client-config = { path = "dragonfly-client-config", version = "0.2.31" } +dragonfly-client-storage = { path = "dragonfly-client-storage", version = "0.2.31" } +dragonfly-client-backend = { path = "dragonfly-client-backend", version = "0.2.31" } +dragonfly-client-util = { path = "dragonfly-client-util", version = "0.2.31" } +dragonfly-client-init = { path = "dragonfly-client-init", version = "0.2.31" } dragonfly-api = "=2.1.39" thiserror = "2.0" futures = "0.3.31" diff --git a/dragonfly-client-init/src/bin/main.rs b/dragonfly-client-init/src/bin/main.rs index cb3d44fd..b69b5ffa 100644 --- a/dragonfly-client-init/src/bin/main.rs +++ b/dragonfly-client-init/src/bin/main.rs @@ -94,6 +94,7 @@ async fn main() -> Result<(), anyhow::Error> { args.log_level, args.log_max_files, None, + None, args.verbose, ); diff --git a/dragonfly-client/Cargo.toml b/dragonfly-client/Cargo.toml index b7b20fdd..af4b5d8a 100644 --- a/dragonfly-client/Cargo.toml +++ b/dragonfly-client/Cargo.toml @@ -63,13 +63,13 @@ lru.workspace = true fs2.workspace = true lazy_static.workspace = true futures.workspace = true -tracing-log = "0.2" -tracing-subscriber = { version = "0.3", features = ["env-filter", "time", "chrono"] } tracing-appender = "0.2.3" +tracing-subscriber = { version = "0.3", features = ["env-filter", "time", "chrono"] } +tracing-opentelemetry = "0.30.0" +opentelemetry = { version = "0.29.1", default-features = false, features = ["trace"] } +opentelemetry-otlp = { version = "0.29.0", default-features = false, features = ["trace", "grpc-tonic"] } +opentelemetry_sdk = { version = "0.29.0", default-features = false, features = ["trace"] } rolling-file = "0.2.0" -tracing-opentelemetry = "0.18.0" -opentelemetry = { version = "0.18.0", default-features = false, features = ["trace", "rt-tokio"] } -opentelemetry-jaeger = { version = "0.17.0", features = ["rt-tokio"] } pprof = { version = "0.14", features = ["flamegraph", "protobuf-codec"] } prometheus = { version = "0.13", features = ["process"] } tonic-health = "0.12.3" diff --git a/dragonfly-client/src/bin/dfcache/export.rs b/dragonfly-client/src/bin/dfcache/export.rs index 9045e7f5..c0aa5719 100644 --- a/dragonfly-client/src/bin/dfcache/export.rs +++ b/dragonfly-client/src/bin/dfcache/export.rs @@ -137,6 +137,7 @@ impl ExportCommand { self.log_level, self.log_max_files, None, + None, self.verbose, ); diff --git a/dragonfly-client/src/bin/dfcache/import.rs b/dragonfly-client/src/bin/dfcache/import.rs index ecc89630..11290e43 100644 --- a/dragonfly-client/src/bin/dfcache/import.rs +++ b/dragonfly-client/src/bin/dfcache/import.rs @@ -143,6 +143,7 @@ impl ImportCommand { self.log_level, self.log_max_files, None, + None, self.verbose, ); diff --git a/dragonfly-client/src/bin/dfcache/stat.rs b/dragonfly-client/src/bin/dfcache/stat.rs index 6e410c43..4bbc2968 100644 --- a/dragonfly-client/src/bin/dfcache/stat.rs +++ b/dragonfly-client/src/bin/dfcache/stat.rs @@ -89,6 +89,7 @@ impl StatCommand { self.log_level, self.log_max_files, None, + None, self.verbose, ); diff --git a/dragonfly-client/src/bin/dfdaemon/main.rs b/dragonfly-client/src/bin/dfdaemon/main.rs index 3e83a114..085e73d1 100644 --- a/dragonfly-client/src/bin/dfdaemon/main.rs +++ b/dragonfly-client/src/bin/dfdaemon/main.rs @@ -30,8 +30,7 @@ use dragonfly_client::shutdown; use dragonfly_client::stats::Stats; use dragonfly_client::tracing::init_tracing; use dragonfly_client_backend::BackendFactory; -use dragonfly_client_config::dfdaemon; -use dragonfly_client_config::VersionValueParser; +use dragonfly_client_config::{dfdaemon, VersionValueParser}; use dragonfly_client_storage::Storage; use dragonfly_client_util::id_generator::IDGenerator; use std::net::SocketAddr; @@ -151,6 +150,7 @@ async fn main() -> Result<(), anyhow::Error> { args.log_level, args.log_max_files, config.tracing.addr.to_owned(), + Some(config.host.clone()), args.verbose, ); diff --git a/dragonfly-client/src/bin/dfget/main.rs b/dragonfly-client/src/bin/dfget/main.rs index b547ff57..dfb2d073 100644 --- a/dragonfly-client/src/bin/dfget/main.rs +++ b/dragonfly-client/src/bin/dfget/main.rs @@ -308,6 +308,7 @@ async fn main() -> anyhow::Result<()> { args.log_level, args.log_max_files, None, + None, args.verbose, ); diff --git a/dragonfly-client/src/tracing/mod.rs b/dragonfly-client/src/tracing/mod.rs index 88144f2b..de003749 100644 --- a/dragonfly-client/src/tracing/mod.rs +++ b/dragonfly-client/src/tracing/mod.rs @@ -14,13 +14,17 @@ * limitations under the License. */ -use opentelemetry::sdk::propagation::TraceContextPropagator; +use dragonfly_client_config::dfdaemon::Host; +use opentelemetry::{global, trace::TracerProvider}; +use opentelemetry_otlp::WithExportConfig; +use opentelemetry_sdk::{propagation::TraceContextPropagator, Resource}; use rolling_file::*; use std::fs; use std::path::PathBuf; +use std::time::Duration; use tracing::{info, Level}; use tracing_appender::non_blocking::WorkerGuard; -use tracing_log::LogTracer; +use tracing_opentelemetry::OpenTelemetryLayer; use tracing_subscriber::{ filter::LevelFilter, fmt::{time::ChronoLocal, Layer}, @@ -28,6 +32,9 @@ use tracing_subscriber::{ EnvFilter, Registry, }; +/// SPAN_EXPORTER_TIMEOUT is the timeout for the span exporter. +const SPAN_EXPORTER_TIMEOUT: Duration = Duration::from_secs(10); + /// init_tracing initializes the tracing system. #[allow(clippy::too_many_arguments)] pub fn init_tracing( @@ -36,6 +43,7 @@ pub fn init_tracing( log_level: Level, log_max_files: usize, jaeger_addr: Option, + host: Option, verbose: bool, ) -> Vec { let mut guards = vec![]; @@ -95,24 +103,46 @@ pub fn init_tracing( // Setup jaeger layer. if let Some(jaeger_addr) = jaeger_addr { - opentelemetry::global::set_text_map_propagator(TraceContextPropagator::new()); - let tracer = opentelemetry_jaeger::new_agent_pipeline() - .with_service_name(name) + let otlp_exporter = opentelemetry_otlp::SpanExporter::builder() + .with_tonic() .with_endpoint(jaeger_addr) - .install_batch(opentelemetry::runtime::Tokio) - .expect("install"); - let jaeger_layer = tracing_opentelemetry::layer().with_tracer(tracer); - let subscriber = subscriber.with(jaeger_layer); + .with_timeout(SPAN_EXPORTER_TIMEOUT) + .build() + .expect("failed to create OTLP exporter"); + + let host = host.unwrap(); + let provider = opentelemetry_sdk::trace::SdkTracerProvider::builder() + .with_batch_exporter(otlp_exporter) + .with_resource( + Resource::builder() + .with_service_name(name.to_owned()) + .with_attribute(opentelemetry::KeyValue::new( + "idc", + host.idc.unwrap_or_default(), + )) + .with_attribute(opentelemetry::KeyValue::new( + "location", + host.location.unwrap_or_default(), + )) + .with_attribute(opentelemetry::KeyValue::new("hostname", host.hostname)) + .with_attribute(opentelemetry::KeyValue::new( + "ip", + host.ip.unwrap().to_string(), + )) + .build(), + ) + .build(); - tracing::subscriber::set_global_default(subscriber) - .expect("failed to set global subscriber"); + let tracer = provider.tracer(name.to_string()); + global::set_tracer_provider(provider.clone()); + global::set_text_map_propagator(TraceContextPropagator::new()); + + let jaeger_layer = OpenTelemetryLayer::new(tracer); + subscriber.with(jaeger_layer).init(); } else { - tracing::subscriber::set_global_default(subscriber) - .expect("failed to set global subscriber"); + subscriber.init(); } - LogTracer::init().expect("failed to init LogTracer"); - info!( "tracing initialized directory: {}, level: {}", log_dir.as_path().display(),