Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ version = "0.2.0"
edition = "2024"

[dependencies]
anyhow = { version = "1.0.99", features = ["backtrace"] }
libc = "0.2.175"
tracing = "0.1.41"
tracing-subscriber = { version = "0.3.20", features = ["env-filter"] }
Expand Down
27 changes: 22 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,33 @@
Acolyte is a lightweight resource monitoring tool designed to collect statistics in containerized environments,
particularly Kubernetes.

Acolyte monitors CPU, memory, and GPU utilization and writes the data to JSON files for easy consumption by other
services. It's designed to run alongside your application in the same container and built with compatibility in mind.
Acolyte monitors CPU, memory, and GPU utilization and writes the data as JSON to rotated files or standard output for
easy consumption by other services.
It's designed to run alongside your application in the same container and built with compatibility in mind.

Acolyte is configured through environment variables:
Acolyte is configured through environment variables. All of them are optional, so the defaults work without any configuration.

### General

* `RUST_LOG`: log level e.g. debug; default: info
* `ACOLYTE_STATS_DIR`: directory where stat files are written; default: /tmp/acolyte/stats
* `ACOLYTE_NO_RESTART`: if set, Acolyte will not restart itself if it encounters an error; default: false

### Collection

* `ACOLYTE_STAT_INTERVAL_MS`: interval between stats collection in milliseconds; default: 5000
* `ACOLYTE_MAX_STATS_ENTRIES`: maximum number of stat files to keep; default: 12
* `ACOLYTE_CPU_SAMPLE_RATE_MS`: sample window for CPU usage in milliseconds; default: 100

### Output

* `ACOLYTE_OUTPUT_MODE`: where stats are written; `dir` (default) writes files to `ACOLYTE_STATS_DIR`, `stdout` writes to standard output

#### Stats directory

* `ACOLYTE_STATS_DIR`: directory where stat files are written; default: /tmp/acolyte/stats
* `ACOLYTE_MAX_STATS_ENTRIES`: maximum number of stat files to keep; default: 12

### Sentry

* `SENTRY_DSN`: optional Sentry DSN for error reporting
* `CLUSTER_NAME`: optional cluster identification for Sentry

Expand Down
38 changes: 32 additions & 6 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,27 +4,53 @@ use std::path::PathBuf;
use std::time::Duration;
use uuid::Uuid;

/// Settings for the output mode that prints each stats entry as JSON on standard output.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct JsonlToStdoutConfig {
    /// Text written immediately before the JSON of every entry.
    ///
    /// Read from `ACOLYTE_OUTPUT_PREFIX`; empty when that variable cannot be read.
    pub prefix: String,
}

/// Settings for the output mode that writes each stats entry to a file in a directory.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct StatsDirConfig {
    /// Directory where stat files are written.
    pub dir: PathBuf,
    /// Maximum number of stat files to keep; handed to the clean-up step after each write.
    pub max_stats_entries: usize,
}

/// Destination for collected stats entries, chosen via `ACOLYTE_OUTPUT_MODE`.
pub enum OutputMode {
/// Print each entry as a single JSON line on standard output (`stdout`).
JsonlToStdout(JsonlToStdoutConfig),
/// Write each entry to a file inside a stats directory (`dir`, also used when the variable is unset).
StatsDir(StatsDirConfig),
}
pub struct Config {
pub sentry_dsn: Option<String>,
pub acolyte_id: Uuid,
pub cpu_sample_interval: Duration,
pub max_stats_entries: usize,
pub stat_interval: Duration,
pub stats_dir: Option<PathBuf>,
pub cluster_name: String,
pub output_mode: OutputMode,
}

impl Config {
pub fn from_env() -> Self {
Config {
pub fn from_env() -> anyhow::Result<Self> {
Ok(Config {
sentry_dsn: get_sentry_dsn(),
acolyte_id: get_or_create_acolyte_id(),
cpu_sample_interval: get_cpu_sample_interval(),
max_stats_entries: get_max_stats_entries(),
stat_interval: get_stat_interval(),
stats_dir: Some(get_stats_dir()),
output_mode: get_output_mode()?,
cluster_name: get_cluster_name(),
})
}
}

/// Resolves the stats output destination from the environment.
///
/// `ACOLYTE_OUTPUT_MODE` selects the mode:
/// * `dir`, or a variable that is unset / not readable as Unicode: write files to the stats
///   directory, keeping at most the configured number of entries.
/// * `stdout`: print JSON lines, each preceded by `ACOLYTE_OUTPUT_PREFIX` (empty when that
///   variable cannot be read).
///
/// # Errors
///
/// Returns an error naming the rejected value for any other setting.
fn get_output_mode() -> anyhow::Result<OutputMode> {
    let requested = env::var("ACOLYTE_OUTPUT_MODE").ok();
    // A missing variable behaves exactly like an explicit "dir".
    let mode = match requested.as_deref().unwrap_or("dir") {
        "dir" => OutputMode::StatsDir(StatsDirConfig {
            dir: get_stats_dir(),
            max_stats_entries: get_max_stats_entries(),
        }),
        "stdout" => OutputMode::JsonlToStdout(JsonlToStdoutConfig {
            prefix: env::var("ACOLYTE_OUTPUT_PREFIX").unwrap_or_default(),
        }),
        other => return Err(anyhow::anyhow!("Invalid ACOLYTE_OUTPUT_MODE: {other}.")),
    };
    Ok(mode)
}

Expand Down
3 changes: 2 additions & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use acolyte::config::Config;
use acolyte::consts::{ID_ENV_VAR, MAX_RUN_ATTEMPTS, RESTART_DELAY_SECS};
use anyhow::Context;
use libc::{SIG_IGN, SIGHUP};
use std::time::Duration;
use std::{env, os::unix::process::CommandExt, panic, process, thread};
Expand Down Expand Up @@ -27,7 +28,7 @@ fn main() {

init_logging();

let config = Config::from_env();
let config = Config::from_env().context("Failed to load config").unwrap();
let sentry_guard = init_sentry(&config);
if sentry_guard.is_some() {
info!("Sentry initialized");
Expand Down
24 changes: 18 additions & 6 deletions src/store.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::config::Config;
use crate::config::{Config, StatsDirConfig};
use serde::Serialize;
use std::fs::{self, File};
use std::io::{self, Write};
Expand Down Expand Up @@ -55,7 +55,21 @@ impl StatsEntry {
}

pub fn write_stats_entry(entry: StatsEntry, config: &Config) -> io::Result<()> {
let dir_path = config.stats_dir.as_ref().unwrap(); // TODO(akx): handle None case
match &config.output_mode {
crate::config::OutputMode::JsonlToStdout(jsonl_config) => {
let prefix = &jsonl_config.prefix;
let as_json = serde_json::to_string(&entry)?;
println!("{prefix}{as_json}");
Ok(())
}
crate::config::OutputMode::StatsDir(stats_dir_config) => {
write_stats_dir_entry(entry, stats_dir_config)
}
}
}

fn write_stats_dir_entry(entry: StatsEntry, stats_dir_config: &StatsDirConfig) -> io::Result<()> {
let dir_path = &stats_dir_config.dir;
ensure_dir_exists(dir_path)?;

let timestamp_ms = (entry.time * 1000.0) as u64;
Expand All @@ -66,7 +80,7 @@ pub fn write_stats_entry(entry: StatsEntry, config: &Config) -> io::Result<()> {
let mut json_file = File::create(file_path)?;
json_file.write_all(as_json.as_bytes())?;

clean_up_old_stats_entries(dir_path, config)?;
clean_up_old_stats_entries(dir_path, stats_dir_config.max_stats_entries)?;
Ok(())
}

Expand All @@ -78,9 +92,7 @@ fn ensure_dir_exists(dir_path: &Path) -> io::Result<()> {
Ok(())
}

fn clean_up_old_stats_entries(dir_path: &Path, config: &Config) -> io::Result<()> {
let max_entries = config.max_stats_entries;

fn clean_up_old_stats_entries(dir_path: &Path, max_entries: usize) -> io::Result<()> {
let mut entries: Vec<PathBuf> = fs::read_dir(dir_path)?
.filter_map(|entry| entry.ok())
.map(|entry| entry.path())
Expand Down
Loading