Skip to content

Commit 6cbc64e

Browse files
committed
feat: add prometheus
1 parent 5e750ce commit 6cbc64e

File tree

10 files changed

+1117
-772
lines changed

10 files changed

+1117
-772
lines changed

.github/workflows/ci.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@ name: CI
22

33
on:
44
push:
5-
branches: [ main, develop ]
6-
tags: [ 'v*.*.*' ]
5+
branches: [main, develop]
6+
tags: ["v*.*.*"]
77
pull_request:
8-
branches: [ main ]
8+
branches: [main]
99
workflow_dispatch: {}
1010

1111
permissions:
@@ -41,7 +41,7 @@ jobs:
4141
run: cargo install cargo-audit && cargo audit
4242

4343
- name: Run tests
44-
run: cargo test --all-features --workspace --locked
44+
run: cargo test --all-features --workspace
4545

4646
- name: Build release binary
4747
run: cargo build --release --locked --all-features

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1-
/target
1+
/target
2+
.cursor

Cargo.lock

Lines changed: 30 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,4 @@ tokio-util = { version = "0.7.15" }
2525
http = "1.3.1"
2626
tracing = "0.1"
2727
once_cell = "1.20"
28+
prometheus-client = { version = "0.23.1" }

Dockerfile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,8 @@ STOPSIGNAL SIGTERM
8888
# Distroless:nonroot automatically runs as UID 65532
8989
USER nonroot
9090

91+
EXPOSE 8000 9091
92+
9193
ENTRYPOINT ["/usr/local/bin/tini", "--", "/usr/local/bin/subgraph-mcp"]
9294

9395
# -----------------------------------------------------------------------------

README.md

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,34 @@ Below is a reference for the `Subgraph Server Instructions`:
235235
* Use variables for dynamic values in queries.
236236
```
237237

238+
## Monitoring
239+
240+
The server exposes Prometheus metrics for monitoring its performance and behavior.
241+
242+
### Metrics Endpoint
243+
244+
When running in SSE mode (started with the `--sse` flag), a metrics server is started alongside the SSE server on a separate port.
245+
246+
- **Endpoint**: `/metrics`
247+
- **Default Port**: `9091`
248+
249+
You can configure the host and port for the metrics server using the `METRICS_HOST` (default `0.0.0.0`) and `METRICS_PORT` (default `9091`) environment variables.
250+
251+
### Exposed Metrics
252+
253+
The following application-specific metrics are exposed:
254+
255+
- `mcp_tool_calls_total{tool_name, status}`: A counter for the number of MCP tool calls.
256+
- `tool_name`: The name of the MCP tool being called (e.g., `get_schema_by_deployment_id`).
257+
- `status`: The result of the call (`success` or `error`).
258+
- `mcp_tool_call_duration_seconds{tool_name}`: A histogram of the duration of MCP tool calls.
259+
- `gateway_requests_total{endpoint_type, status}`: A counter for outgoing requests to The Graph's Gateway.
260+
- `endpoint_type`: The type of query or endpoint being hit (e.g., `get_schema_by_deployment_id`, `subgraphs/id`).
261+
- `status`: The result of the request (`success` or `error`).
262+
- `gateway_request_duration_seconds{endpoint_type}`: A histogram of the duration of Gateway requests.
263+
264+
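Metrics are encoded with the `prometheus-client` library and served in the OpenMetrics text format (`application/openmetrics-text`). Note that the metrics router shown in this commit has no `axum-prometheus` layer attached, so no `http_`-prefixed request metrics are exposed for the metrics server itself.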
265+
238266
## Contributing
239267

240268
Contributions are welcome! Please feel free to submit a Pull Request.

src/main.rs

Lines changed: 87 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,25 @@
22
pub mod constants;
33
pub mod error;
44
pub mod http_utils;
5+
pub mod metrics;
56
pub mod server;
67
pub mod server_helpers;
78
pub mod types;
9+
use crate::metrics::METRICS;
810
use anyhow::Result;
11+
use axum::{
12+
body::Body,
13+
extract::State,
14+
http::{header::CONTENT_TYPE, StatusCode},
15+
response::{IntoResponse, Response},
16+
};
17+
use prometheus_client::{encoding::text::encode, registry::Registry};
918
use rmcp::{
1019
transport::sse_server::{SseServer, SseServerConfig},
1120
ServiceExt,
1221
};
1322
pub use server::SubgraphServer;
14-
use std::{env, net::SocketAddr, time::Duration};
23+
use std::{env, net::SocketAddr, sync::Arc, time::Duration};
1524
use tokio::io;
1625
use tokio_util::sync::CancellationToken;
1726
use tracing::info;
@@ -24,7 +33,32 @@ async fn main() -> Result<()> {
2433
.unwrap_or_else(|e| eprintln!("env_logger init failed: {}", e));
2534

2635
if args.iter().any(|arg| arg == "--sse") {
27-
start_sse_server().await
36+
let shutdown_token = CancellationToken::new();
37+
38+
let sse_server_handle = tokio::spawn(start_sse_server(shutdown_token.clone()));
39+
let metrics_server_handle = tokio::spawn(start_metrics_server(shutdown_token.clone()));
40+
41+
let mut sigterm =
42+
tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate())?;
43+
tokio::select! {
44+
_ = tokio::signal::ctrl_c() => {
45+
info!("Ctrl+C (SIGINT) received, initiating graceful shutdown...");
46+
},
47+
_ = sigterm.recv() => {
48+
info!("SIGTERM received, initiating graceful shutdown...");
49+
}
50+
};
51+
52+
info!("Signalling services to shut down...");
53+
shutdown_token.cancel();
54+
55+
let _ = sse_server_handle.await?;
56+
let _ = metrics_server_handle.await?;
57+
58+
tokio::time::sleep(Duration::from_secs(1)).await;
59+
60+
info!("All services shutdown complete.");
61+
Ok(())
2862
} else {
2963
start_stdio_server().await
3064
}
@@ -40,7 +74,7 @@ async fn start_stdio_server() -> Result<()> {
4074
Ok(())
4175
}
4276

43-
async fn start_sse_server() -> Result<()> {
77+
async fn start_sse_server(shutdown_token: CancellationToken) -> Result<()> {
4478
info!("Starting SSE Subgraph MCP Server");
4579
let host = env::var("HOST").unwrap_or_else(|_| "0.0.0.0".to_string());
4680
let port = env::var("PORT").unwrap_or_else(|_| "8000".to_string());
@@ -51,13 +85,11 @@ async fn start_sse_server() -> Result<()> {
5185
let sse_path = env::var("SSE_PATH").unwrap_or_else(|_| "/sse".to_string());
5286
let post_path = env::var("POST_PATH").unwrap_or_else(|_| "/messages".to_string());
5387

54-
let server_shutdown_token = CancellationToken::new();
55-
5688
let config = SseServerConfig {
5789
bind: bind_addr,
5890
sse_path,
5991
post_path,
60-
ct: server_shutdown_token.clone(),
92+
ct: shutdown_token.clone(),
6193
sse_keep_alive: Some(Duration::from_secs(30)),
6294
};
6395

@@ -67,23 +99,59 @@ async fn start_sse_server() -> Result<()> {
6799
let service_shutdown_token = sse_server.with_service(SubgraphServer::new);
68100
info!("Subgraph MCP Service attached to SSE server");
69101

70-
let mut sigterm = tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate())?;
102+
shutdown_token.cancelled().await;
71103

72-
tokio::select! {
73-
_ = tokio::signal::ctrl_c() => {
74-
info!("Ctrl+C (SIGINT) received, initiating graceful shutdown...");
75-
},
76-
_ = sigterm.recv() => {
77-
info!("SIGTERM received, initiating graceful shutdown...");
78-
}
79-
};
80-
81-
info!("Signalling service and server to shut down...");
104+
info!("SSE Server shutdown signal received. Giving tasks a moment to finish...");
82105
service_shutdown_token.cancel();
83-
server_shutdown_token.cancel();
84-
85106
tokio::time::sleep(Duration::from_secs(1)).await;
86107

87108
info!("SSE Server shutdown complete.");
88109
Ok(())
89110
}
111+
112+
async fn metrics_handler(State(registry): State<Arc<Registry>>) -> impl IntoResponse {
113+
tokio::time::sleep(Duration::from_millis(50)).await;
114+
115+
let mut buffer = String::new();
116+
encode(&mut buffer, &registry).unwrap();
117+
Response::builder()
118+
.status(StatusCode::OK)
119+
.header(
120+
CONTENT_TYPE,
121+
"application/openmetrics-text; version=1.0.0; charset=utf-8",
122+
)
123+
.body(Body::from(buffer))
124+
.unwrap()
125+
}
126+
127+
async fn start_metrics_server(shutdown_token: CancellationToken) -> Result<()> {
128+
let mut registry = <Registry as Default>::default();
129+
METRICS.register(&mut registry);
130+
let registry = Arc::new(registry);
131+
132+
let host = env::var("METRICS_HOST").unwrap_or_else(|_| "0.0.0.0".to_string());
133+
let port = env::var("METRICS_PORT").unwrap_or_else(|_| "9091".to_string());
134+
let bind_addr: SocketAddr = format!("{}:{}", host, port).parse().map_err(|e| {
135+
anyhow::anyhow!(
136+
"Invalid METRICS BIND address format '{}:{}': {}",
137+
host,
138+
port,
139+
e
140+
)
141+
})?;
142+
143+
let app = axum::Router::new()
144+
.route("/metrics", axum::routing::get(metrics_handler))
145+
.with_state(registry);
146+
147+
info!("Metrics server listening on {}", bind_addr);
148+
let listener = tokio::net::TcpListener::bind(bind_addr).await?;
149+
axum::serve(listener, app)
150+
.with_graceful_shutdown(async move {
151+
shutdown_token.cancelled().await;
152+
info!("Metrics server shutting down.");
153+
})
154+
.await?;
155+
156+
Ok(())
157+
}

0 commit comments

Comments
 (0)