Skip to content

Introduce functions for interacting with Prometheus API #257

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
3 changes: 2 additions & 1 deletion .github/workflows/ci-pull-request.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,12 @@ jobs:
matrix:
python_version:
# https://python-release-cycle.glitch.me/
- "3.7"
- "3.8"
- "3.9"
- "3.10"
- "3.11"
- "3.12"
- "3.13"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
Expand Down
165 changes: 165 additions & 0 deletions examples/get_data_promql_advanced.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
#!/usr/bin/env python
#
# This script shows the basics of getting data out of Sysdig Monitor by executing a PromQL query
# that returns the top 5 Kubernetes workloads consuming the highest percentage of their allocated CPU
# by comparing actual usage to defined CPU limits. The query is executed over a 5-minute time window.
#

import sys
import time
from datetime import datetime

from sdcclient import SdcClient


def print_prometheus_results_as_table(results):
    """Pretty-print a PromQL range-query result ("matrix") as a text table.

    Each time series becomes one column (headed by its sorted label set) and
    each distinct timestamp becomes one row; missing samples print as "N/A".

    :param results: the "data.result" list of a PromQL range-query response,
        i.e. entries shaped like {"metric": {...}, "values": [[ts, value], ...]}.
    """
    if not results:
        print("No data found for the query.")
        return

    # label -> {timestamp -> value}. Timestamps are collected across all
    # series so rows can be printed even when series do not align.
    all_timestamps = set()
    time_series_by_label = {}

    for series in results:
        metric = series.get("metric", {})
        # Build a stable, order-independent column key from the label set.
        label = ','.join(f'{k}={v}' for k, v in sorted(metric.items()))
        # setdefault (instead of unconditional assignment) merges points when
        # two series carry an identical label set, rather than discarding the
        # earlier one.
        series_points = time_series_by_label.setdefault(label, {})

        for timestamp, value in series.get("values", []):
            ts = int(float(timestamp))  # timestamps may arrive as float or str
            all_timestamps.add(ts)
            series_points[ts] = value

    # Deterministic ordering for columns and rows.
    label_keys = sorted(time_series_by_label)
    all_timestamps = sorted(all_timestamps)

    print(f"{'Timestamp':<25} | " + " | ".join(label_keys))
    print("-" * (26 + len(label_keys) * 25))

    # One row per timestamp; series with no sample at that instant get "N/A".
    for ts in all_timestamps:
        dt = datetime.fromtimestamp(ts).isoformat()
        row_values = [time_series_by_label[label].get(ts, "N/A") for label in label_keys]
        print(f"{dt:<25} | " + " | ".join(f"{val:>20}" for val in row_values))


#
# Command-line arguments: a Sysdig API token and the backend hostname.
#
if len(sys.argv) != 3:
    # Wrong argument count: show usage and bail out with a non-zero status.
    print('usage: %s <sysdig-token> <hostname>' % sys.argv[0])
    print('You can find your token at https://app.sysdigcloud.com/#/settings/user')
    sys.exit(1)

sdc_token, hostname = sys.argv[1], sys.argv[2]

#
# Instantiate the Sysdig Monitor client.
#
sdclient = SdcClient(sdc_token, hostname)

#
# A PromQL query to execute. The query retrieves the top 5 workloads in a specific Kubernetes
# cluster that are using the highest percentage of their allocated CPU resources. It divides
# each workload's 10-minute CPU usage rate by the CPU limits configured for it, and then
# keeps only the 5 highest ratios.
#
query = '''
topk (5,
sum by (kube_cluster_name, kube_namespace_name, kube_workload_name) (
rate(
sysdig_container_cpu_cores_used{
kube_cluster_name="dev-cluster"
}[10m]
)
)
/
sum by (kube_cluster_name, kube_namespace_name, kube_workload_name) (
kube_pod_container_resource_limits{
kube_cluster_name="dev-cluster",
resource="cpu"
}
)
)
'''

#
# Time window for the range query (Unix epoch, seconds):
# - end is the current time
# - start is the current time minus 5 minutes
#
end = int(time.time())
start = end - 5 * 60  # 5 minutes ago

#
# Step:
# - resolution step, i.e. how far apart the timestamps of consecutive
#   samples in the result should be (here: one sample per minute)
#
step = 60

#
# Execute the range query. The client returns a (success, payload) pair:
# on success the payload is the decoded JSON response, otherwise an error
# object suitable for printing.
#
ok, response_json = sdclient.get_data_promql(query, start, end, step)

#
# Show the result
#
if not ok:
    # The API call failed: dump the error payload and exit non-zero.
    print(response_json)
    sys.exit(1)

#
# Read the successful response. The JSON is shaped like:
#
# {
#   "data": {
#     "result": [
#       {
#         "metric": {},
#         "values": [
#           [1744210080, "0.58"],
#           [1744210140, "0.58"],
#           ...
#         ]
#       }
#     ],
#     "resultType": "matrix"
#   },
#   "status": "success"
# }
#

#
# Print summary (what, when)
#
results = response_json.get("data", {}).get("result", [])
print_prometheus_results_as_table(results)
121 changes: 121 additions & 0 deletions examples/get_data_promql_instant_advanced.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
#!/usr/bin/env python
#
# This script shows the basics of getting data out of Sysdig Monitor by executing a PromQL query
# that returns the top 5 Kubernetes workloads consuming the highest percentage of their allocated CPU
# by comparing actual usage to defined CPU limits. The query is executed at a timestamp 5 minutes ago.
#

import sys
import time
from datetime import datetime

from sdcclient import SdcClient


def print_prometheus_instant_result(result):
if not result:
print("No data found for the instant query.")
return

# Determine if any result has labels
has_labels = any(entry.get("metric") for entry in result)

if has_labels:
print(f"{'Timestamp':<25} | {'Metric':<40} | {'Value':>10}")
print("-" * 80)
else:
print(f"{'Timestamp':<25} | {'Value':>10}")
print("-" * 40)

for entry in result:
timestamp, value = entry.get("value", [None, None])
dt = datetime.fromtimestamp(float(timestamp)).isoformat() if timestamp else "N/A"
metric = entry.get("metric", {})

if has_labels:
label_str = ', '.join(f'{k}="{v}"' for k, v in sorted(metric.items()))
print(f"{dt:<25} | {label_str:<40} | {value:>10}")
else:
print(f"{dt:<25} | {value:>10}")


#
# Command-line arguments: a Sysdig API token and the backend hostname.
#
if len(sys.argv) != 3:
    # Wrong argument count: show usage and bail out with a non-zero status.
    print('usage: %s <sysdig-token> <hostname>' % sys.argv[0])
    print('You can find your token at https://app.sysdigcloud.com/#/settings/user')
    sys.exit(1)

sdc_token, hostname = sys.argv[1], sys.argv[2]

#
# Instantiate the Sysdig Monitor client.
#
sdclient = SdcClient(sdc_token, hostname)

#
# A PromQL query to execute. The query retrieves the top 5 workloads in a specific Kubernetes
# cluster that are using the highest percentage of their allocated CPU resources. It divides
# each workload's 10-minute CPU usage rate by the CPU limits configured for it, and then
# keeps only the 5 highest ratios.
#
query = '''
topk(5,
sum by (kube_cluster_name, kube_namespace_name, kube_workload_name) (
rate(
sysdig_container_cpu_cores_used{
kube_cluster_name="dev-cluster"
}[10m]
)
)
/
sum by (kube_cluster_name, kube_namespace_name, kube_workload_name) (
kube_pod_container_resource_limits{
kube_cluster_name="dev-cluster",
resource="cpu"
}
)
)
'''

#
# Evaluation time (Unix epoch, seconds):
# - the parameter is optional; if not set, the current time is used.
# Named query_time (not "time") so the `time` module is not shadowed by
# an int, which would break any later time.time() call.
#
query_time = int(time.time()) - 5 * 60  # 5 minutes ago

#
# Execute the instant query at the computed timestamp. (Previously a
# hard-coded leftover epoch was passed here instead of query_time,
# contradicting the "5 minutes ago" intent above.)
#
ok, response_json = sdclient.get_data_promql_instant(query, query_time)

#
# Show the result
#
if not ok:
    # The API call failed: dump the error payload and exit non-zero.
    print(response_json)
    sys.exit(1)

#
# Read the successful response. The JSON is shaped like:
#
# {
#   "result": [
#     {
#       "metric": {},
#       "value": [1744272414, "0.58"]
#     }
#   ],
#   "resultType": "vector"
# }
#
# NOTE(review): the sample payload above shows no "data" wrapper, yet the
# lookup below reads response_json["data"] first — confirm which shape the
# client actually returns; with the shape above this always yields [].
#

#
# Print summary (what, when)
#
results = response_json.get("data", {}).get("result", [])
print_prometheus_instant_result(results)
Loading