
Commit a0ac9bf

(benchmark): Add a script to download and parse logs
1 parent 1252d9e commit a0ac9bf

7 files changed, +151 -0 lines changed


.gitignore

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
benchmark/data/*

.python-version

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
3.13.2

README.md

Lines changed: 13 additions & 0 deletions
@@ -16,6 +16,7 @@ Each lambda implementation has the same functionality:
 - NPM
 - ESBuild available globally
 - OpenTofu (see `.opentofu-version`)
+- Python

 ## Deploying

@@ -37,3 +38,15 @@ cd infra
 tofu init
 tofu apply
 ```
+
+## Analysing the data
+
+Install the Python requirements and run the script to download the report data from CloudWatch:
+
+```bash
+cd benchmark/scripts
+pip install -r requirements.txt
+python download_log_data.py
+```
+
+This will save a CSV of the results in `benchmark/data`.
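
A quick way to sanity-check the output (a minimal sketch, not part of this commit; it assumes the script has already written `benchmark/data/parsed_cloudwatch_logs.csv`, the path used by `download_log_data.py`, and that it is run from `benchmark/scripts`):

```python
import csv
from statistics import mean

# Path as written by download_log_data.py, relative to benchmark/scripts.
with open("../data/parsed_cloudwatch_logs.csv", newline="") as f:
    rows = list(csv.DictReader(f))

# DictWriter serialises the cold_start booleans as the strings "True"/"False".
warm = [float(r["execution_time_ms"]) for r in rows if r["cold_start"] == "False"]
cold = [float(r["execution_time_ms"]) for r in rows if r["cold_start"] == "True"]

if warm:
    print(f"{len(warm)} warm invocations, mean billed duration {mean(warm):.1f} ms")
if cold:
    print(f"{len(cold)} cold starts, mean billed duration {mean(cold):.1f} ms")
```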

TODO.md

Lines changed: 1 addition & 0 deletions
@@ -14,6 +14,7 @@

 ## Benchmark

+- Filter out reports from invocations that failed
 - Plan how to benchmark the lambdas
 - Where to put data for analysis
 - Initial analysis - see what's there
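
The new TODO item about filtering out failed invocations could plausibly be handled in the script below by cross-referencing request IDs: a failed invocation still emits a REPORT line, but the failure itself shows up in separate log lines. A hedged sketch of one approach (the failure markers, the request-ID regex, and both helper names are assumptions, not part of this commit, and the exact failure text varies by runtime):

```python
import re
from typing import List, Set

# Strings that typically indicate a failed invocation; adjust per runtime/log format.
FAILURE_MARKERS = ("Task timed out", "[ERROR]", "Process exited before completing request")

# Lambda log lines generally include the invocation's request ID (a UUID) in the message.
REQUEST_ID_RE = re.compile(
    r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"
)


def failed_request_ids(rows: List[dict]) -> Set[str]:
    """Collect request IDs from messages that look like failures."""
    failed = set()
    for row in rows:
        if any(marker in row["message"] for marker in FAILURE_MARKERS):
            if match := REQUEST_ID_RE.search(row["message"]):
                failed.add(match.group(0))
    return failed


def successful_report_rows(rows: List[dict]) -> List[dict]:
    """Keep only REPORT rows whose request ID is not associated with a failure."""
    failed = failed_request_ids(rows)
    kept = []
    for row in rows:
        if not row["message"].startswith("REPORT"):
            continue
        match = REQUEST_ID_RE.search(row["message"])
        if match and match.group(0) in failed:
            continue
        kept.append(row)
    return kept
```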

benchmark/scripts/download_log_data.py

Lines changed: 127 additions & 0 deletions
@@ -0,0 +1,127 @@
from typing import List, TypedDict, Optional
from datetime import datetime, timedelta, timezone
from time import sleep
from csv import DictWriter
import re
import boto3

# Only the lambdas currently deployed; extend as more implementations go live.
LIVE_LAMBDAS = ["typescript"]


class Row(TypedDict):
    timestamp: str
    message: str


class InvocationSummary(TypedDict):
    name: str
    execution_time_ms: float
    cold_start: bool
    init_duration_ms: Optional[float]


def submit_logs_query(client, lambda_name: str) -> str:
    """Start a CloudWatch Logs Insights query over the last hour and return its ID."""
    finish = datetime.now(tz=timezone.utc)
    start = datetime.now(tz=timezone.utc) - timedelta(hours=1)

    response = client.start_query(
        logGroupName=f"/aws/lambda/{lambda_name}",
        startTime=int(start.timestamp()),
        endTime=int(finish.timestamp()),
        queryString="fields @timestamp, @message",
        limit=10000,
    )

    return response["queryId"]


def poll_for_query_result(client, query_id: str):
    """Poll until the query reaches a terminal status, then return its result rows."""
    results = client.get_query_results(queryId=query_id)
    while results["status"] not in [
        "Complete",
        "Failed",
        "Cancelled",
        "Timeout",
        "Unknown",
    ]:
        print(f"Query ID {query_id} not ready. Waiting")
        sleep(10)
        results = client.get_query_results(queryId=query_id)

    return results.get("results", [])


def parse_result_row(row) -> Row:
    """Flatten a Logs Insights result row into a timestamp/message pair."""
    result = Row(timestamp="", message="")
    for field in row:
        if field["field"] == "@timestamp":
            result["timestamp"] = field["value"]
        elif field["field"] == "@message":
            result["message"] = field["value"]

    return result


def process_report_rows(rows: List[Row], name: str) -> List[InvocationSummary]:
    """Summarise every REPORT line; all other log lines are ignored."""
    result = []
    for row in rows:
        if row["message"].startswith("REPORT"):
            result.append(calc_report_summary(row["message"], name))

    return result


def calc_report_summary(message: str, name: str) -> InvocationSummary:
    # An Init Duration field is only present on cold starts.
    cold_start = False
    init_duration_ms = None
    if init_match := re.search(
        r"Init Duration: (\d*\.?\d+) ms", message, re.IGNORECASE
    ):
        init_duration_ms = float(init_match.group(1))
        cold_start = True

    if exec_match := re.search(
        r"Billed Duration: (\d*\.?\d+) ms", message, re.IGNORECASE
    ):
        return InvocationSummary(
            name=name,
            execution_time_ms=float(exec_match.group(1)),
            cold_start=cold_start,
            init_duration_ms=init_duration_ms,
        )

    print(f"Malformed message: {message}")
    raise ValueError("Malformed message")


def write_to_csv(data):
    with open("../data/parsed_cloudwatch_logs.csv", "w") as f:
        writer = DictWriter(
            f,
            fieldnames=["name", "execution_time_ms", "cold_start", "init_duration_ms"],
        )
        writer.writeheader()
        writer.writerows(data)


if __name__ == "__main__":
    logs_client = boto3.client("logs")
    print("Submitting queries")
    queries = [
        (submit_logs_query(logs_client, f"lambda_benchmark_{name}_lambda"), name)
        for name in LIVE_LAMBDAS
    ]
    sleep(1)

    raw_results = {}
    for query_id, name in queries:
        print(f"Querying results for {name}")
        raw_results[name] = poll_for_query_result(logs_client, query_id)

    parsed: List[InvocationSummary] = []
    for name, results in raw_results.items():
        rows = [parse_result_row(row) for row in results]
        for report in process_report_rows(rows, name):
            parsed.append(report)

    write_to_csv(parsed)
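
As a quick sanity check, `calc_report_summary` can be fed a hand-written REPORT line (a hypothetical example; the request ID and numbers are made up, and the field layout follows the usual shape of a Lambda REPORT log line):

```python
sample = (
    "REPORT RequestId: 8f5de1c6-0000-4000-8000-000000000000\t"
    "Duration: 3.45 ms\tBilled Duration: 4 ms\t"
    "Memory Size: 128 MB\tMax Memory Used: 68 MB\t"
    "Init Duration: 182.17 ms"
)

print(calc_report_summary(sample, "typescript"))
# {'name': 'typescript', 'execution_time_ms': 4.0, 'cold_start': True, 'init_duration_ms': 182.17}
```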

benchmark/scripts/requirements.txt

Lines changed: 7 additions & 0 deletions
@@ -0,0 +1,7 @@
boto3==1.38.6
botocore==1.38.6
jmespath==1.0.1
python-dateutil==2.9.0.post0
s3transfer==0.12.0
six==1.17.0
urllib3==2.4.0

infra/dynamodb.tf

Lines changed: 1 addition & 0 deletions
@@ -2,6 +2,7 @@ resource "aws_dynamodb_table" "table" {
   name         = var.dynamodb_table_name
   billing_mode = "PAY_PER_REQUEST"
   hash_key     = "eventId"
+  tags         = var.default_tags

   attribute {
     name = "eventId"
