Skip to content

Commit bbcf30a

Browse files
authored
fix-aws-s3-lambda-code (#278)
1 parent 6054cc1 commit bbcf30a

File tree

1 file changed

+75
-9
lines changed

1 file changed

+75
-9
lines changed

send-data/aws-s3.mdx

Lines changed: 75 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ import requests
3939
import csv
4040
import io
4141
import ndjson
42+
import re
4243

4344
def lambda_handler(event, context):
4445
# Extract the bucket name and object key from the event
@@ -62,9 +63,38 @@ def lambda_handler(event, context):
6263
if file_extension == '.csv':
6364
csv_data = csv.DictReader(io.StringIO(log_data))
6465
json_logs = list(csv_data)
65-
elif file_extension == '.txt' or file_extension == '.log':
66+
elif file_extension == '.txt':
6667
log_lines = log_data.strip().split("\n")
6768
json_logs = [{'message': line} for line in log_lines]
69+
elif file_extension == '.log':
70+
# IMPORTANT: Log files can be in various formats (JSON, XML, syslog, etc.)
71+
try:
72+
# First, try to parse as JSON (either one JSON object per line or a JSON array)
73+
if log_data.strip().startswith('[') and log_data.strip().endswith(']'):
74+
# Appears to be a JSON array
75+
json_logs = json.loads(log_data)
76+
else:
77+
# Try parsing as NDJSON (one JSON object per line)
78+
try:
79+
json_logs = ndjson.loads(log_data)
80+
except:
81+
# If not valid NDJSON, check if each line might be JSON
82+
log_lines = log_data.strip().split("\n")
83+
json_logs = []
84+
for line in log_lines:
85+
try:
86+
# Try to parse each line as JSON
87+
parsed_line = json.loads(line)
88+
json_logs.append(parsed_line)
89+
except:
90+
# Create a dictionary and let json module handle the escaping
91+
message_dict = {'message': line}
92+
json_logs.append(message_dict)
93+
except:
94+
# If JSON parsing fails, default to treating as plain text
95+
log_lines = log_data.strip().split("\n")
96+
json_logs = [{'message': line} for line in log_lines]
97+
print("Warning: Log file format could not be determined. Treating as plain text.")
6898
elif file_extension == '.ndjson' or file_extension == '.jsonl':
6999
json_logs = ndjson.loads(log_data)
70100
else:
@@ -80,14 +110,14 @@ def lambda_handler(event, context):
80110
"Content-Type": "application/json"
81111
}
82112

83-
# Send logs to Axiom
84-
for log in json_logs:
85-
try:
86-
response = requests.post(axiom_api_url, headers=axiom_headers, json=log)
87-
if response.status_code != 200:
88-
print(f"Failed to send log to Axiom: {response.text}")
89-
except Exception as e:
90-
print(f"Error sending to Axiom: {str(e)}. Log: {log}")
113+
try:
114+
response = requests.post(axiom_api_url, headers=axiom_headers, json=json_logs)
115+
if response.status_code != 200:
116+
print(f"Failed to send logs to Axiom: {response.text}")
117+
else:
118+
print(f"Successfully sent logs to Axiom. Response: {response.text}")
119+
except Exception as e:
120+
print(f"Error sending to Axiom: {str(e)}")
91121

92122
print(f"Processed {len(json_logs)} log entries")
93123
```
@@ -101,6 +131,42 @@ In the environment variables section of the Lambda function configuration, add t
101131
This example uses Python for the Lambda function. To use another language, change the code above accordingly.
102132
</CallOut>
103133

134+
### Understanding log formats
135+
136+
The `.log` extension doesn't guarantee any specific format. Log files might contain:
137+
138+
- JSON (single object or array)
139+
- NDJSON/JSONL (one JSON object per line)
140+
- Syslog format
141+
- XML
142+
- Application-specific formats (Apache, Nginx, ELB, etc.)
143+
- Custom formats with quoted strings and special characters
144+
145+
The example code includes detection for several common formats, but you'll need to customize it based on your specific log structure.
146+
147+
#### Example: Custom parser for structured logs
148+
For logs with a specific structure (like AWS ELB access logs), you may need to implement a custom parser. Here's a simplified example:
149+
150+
```py
151+
import shlex
152+
import re
153+
154+
class Parser:
    """Parses structured log lines (such as AWS ELB access logs) into dicts."""

    def parse_line(self, line):
        """Parse a single log line into a dict of named fields.

        Strips square brackets so bracketed fields (like timestamps) split
        cleanly, then splits on whitespace while respecting quoted
        substrings, so quoted fields containing spaces stay intact.

        Raises ValueError (from shlex) on unbalanced quotes, and IndexError
        when the line has fewer fields than expected — the former no-op
        `except Exception as e: raise e` wrapper added nothing and has been
        removed so the original traceback propagates untouched.
        """
        # Remove [ and ] so bracketed fields don't confuse the tokenizer.
        cleaned = re.sub(r"[\[\]]", "", line)
        # shlex.split honors double quotes, keeping quoted fields together.
        fields = shlex.split(cleaned)
        return {
            "protocol": fields[0],
            "timestamp": fields[1],
            "client_ip_port": fields[2],
            # ...more fields...
        }
168+
```
169+
104170
## Configure S3 to trigger Lambda
105171

106172
In the Amazon S3 console, select the bucket where your log files are stored. Go to the properties tab, find the event notifications section, and create an event notification. Select All object create events as the event type and choose the Lambda function you created earlier as the destination. For more information, see the [AWS documentation](https://docs.aws.amazon.com/lambda/latest/dg/with-s3-example.html).

0 commit comments

Comments
 (0)