diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f7275bb --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +venv/ diff --git a/config/endpoints.yaml b/config/endpoints.yaml new file mode 100644 index 0000000..ea98201 --- /dev/null +++ b/config/endpoints.yaml @@ -0,0 +1,9 @@ +- name: Example Google + url: https://www.google.com + +- name: Example Post Request + url: https://httpbin.org/post + method: POST + headers: + Content-Type: application/json + body: '{"test": "hello"}' diff --git a/main.py b/main.py index e3f2bef..0c4952c 100644 --- a/main.py +++ b/main.py @@ -2,28 +2,41 @@ import requests import time from collections import defaultdict +from urllib.parse import urlparse +import time as t # Function to load configuration from the YAML file def load_config(file_path): with open(file_path, 'r') as file: return yaml.safe_load(file) -# Function to perform health checks def check_health(endpoint): url = endpoint['url'] - method = endpoint.get('method') + method = endpoint.get('method', 'GET') headers = endpoint.get('headers') body = endpoint.get('body') try: - response = requests.request(method, url, headers=headers, json=body) - if 200 <= response.status_code < 300: + start = t.time() + response = requests.request(method, url, headers=headers, json=body, timeout=5) + elapsed_ms = (t.time() - start) * 1000 + + print(f"{url} responded in {int(elapsed_ms)}ms with status {response.status_code}") + + if 200 <= response.status_code < 300 and elapsed_ms <= 500: return "UP" else: return "DOWN" - except requests.RequestException: + except requests.RequestException as e: + print(f"Request to {url} failed: {e}") return "DOWN" +# Function to extract domain name from URL (ignoring ports) +def get_domain(url): + parsed_url = urlparse(url) + domain = parsed_url.hostname + return domain + # Main function to monitor endpoints def monitor_endpoints(file_path): config = load_config(file_path) @@ -31,7 +44,7 @@ def monitor_endpoints(file_path): while True: for 
endpoint in config: - domain = endpoint["url"].split("//")[-1].split("/")[0] + domain = get_domain(endpoint["url"]) result = check_health(endpoint) domain_stats[domain]["total"] += 1 @@ -58,4 +71,4 @@ def monitor_endpoints(file_path): try: monitor_endpoints(config_file) except KeyboardInterrupt: - print("\nMonitoring stopped by user.") \ No newline at end of file + print("\nMonitoring stopped by user.") diff --git a/output/Output.png b/output/Output.png new file mode 100644 index 0000000..d73cd26 Binary files /dev/null and b/output/Output.png differ diff --git a/readme.md b/readme.md new file mode 100644 index 0000000..52c8558 --- /dev/null +++ b/readme.md @@ -0,0 +1,91 @@ +# Site Reliability Engineering - Endpoint Availability Monitor + +This is a command-line tool written in Python to monitor the availability of HTTP endpoints, as part of the Fetch Rewards Site Reliability Engineering take-home exercise. + +## 📋 Overview + +As a Site Reliability Engineer, it's important to monitor service uptime and build processes that help others identify and respond to incidents. This tool checks HTTP endpoints periodically and reports cumulative availability by domain, helping identify reliability trends over time. + +--- + +## ✅ Features + +- Accepts configuration via a YAML file. +- Periodic health checks every 15 seconds. +- Availability calculated **cumulatively** per domain. +- Endpoints are considered **available** only if: + - HTTP status code is between `200` and `299`. + - Response time is `≤ 500ms`. +- Port numbers in URLs are ignored when grouping by domain. 
+ +--- + +## 🚀 Getting Started + +### Prerequisites + +- Python 3.7+ +- `pip` for managing dependencies +- (Optional but recommended) a virtual environment + +### Install Dependencies + +```bash +pip install -r requirements.txt +``` + +or manually + +```bash +pip install requests pyyaml +``` + +### ✅ Check for endpoints.yaml + +If the `config/endpoints.yaml` file doesn't exist, create it with contents like the following: + +```yaml +- name: Google + url: https://www.google.com +- name: HTTPBin + url: https://httpbin.org/status/200 + method: GET +``` + +### Run the Monitor + +```bash +python main.py config/endpoints.yaml +``` + +### Your output should look like + +![Monitor Output](output/Output.png) + +## 🛠️ Code Changes and Improvements + +### 1. Availability Calculation + +- **Issue:** The initial code did not calculate the availability cumulatively over time. +- **Solution:** Implemented logic to track the number of "UP" and "DOWN" responses for each domain across multiple check cycles, and calculated the availability as a percentage. + +### 2. Response Time Validation + +- **Issue:** There was no check for response time, leading to endpoints potentially being marked as "UP" even if they took longer than 500ms to respond. +- **Solution:** Added a validation step that ensures an endpoint is only considered "UP" if its response time is ≤ 500ms. + +### 3. Domain Parsing + +- **Issue:** Domain names were not parsed correctly, especially when port numbers were included in the URL. +- **Solution:** Implemented a function that extracts the domain name from the URL while ignoring port numbers. + +### 4. Error Handling for Failed Requests + +- **Issue:** The initial code did not properly handle failed HTTP requests. +- **Solution:** Introduced exception handling to catch request failures and classify those endpoints as "DOWN". 
+ +--- + +### 📋 Conclusion + +These changes ensure the tool meets the provided requirements, including cumulative availability reporting, response time validation, and ignoring port numbers when determining domain availability. diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..15fb30c --- /dev/null +++ b/requirements.txt @@ -0,0 +1,6 @@ +certifi==2025.1.31 +charset-normalizer==3.4.1 +idna==3.10 +PyYAML==6.0.2 +requests==2.32.3 +urllib3==2.4.0