Skip to content

Commit f998184

Browse files
authored
Merge pull request #105 from LogicNet-Subnet/upload-log-minio
Upload validator pm2 log to Minio Storage
2 parents 13cc1dc + 84972fb commit f998184

File tree

8 files changed

+149
-26
lines changed

8 files changed

+149
-26
lines changed

.DS_Store

6 KB
Binary file not shown.

app_validator.config.sample.js

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,20 @@
11
module.exports = {
22
apps: [{
3-
name: "your_validator_name", // update this name
3+
name: "sn35-validator", // update this name
44
script: "neurons/validator/validator.py",
55
interpreter: "/root/miniconda3/envs/sn35-env/bin/python", // update this path
66
env: {
7+
APP_NAME: "sn35-validator",
78
PYTHONPATH: './:${PYTHONPATH}',
89
OPENAI_API_KEY: "your_openai_key",
910
USE_TORCH: 1,
1011
VALIDATOR_USERNAME: "datapool_username",
1112
VALIDATOR_PASSWORD: "datapool_password",
12-
TASK_POOL_URL: "server_datapool_endpoint"
13+
TASK_POOL_URL: "server_datapool_endpoint",
14+
MINIO_ENDPOINT: "minio_endpoint",
15+
MINIO_ACCESS_KEY: "minio_access_key",
16+
MINIO_SECRET_KEY: "minio_secret_key",
17+
PM2_LOG_DIR: "/root/.pm2/logs/"
1318
},
1419
args: [
1520
"--netuid", "35",

docs/VALIDATOR.md

Lines changed: 4 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,6 @@ This setup allows you to run the Validator locally by hosting a vLLM server. Whi
8585
#### Prerequisites
8686

8787
- **OpenAI API Key**: Obtain from the OpenAI platform dashboard.
88-
- **Wandb API Key**: Obtain from the Wandb platform dashboard.
8988
- **Python 3.10**
9089
- **PM2 Process Manager**: For running and managing the Validator process. *OPTIONAL*
9190

@@ -117,6 +116,10 @@ This setup allows you to run the Validator locally by hosting a vLLM server. Whi
117116
echo "TASK_POOL_URL=server_datapool_endpoint" >> .env
118117
echo "VALIDATOR_USERNAME=datapool_username" >> .env
119118
echo "VALIDATOR_PASSWORD=datapool_account" >> .env
119+
echo MINIO_ENDPOINT="server_minio_endpoint" >> .env
120+
echo MINIO_ACCESS_KEY="minio_username" >> .env
121+
echo MINIO_SECRET_KEY="minio_password" >> .env
122+
echo APP_NAME="sn35-validator" >> .env
120123
echo "USE_TORCH=1" >> .env
121124
```
122125

@@ -157,26 +160,10 @@ This setup allows you to run the Validator locally by hosting a vLLM server. Whi
157160
```bash
158161
--axon.port "your-public-open-port"
159162
```
160-
161163
---
162164

163165
### Additional Features
164166

165-
#### Wandb Integration
166-
167-
Configure Wandb to track and analyze Validator performance.
168-
169-
1. Add Wandb API key to `.env`:
170-
```bash
171-
echo "WANDB_API_KEY=your_wandb_api_key" >> .env
172-
```
173-
2. It's already configured for mainnet as default.
174-
3. Run Validator with Wandb on Testnet:
175-
```bash
176-
--wandb.project_name logicnet-testnet \
177-
--wandb.entity ait-ai
178-
```
179-
180167
---
181168

182169
### Troubleshooting & Support

install.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ pip install -e .
66
# Uninstall uvloop if it's installed
77
echo "Uninstalling uvloop..."
88
pip uninstall uvloop -y
9+
bash pm2_setup.sh
910

1011
# Check if USE_TORCH=1 is already set in .env
1112
if grep -q '^USE_TORCH=1$' .env; then

logicnet/utils/minio_manager.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
from minio import Minio
2+
from minio.error import S3Error
3+
import os
4+
import time
5+
import glob
6+
7+
class MinioManager:
8+
def __init__(self, minio_endpoint, access_key, secret_key):
9+
self.minio_endpoint = minio_endpoint
10+
self.access_key = access_key
11+
self.secret_key = secret_key
12+
13+
self.minio_client = self.initialize_minio_client()
14+
15+
def initialize_minio_client(self):
16+
"""Initialize and return MinIO client."""
17+
return Minio(
18+
self.minio_endpoint,
19+
access_key=self.access_key,
20+
secret_key=self.secret_key,
21+
secure=False # Set to True if using HTTPS
22+
)
23+
24+
def ensure_bucket_exists(self, bucket_name):
25+
"""Check if bucket exists, create if it doesn't."""
26+
try:
27+
if not self.minio_client.bucket_exists(bucket_name):
28+
self.minio_client.make_bucket(bucket_name)
29+
print(f"Bucket '{bucket_name}' created")
30+
else:
31+
print(f"Bucket '{bucket_name}' already exists")
32+
except S3Error as e:
33+
print(f"Error checking/creating bucket: {e}")
34+
raise
35+
36+
def upload_file(self, file_path, bucket_name, minio_folder_path):
37+
"""Upload a single file to MinIO."""
38+
self.ensure_bucket_exists(bucket_name)
39+
try:
40+
object_name = os.path.basename(file_path)
41+
if not os.path.exists(file_path):
42+
print(f"File '{file_path}' not found, skipping")
43+
return False
44+
self.minio_client.fput_object(bucket_name, f"{minio_folder_path}/{object_name}", file_path)
45+
print(f"Uploaded '{file_path}' to bucket '{bucket_name}' as '{object_name}'")
46+
return True
47+
except S3Error as e:
48+
print(f"Error uploading '{file_path}': {e}")
49+
return False
50+
except Exception as e:
51+
print(f"Unexpected error uploading '{file_path}': {e}")
52+
return False
53+
54+
def get_uploaded_files(self, bucket_name):
55+
"""Get list of files already uploaded to MinIO."""
56+
try:
57+
objects = self.minio_client.list_objects(bucket_name, recursive=True)
58+
return {obj.object_name for obj in objects}
59+
except S3Error as e:
60+
print(f"Error listing objects in bucket: {e}")
61+
return set()

neurons/validator/validator.py

Lines changed: 67 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,19 +15,25 @@
1515
from copy import deepcopy
1616
import bittensor as bt
1717
import logicnet as ln
18-
from concurrent.futures import ThreadPoolExecutor, as_completed
1918
from neurons.validator.validator_proxy import ValidatorProxy
2019
from logicnet.base.validator import BaseValidatorNeuron
21-
from logicnet.validator import MinerManager, LogicChallenger, LogicRewarder, MinerInfo
20+
from logicnet.validator import MinerManager, LogicChallenger, LogicRewarder
2221
from logicnet.utils.wandb_manager import WandbManager
2322
from logicnet.utils.text_uts import modify_question
2423
from logicnet.protocol import LogicSynapse
2524
from neurons.validator.core.serving_queue import QueryQueue
26-
from collections import defaultdict
27-
import wandb
2825
from threading import Lock
2926
import queue
27+
from logicnet.utils.minio_manager import MinioManager
28+
import glob
3029

30+
# Settings for uploading rotated PM2 log files to MinIO. Every value except
# log_bucket_name is read from the environment once, when the module is
# imported. APP_NAME and PM2_LOG_DIR fall back to the defaults shown below;
# the other variables have no default and are None when unset.
log_bucket_name = "logs"
31+
app_name = os.getenv("APP_NAME", "validator")
32+
validator_username = os.getenv("VALIDATOR_USERNAME")
33+
minio_endpoint = os.getenv("MINIO_ENDPOINT")
34+
access_key = os.getenv("MINIO_ACCESS_KEY")
35+
secret_key = os.getenv("MINIO_SECRET_KEY")
36+
pm2_log_dir = os.getenv("PM2_LOG_DIR", "/root/.pm2/logs")
3137

3238
def init_category(config=None, model_pool=None):
3339
category = {
@@ -50,6 +56,14 @@ def init_category(config=None, model_pool=None):
5056
"mistralai/Mistral-7B-Instruct"
5157
]
5258

59+
def get_latest_previous_log_file(log_files):
    """Return the second-most-recent log file based on modification time.

    Args:
        log_files: Iterable of log file paths.

    Returns:
        The path with the second-newest modification time, or None when
        fewer than two of the given files can be read.
    """
    stamped = []
    for path in log_files:
        try:
            stamped.append((os.path.getmtime(path), path))
        except OSError:
            # The file may have been removed (e.g. by log rotation) after the
            # caller listed it; ignore it instead of crashing.
            continue

    if len(stamped) < 2:
        return None  # Not enough files to have a "previous" file

    # Most recent first; index 1 is the newest file that is no longer current.
    stamped.sort(key=lambda item: item[0], reverse=True)
    return stamped[1][1]
66+
5367
class Validator(BaseValidatorNeuron):
5468
def __init__(self, config=None):
5569
"""
@@ -568,10 +582,57 @@ def _log_wandb(self, log):
568582
except Exception as e:
569583
bt.logging.error(f"Error logging to wandb: {e}")
570584

571-
572585
def upload_rotated_log(minio_manager, kind, last_uploaded):
    """Upload the newest rotated PM2 log of one stream to MinIO.

    The most recently modified file matching the pattern is treated as the
    log still being written; the second-most-recent one is the file that was
    just rotated, and that is the one uploaded.

    Args:
        minio_manager: A MinioManager, or None when MinIO is unavailable.
        kind: Log stream to handle: "out" or "error".
        last_uploaded: Path returned by the previous call for this stream
            ("" when nothing has been handled yet).

    Returns:
        The path of the file now known to be in MinIO, or ``last_uploaded``
        unchanged when there was nothing new to upload or the upload failed
        (so it is retried on the next call). This function never raises.
    """
    if minio_manager is None:
        return last_uploaded

    try:
        log_files = glob.glob(os.path.join(pm2_log_dir, f"*{app_name}-{kind}*.log"))
        # Returns None when fewer than two files exist, i.e. no rotation yet.
        previous_file = get_latest_previous_log_file(log_files)
        if not previous_file or previous_file == last_uploaded:
            return last_uploaded

        file_name = os.path.basename(previous_file)
        # Objects are stored under "<username>/<file name>", so the lookup
        # must use the full key, not the bare file name.
        object_name = f"{validator_username}/{file_name}"
        if object_name in minio_manager.get_uploaded_files(log_bucket_name):
            return previous_file

        bt.logging.info(f"Uploading {previous_file} to MinIO")
        if minio_manager.upload_file(previous_file, log_bucket_name, validator_username):
            bt.logging.info(f"\033[1;32m✅ Uploaded {file_name} to MinIO\033[0m")
            return previous_file
    except Exception as e:
        # Log shipping is best-effort: it must never stop the validator.
        bt.logging.error(f"Error uploading {kind} log to MinIO: {e}")

    return last_uploaded


# The main function parses the configuration and runs the validator.
if __name__ == "__main__":
    last_err_file_name = ""
    last_out_file_name = ""

    # Stays None when MinIO cannot be initialized; uploads are then skipped
    # instead of failing on an undefined name inside the loop.
    minio_manager = None
    try:
        minio_manager = MinioManager(minio_endpoint, access_key, secret_key)
    except Exception as e:
        bt.logging.error(f"Error initializing MinioManager: {e}")

    with Validator() as validator:
        while True:
            bt.logging.info("\033[1;32m🟢 Validator running...\033[0m", time.time())

            last_out_file_name = upload_rotated_log(minio_manager, "out", last_out_file_name)
            last_err_file_name = upload_rotated_log(minio_manager, "error", last_err_file_name)

            time.sleep(60)

pm2_setup.sh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Install the pm2-logrotate module and configure how PM2 logs are rotated.
# NOTE(review): compression is presumably disabled so rotated files keep the
# plain ".log" suffix matched by the validator's "*.log" upload glob - confirm.
pm2 install pm2-logrotate
2+
3+
pm2 set pm2-logrotate:max_size 10M
4+
pm2 set pm2-logrotate:compress false
5+
# Rotate every 10 minutes
6+
pm2 set pm2-logrotate:rotateInterval '*/10 * * * *'
7+
pm2 set pm2-logrotate:retain 100

requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,5 @@ python-dotenv==1.0.1
1313
loguru
1414
sympy
1515
wandb
16-
datasets
16+
datasets
17+
minio

0 commit comments

Comments
 (0)