Description
ERROR 06-23 06:03:48 [core.py:515] EngineCore failed to start.
ERROR 06-23 06:03:48 [core.py:515] Traceback (most recent call last):
ERROR 06-23 06:03:48 [core.py:515] File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/vllm/v1/engine/core.py", line 506, in run_engine_core
ERROR 06-23 06:03:48 [core.py:515] engine_core = EngineCoreProc(*args, **kwargs)
ERROR 06-23 06:03:48 [core.py:515] File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/vllm/v1/engine/core.py", line 390, in __init__
ERROR 06-23 06:03:48 [core.py:515] super().__init__(vllm_config, executor_class, log_stats,
ERROR 06-23 06:03:48 [core.py:515] File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/vllm/v1/engine/core.py", line 76, in __init__
ERROR 06-23 06:03:48 [core.py:515] self.model_executor = executor_class(vllm_config)
ERROR 06-23 06:03:48 [core.py:515] File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/vllm/executor/executor_base.py", line 53, in __init__
ERROR 06-23 06:03:48 [core.py:515] self._init_executor()
ERROR 06-23 06:03:48 [core.py:515] File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/vllm/executor/uniproc_executor.py", line 47, in _init_executor
ERROR 06-23 06:03:48 [core.py:515] self.collective_rpc("init_device")
ERROR 06-23 06:03:48 [core.py:515] File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/vllm/executor/uniproc_executor.py", line 57, in collective_rpc
ERROR 06-23 06:03:48 [core.py:515] answer = run_method(self.driver_worker, method, args, kwargs)
ERROR 06-23 06:03:48 [core.py:515] File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/vllm/utils.py", line 2671, in run_method
ERROR 06-23 06:03:48 [core.py:515] return func(*args, **kwargs)
ERROR 06-23 06:03:48 [core.py:515] File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/vllm/worker/worker_base.py", line 606, in init_device
ERROR 06-23 06:03:48 [core.py:515] self.worker.init_device() # type: ignore
ERROR 06-23 06:03:48 [core.py:515] File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/vllm/v1/worker/gpu_worker.py", line 153, in init_device
ERROR 06-23 06:03:48 [core.py:515] init_worker_distributed_environment(self.vllm_config, self.rank,
ERROR 06-23 06:03:48 [core.py:515] File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/vllm/v1/worker/gpu_worker.py", line 367, in init_worker_distributed_environment
ERROR 06-23 06:03:48 [core.py:515] init_distributed_environment(parallel_config.world_size, rank,
ERROR 06-23 06:03:48 [core.py:515] File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/vllm/distributed/parallel_state.py", line 942, in init_distributed_environment
ERROR 06-23 06:03:48 [core.py:515] torch.distributed.init_process_group(
ERROR 06-23 06:03:48 [core.py:515] File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/torch/distributed/c10d_logger.py", line 81, in wrapper
ERROR 06-23 06:03:48 [core.py:515] return func(*args, **kwargs)
ERROR 06-23 06:03:48 [core.py:515] File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/torch/distributed/c10d_logger.py", line 95, in wrapper
ERROR 06-23 06:03:48 [core.py:515] func_return = func(*args, **kwargs)
ERROR 06-23 06:03:48 [core.py:515] File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1710, in init_process_group
ERROR 06-23 06:03:48 [core.py:515] store, rank, world_size = next(rendezvous_iterator)
ERROR 06-23 06:03:48 [core.py:515] File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/torch/distributed/rendezvous.py", line 230, in _tcp_rendezvous_handler
ERROR 06-23 06:03:48 [core.py:515] store = _create_c10d_store(
ERROR 06-23 06:03:48 [core.py:515] File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/torch/distributed/rendezvous.py", line 189, in _create_c10d_store
ERROR 06-23 06:03:48 [core.py:515] return TCPStore(
ERROR 06-23 06:03:48 [core.py:515] torch.distributed.DistNetworkError: The client socket has timed out after 600000ms while trying to connect to (33.117.211.18, 38711).
Process EngineCore_0:
Traceback (most recent call last):
File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/multiprocessing/process.py", line 315, in _bootstrap
self.run()
File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/vllm/v1/engine/core.py", line 519, in run_engine_core
raise e
File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/vllm/v1/engine/core.py", line 506, in run_engine_core
engine_core = EngineCoreProc(*args, **kwargs)
File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/vllm/v1/engine/core.py", line 390, in __init__
super().__init__(vllm_config, executor_class, log_stats,
File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/vllm/v1/engine/core.py", line 76, in __init__
self.model_executor = executor_class(vllm_config)
File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/vllm/executor/executor_base.py", line 53, in __init__
self._init_executor()
File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/vllm/executor/uniproc_executor.py", line 47, in _init_executor
self.collective_rpc("init_device")
File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/vllm/executor/uniproc_executor.py", line 57, in collective_rpc
answer = run_method(self.driver_worker, method, args, kwargs)
File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/vllm/utils.py", line 2671, in run_method
return func(*args, **kwargs)
File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/vllm/worker/worker_base.py", line 606, in init_device
self.worker.init_device() # type: ignore
File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/vllm/v1/worker/gpu_worker.py", line 153, in init_device
init_worker_distributed_environment(self.vllm_config, self.rank,
File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/vllm/v1/worker/gpu_worker.py", line 367, in init_worker_distributed_environment
init_distributed_environment(parallel_config.world_size, rank,
File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/vllm/distributed/parallel_state.py", line 942, in init_distributed_environment
torch.distributed.init_process_group(
File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/torch/distributed/c10d_logger.py", line 81, in wrapper
return func(*args, **kwargs)
File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/torch/distributed/c10d_logger.py", line 95, in wrapper
func_return = func(*args, **kwargs)
File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1710, in init_process_group
store, rank, world_size = next(rendezvous_iterator)
File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/torch/distributed/rendezvous.py", line 230, in _tcp_rendezvous_handler
store = _create_c10d_store(
File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/torch/distributed/rendezvous.py", line 189, in _create_c10d_store
return TCPStore(
torch.distributed.DistNetworkError: The client socket has timed out after 600000ms while trying to connect to (33.117.211.18, 38711).
[rank0]: Traceback (most recent call last):
[rank0]: File "/root/code/aha_test/run_grpo.py", line 291, in <module>
[rank0]: main()
[rank0]: File "/root/code/aha_test/run_grpo.py", line 287, in main
[rank0]: grpo_function(model_args, script_args, training_args)
[rank0]: File "/root/code/aha_test/run_grpo.py", line 226, in grpo_function
[rank0]: trainer = GRPOTrainer(
[rank0]: File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/trl/trainer/grpo_trainer.py", line 313, in __init__
[rank0]: self.llm = LLM(
[rank0]: File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/vllm/entrypoints/llm.py", line 243, in __init__
[rank0]: self.llm_engine = LLMEngine.from_engine_args(
[rank0]: File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/vllm/engine/llm_engine.py", line 501, in from_engine_args
[rank0]: return engine_cls.from_vllm_config(
[rank0]: File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/vllm/v1/engine/llm_engine.py", line 124, in from_vllm_config
[rank0]: return cls(vllm_config=vllm_config,
[rank0]: File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/vllm/v1/engine/llm_engine.py", line 101, in __init__
[rank0]: self.engine_core = EngineCoreClient.make_client(
[rank0]: File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/vllm/v1/engine/core_client.py", line 75, in make_client
[rank0]: return SyncMPClient(vllm_config, executor_class, log_stats)
[rank0]: File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/vllm/v1/engine/core_client.py", line 558, in __init__
[rank0]: super().__init__(
[rank0]: File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/vllm/v1/engine/core_client.py", line 422, in __init__
[rank0]: self._init_engines_direct(vllm_config, local_only,
[rank0]: File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/vllm/v1/engine/core_client.py", line 491, in _init_engines_direct
[rank0]: self._wait_for_engine_startup(handshake_socket, input_address,
[rank0]: File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/vllm/v1/engine/core_client.py", line 511, in _wait_for_engine_startup
[rank0]: wait_for_engine_startup(
[rank0]: File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/vllm/v1/utils.py", line 494, in wait_for_engine_startup
[rank0]: raise RuntimeError("Engine core initialization failed. "
[rank0]: RuntimeError: Engine core initialization failed. See root cause above. Failed core proc(s): {}
[rank0]:[W623 06:03:51.898663852 ProcessGroupNCCL.cpp:1476] Warning: WARNING: destroy_process_group() was not called before program exit, which can leak resources. For more info, please see https://pytorch.org/docs/stable/distributed.html#shutdown (function operator())
primusb37434c298ccb21d03021ea4ez-master-0:67682:71918 [0] NCCL INFO misc/socket.cc:64 -> 3
primusb37434c298ccb21d03021ea4ez-master-0:67682:68241 [0] NCCL INFO misc/socket.cc:881 -> 3
primusb37434c298ccb21d03021ea4ez-master-0:67682:71918 [0] NCCL INFO misc/socket.cc:80 -> 3
primusb37434c298ccb21d03021ea4ez-master-0:67682:71918 [0] NCCL INFO misc/socket.cc:829 -> 3
primusb37434c298ccb21d03021ea4ez-master-0:67682:71918 [0] NCCL INFO comm 0x56191a4e0f80 rank 0 nranks 3 cudaDev 0 busId 4a000 - Abort COMPLETE
W0623 06:03:53.297900 67194 site-packages/torch/distributed/elastic/multiprocessing/api.py:900] Sending process 67683 closing signal SIGTERM
W0623 06:03:53.303901 67194 site-packages/torch/distributed/elastic/multiprocessing/api.py:900] Sending process 67684 closing signal SIGTERM
E0623 06:03:53.475666 67194 site-packages/torch/distributed/elastic/multiprocessing/api.py:874] failed (exitcode: 1) local_rank: 0 (pid: 67682) of binary: /root/storage/miniconda3/envs/kktrl/bin/python3.10
Traceback (most recent call last):
File "/root/storage/miniconda3/envs/kktrl/bin/accelerate", line 8, in <module>
sys.exit(main())
File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/accelerate/commands/accelerate_cli.py", line 50, in main
args.func(args)
File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/accelerate/commands/launch.py", line 1183, in launch_command
deepspeed_launcher(args)
File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/accelerate/commands/launch.py", line 868, in deepspeed_launcher
distrib_run.run(args)
File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/torch/distributed/run.py", line 883, in run
elastic_launch(
File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/torch/distributed/launcher/api.py", line 139, in __call__
return launch_agent(self._config, self._entrypoint, list(args))
File "/root/storage/miniconda3/envs/kktrl/lib/python3.10/site-packages/torch/distributed/launcher/api.py", line 270, in launch_agent
raise ChildFailedError(
torch.distributed.elastic.multiprocessing.errors.ChildFailedError:
============================================================
run_grpo.py FAILED
------------------------------------------------------------
Failures:
<NO_OTHER_FAILURES>
------------------------------------------------------------
Root Cause (first observed failure):
[0]:
time : 2025-06-23_06:03:53
host : primusb37434c298ccb21d03021ea4ez-master-0
rank : 0 (local_rank: 0)
exitcode : 1 (pid: 67682)
error_file: <N/A>
traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html
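The root cause above is a c10d TCPStore client timing out while connecting to (33.117.211.18, 38711) during vLLM's engine-core startup. A minimal standalone probe like the one below can confirm whether that rendezvous endpoint is even reachable from the failing node (a sketch, not a fix: host and port are copied from the error message, everything else is assumed):

# Standalone probe for the c10d rendezvous endpoint from the traceback.
# host/port are taken from the timeout message above.
from datetime import timedelta
from torch.distributed import TCPStore

host, port = "33.117.211.18", 38711

try:
    # is_master=False makes this process a client that must reach an
    # already-listening TCPStore; the short timeout fails fast instead
    # of the 600s hang seen in the log.
    store = TCPStore(host, port, is_master=False, timeout=timedelta(seconds=10))
    print(f"reached rendezvous endpoint {host}:{port}")
except Exception as e:
    print(f"cannot reach {host}:{port}: {e}")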
/root/code/aha_test/run_grpo.sh
NPROC_PER_NODE=4
CUDA_VISIBLE_DEVICES=0,1,2,3
MASTER_PORT=45678
PYTORCH_CUDA_ALLOC_CONF='expandable_segments:True'
accelerate launch --num_processes 3 --config_file deepspeed_zero3.yaml run_grpo.py --config grpo-countdown.yaml
/root/code/aha_test/run_grpo.py
import ipdb
import logging
import os
import random
import re
from dataclasses import dataclass
from datetime import datetime
from typing import Optional

os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

import torch
from transformers.trainer_utils import get_last_checkpoint
from transformers import AutoTokenizer
from datasets import load_dataset
from trl import GRPOConfig, GRPOTrainer, get_peft_config, ModelConfig, TrlParser
########################
# Custom dataclasses
########################
@dataclass
class ScriptArguments:
    dataset_id_or_path: str = "/root/code/Countdown-Tasks-3to4"
    dataset_splits: str = "train"
    tokenizer_name_or_path: Optional[str] = None
########################
# Setup logging
########################
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
handler = logging.StreamHandler()
handler.setFormatter(
    logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
)
logger.addHandler(handler)
########################
# Helper functions
########################
def format_reward_func(completions, target, **kwargs):
    """
    Format: <think>...</think><answer>...</answer>
    Args:
        completions (list[str]): Generated outputs
        target (list[str]): Expected answers
    Returns:
        list[float]: Reward scores
    """
    rewards = []
    for completion, gt in zip(completions, target):
        try:
            # prepend a synthetic <think>, since it is already part of the prompt and prefilled for the assistant, so the regex matches more easily
            completion = "<think>" + completion
            if random.random() < 0.1:  # 10% chance to write samples into a file
                os.makedirs("completion_samples", exist_ok=True)
                log_file = os.path.join("completion_samples", "completion_samples.txt")
                with open(log_file, "a") as f:
                    f.write(f"\n\n==============\n")
                    f.write(completion)
            # Check if the format is correct
            regex = r"^<think>([^<]*(?:<(?!/?think>)[^<]*)*)<\/think>\n<answer>([\s\S]*?)<\/answer>$"
            match = re.search(regex, completion, re.DOTALL)
            # if the format is not correct, reward is 0
            if match is None or len(match.groups()) != 2:
                rewards.append(0.0)
            else:
                rewards.append(1.0)
        except Exception:
            rewards.append(0.0)
    return rewards
def equation_reward_func(completions, target, nums, **kwargs):
    """
    Evaluates completions based on the mathematical correctness of the answer.
    Args:
        completions (list[str]): Generated outputs
        target (list[str]): Expected answers
        nums (list[str]): Available numbers
    Returns:
        list[float]: Reward scores
    """
    rewards = []
    for completion, gt, numbers in zip(completions, target, nums):
        try:
            # prepend a synthetic <think>, since it is already part of the prompt and prefilled for the assistant, so the regex matches more easily
            completion = "<think>" + completion
            # Check if the format is correct
            match = re.search(r"<answer>(.*?)<\/answer>", completion)
            if match is None:
                rewards.append(0.0)
                continue
            # Extract the "answer" part from the completion
            equation = match.group(1).strip()
            # Extract all numbers from the equation
            used_numbers = [int(n) for n in re.findall(r'\d+', equation)]
            # Check if all numbers are used exactly once
            if sorted(used_numbers) != sorted(numbers):
                rewards.append(0.0)
                continue
            # Define a regex pattern that only allows numbers, operators, parentheses, and whitespace
            allowed_pattern = r'^[\d+\-*/().\s]+$'
            if not re.match(allowed_pattern, equation):
                rewards.append(0.0)
                continue
            # Evaluate the equation with restricted globals and locals
            result = eval(equation, {"__builtins__": None}, {})
            # Check if the equation is correct and matches the ground truth
            if abs(float(result) - float(gt)) < 1e-5:
                rewards.append(1.0)
                if random.random() < 0.10:  # 10% chance to write fully successful samples into a file
                    os.makedirs("completion_samples", exist_ok=True)
                    log_file = os.path.join("completion_samples", "success_completion_samples.txt")
                    with open(log_file, "a") as f:
                        f.write(f"\n\n==============\n")
                        f.write(completion)
            else:
                rewards.append(0.0)
        except Exception:
            # If evaluation fails, reward is 0
            rewards.append(0.0)
    return rewards
def get_checkpoint(training_args: GRPOConfig):
    last_checkpoint = None
    if os.path.isdir(training_args.output_dir):
        last_checkpoint = get_last_checkpoint(training_args.output_dir)
    return last_checkpoint
def grpo_function(
    model_args: ModelConfig, script_args: ScriptArguments, training_args: GRPOConfig
):
    #########################
    # Log parameters
    #########################
    logger.info(f"Model parameters {model_args}")
    logger.info(f"Training/evaluation parameters {training_args}")
    ################
    # Load tokenizer
    ################
    tokenizer = AutoTokenizer.from_pretrained(
        (
            script_args.tokenizer_name_or_path
            if script_args.tokenizer_name_or_path
            else model_args.model_name_or_path
        ),
        revision=model_args.model_revision,
        trust_remote_code=model_args.trust_remote_code,
    )
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    ###############
    # Load datasets
    ###############
    # Load dataset from the Hugging Face Hub (here a local path)
    dataset = load_dataset(script_args.dataset_id_or_path, split=script_args.dataset_splits)
    # select a random subset of 50k samples
    dataset = dataset.shuffle(seed=42).select(range(50000))
    #####################
    # Prepare and format dataset
    #####################
    # generate the r1 prompt with a prefix so the model starts directly in the thinking process
    def generate_r1_prompt(numbers, target):
        r1_prefix = [
            {
                "role": "system",
                "content": "You are a helpful assistant. You first thinks about the reasoning process in the mind and then provides the user with the answer."
            },
            {
                "role": "user",
                "content": f"Using the numbers {numbers}, create an equation that equals {target}. You can use basic arithmetic operations (+, -, *, /) one or multiple times but each number can only be used once. Show your work in <think> </think> tags. And return the final equation in <answer> </answer> tags, for example <answer> (1 + 2) / 3 </answer>. Think step by step inside <think> tags."
            },
            {
                "role": "assistant",
                "content": "Let me solve this step by step.\n<think>"
            },
        ]
        return {
            "prompt": tokenizer.apply_chat_template(r1_prefix, tokenize=False, continue_final_message=True),
            "target": target,
            "nums": numbers,
        }
    # convert our dataset to the r1 prompt
    dataset = dataset.map(lambda x: generate_r1_prompt(x["nums"], x["target"]))
    # split the dataset into train and test
    train_test_split = dataset.train_test_split(test_size=0.1)
    train_dataset = train_test_split["train"]
    test_dataset = train_test_split["test"]
    trainer = GRPOTrainer(
        model=model_args.model_name_or_path,
        reward_funcs=[format_reward_func, equation_reward_func],
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        peft_config=get_peft_config(model_args),
    )
    ###############
    # Training loop
    ###############
    # Check for last checkpoint
    last_checkpoint = get_checkpoint(training_args)
    if last_checkpoint is not None and training_args.resume_from_checkpoint is None:
        logger.info(f"Checkpoint detected, resuming training at {last_checkpoint}.")
    # Train the model
    logger.info(
        f'*** Starting training {datetime.now().strftime("%Y-%m-%d %H:%M:%S")} for {training_args.num_train_epochs} epochs ***'
    )
    train_result = trainer.train(resume_from_checkpoint=last_checkpoint)
    # Log and save metrics
    metrics = train_result.metrics
    metrics["train_samples"] = len(train_dataset)
    trainer.log_metrics("train", metrics)
    trainer.save_metrics("train", metrics)
    trainer.save_state()
    logger.info("*** Training complete ***")
    ##################################
    # Save model and create model card
    ##################################
    logger.info("*** Save model ***")
    trainer.model.config.use_cache = True
    trainer.save_model(training_args.output_dir)
    logger.info(f"Model saved to {training_args.output_dir}")
    training_args.distributed_state.wait_for_everyone()  # wait for all processes to load
    tokenizer.save_pretrained(training_args.output_dir)
    logger.info(f"Tokenizer saved to {training_args.output_dir}")
    # Save everything else on the main process
    if trainer.accelerator.is_main_process:
        trainer.create_model_card({"tags": ["rl", "grpo", "tutorial", "philschmid"]})
    # push to hub if needed
    if training_args.push_to_hub is True:
        logger.info("Pushing to hub...")
        trainer.push_to_hub()
    logger.info("*** Training complete! ***")


def main():
    parser = TrlParser((ModelConfig, ScriptArguments, GRPOConfig))
    model_args, script_args, training_args = parser.parse_args_and_config()
    # ipdb.set_trace()
    # Run the main training loop
    grpo_function(model_args, script_args, training_args)


if __name__ == "__main__":
    main()
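For reference, a quick sanity check of the two reward functions above (a standalone sketch; the sample completion is made up, not from a real rollout):

# Sanity check for the reward functions, run after the definitions above.
# Both calls should print [1.0] for this well-formed, correct sample.
sample = ["We need 95, and 19 * 5 = 95.</think>\n<answer> 19 * 5 </answer>"]
print(format_reward_func(completions=sample, target=["95"]))
print(equation_reward_func(completions=sample, target=["95"], nums=[[19, 5]]))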
/root/code/aha_test/grpo-countdown.yaml
# Model arguments
model_name_or_path: /primus_source_checkpoint/Qwen3-1_7B
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2
bf16: true
tf32: true
output_dir: /root/code/aha_test/output
# train_type: lora
# Dataset arguments
dataset_id_or_path: /root/code/Countdown-Tasks-3to4
# Lora Arguments
# No LoRA is used here
# Training arguments
max_steps: 450
per_device_train_batch_size: 8
gradient_accumulation_steps: 8
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
learning_rate: 5.0e-7 # 1.0e-6 as in the DeepSeek math paper; 5e-7 from https://hijkzzz.notion.site/unraveling-rlhf-and-its-variants-engineering-insights#147d9a33ecc9806090f3d5c749d31f05
lr_scheduler_type: cosine
warmup_ratio: 0.03
# GRPO specific parameters
beta: 0.001 # 0.04 as in the DeepSeek math paper; 0.001 from https://hijkzzz.notion.site/unraveling-rlhf-and-its-variants-engineering-insights#147d9a33ecc9806090f3d5c749d31f05
max_prompt_length: 256
max_completion_length: 1024
num_generations: 8
use_vllm: true
vllm_device: "cuda:0"
vllm_gpu_memory_utilization: 0.2
vllm_max_model_len: 2048
sleep_level: 1
offload_optimizer: true
offload_model: true
gc_collect_after_offload: true
# Logging arguments
logging_strategy: steps
logging_steps: 1
report_to:
- tensorboard
- wandb
logging_dir: /root/code/aha_test/tensorboard_logs
log_completions: True
save_strategy: "steps"
save_steps: 45
seed: 42
# # Hugging Face Hub
# push_to_hub: true
# # hub_model_id: llama-3-1-8b-math-orca-qlora-10k-ep1 # if not defined same as output_dir
# hub_strategy: every_save
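As a cross-check of the batch geometry this config implies together with the launch command above (an illustrative sketch, assuming TRL's usual requirement that the global generation batch be divisible by num_generations):

# Batch arithmetic implied by `--num_processes 3` and the YAML above.
num_processes = 3                  # accelerate launch --num_processes 3
per_device_train_batch_size = 8
gradient_accumulation_steps = 8
num_generations = 8

# TRL requires the per-step generation batch to split evenly into
# groups of num_generations completions per prompt.
assert (num_processes * per_device_train_batch_size) % num_generations == 0

completions_per_optimizer_step = (
    num_processes * per_device_train_batch_size * gradient_accumulation_steps
)
prompts_per_optimizer_step = completions_per_optimizer_step // num_generations
print(completions_per_optimizer_step, prompts_per_optimizer_step)  # 192 24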
/root/code/aha_test/deepspeed_zero3.yaml
compute_environment: LOCAL_MACHINE
debug: false
deepspeed_config:
  deepspeed_multinode_launcher: standard
  offload_optimizer_device: none
  offload_param_device: none
  zero3_init_flag: true
  zero3_save_16bit_model: true
  zero_stage: 3
distributed_type: DEEPSPEED
downcast_bf16: 'no'
machine_rank: 0
main_training_function: main
mixed_precision: bf16
num_machines: 1
num_processes: 4
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false
Using Python 3.10.0 with the following packages installed:
Package Version
---------------------------------------- -------------
absl-py 2.3.0
accelerate 1.7.0
aiohappyeyeballs 2.6.1
aiohttp 3.12.13
aiosignal 1.3.2
airportsdata 20250523
annotated-types 0.7.0
antlr4-python3-runtime 4.13.2
anyio 4.9.0
astor 0.8.1
asttokens 3.0.0
async-timeout 5.0.1
attrs 25.3.0
blake3 1.0.5
cachetools 6.1.0
certifi 2025.6.15
charset-normalizer 3.4.2
click 8.2.1
cloudpickle 3.1.1
comm 0.2.2
compressed-tensors 0.10.1
cupy-cuda12x 13.4.1
datasets 3.6.0
debugpy 1.8.14
decorator 5.2.1
deepspeed 0.15.4
depyf 0.18.0
dill 0.3.8
diskcache 5.6.3
distro 1.9.0
dnspython 2.7.0
einops 0.8.1
email_validator 2.2.0
exceptiongroup 1.3.0
executing 2.2.0
fastapi 0.115.12
fastapi-cli 0.0.7
fastrlock 0.8.3
filelock 3.18.0
flash_attn 2.8.0.post2
frozenlist 1.7.0
fsspec 2025.3.0
gguf 0.17.0
gitdb 4.0.12
GitPython 3.1.44
googleapis-common-protos 1.70.0
grpcio 1.73.0
h11 0.16.0
hf_transfer 0.1.9
hf-xet 1.1.4
hjson 3.1.0
httpcore 1.0.9
httptools 0.6.4
httpx 0.28.1
huggingface-hub 0.33.0
idna 3.10
importlib_metadata 8.7.0
interegular 0.3.3
ipdb 0.13.13
ipykernel 6.29.5
ipython 8.37.0
ipywidgets 8.1.7
jedi 0.19.2
Jinja2 3.1.6
jiter 0.10.0
jsonschema 4.24.0
jsonschema-specifications 2025.4.1
jupyter_client 8.6.3
jupyter_core 5.8.1
jupyterlab_widgets 3.0.15
lark 1.2.2
latex2sympy2 1.9.1
latex2sympy2_extended 1.10.1
llguidance 0.7.29
llvmlite 0.44.0
lm-format-enforcer 0.10.11
Markdown 3.8
markdown-it-py 3.0.0
MarkupSafe 3.0.2
math-verify 0.7.0
matplotlib-inline 0.1.7
mdurl 0.1.2
mistral_common 1.6.2
mpmath 1.3.0
msgpack 1.1.1
msgspec 0.19.0
multidict 6.4.4
multiprocess 0.70.16
nest-asyncio 1.6.0
networkx 3.4.2
ninja 1.11.1.4
numba 0.61.2
numpy 2.2.6
nvidia-cublas-cu12 12.6.4.1
nvidia-cuda-cupti-cu12 12.6.80
nvidia-cuda-nvrtc-cu12 12.6.77
nvidia-cuda-runtime-cu12 12.6.77
nvidia-cudnn-cu12 9.5.1.17
nvidia-cufft-cu12 11.3.0.4
nvidia-cufile-cu12 1.11.1.6
nvidia-curand-cu12 10.3.7.77
nvidia-cusolver-cu12 11.7.1.2
nvidia-cusparse-cu12 12.5.4.2
nvidia-cusparselt-cu12 0.6.3
nvidia-ml-py 12.575.51
nvidia-nccl-cu12 2.26.2
nvidia-nvjitlink-cu12 12.6.85
nvidia-nvtx-cu12 12.6.77
openai 1.88.0
opencv-python-headless 4.11.0.86
opentelemetry-api 1.34.1
opentelemetry-exporter-otlp 1.34.1
opentelemetry-exporter-otlp-proto-common 1.34.1
opentelemetry-exporter-otlp-proto-grpc 1.34.1
opentelemetry-exporter-otlp-proto-http 1.34.1
opentelemetry-proto 1.34.1
opentelemetry-sdk 1.34.1
opentelemetry-semantic-conventions 0.55b1
opentelemetry-semantic-conventions-ai 0.4.9
outlines 0.1.11
outlines_core 0.1.26
packaging 25.0
pandas 2.3.0
parso 0.8.4
partial-json-parser 0.2.1.1.post5
peft 0.14.0
pexpect 4.9.0
pillow 11.2.1
pip 25.1
platformdirs 4.3.8
prometheus_client 0.22.1
prometheus-fastapi-instrumentator 7.1.0
prompt_toolkit 3.0.51
propcache 0.3.2
protobuf 5.29.5
psutil 7.0.0
ptyprocess 0.7.0
pure_eval 0.2.3
py-cpuinfo 9.0.0
pyarrow 20.0.0
pycountry 24.6.1
pydantic 2.11.7
pydantic_core 2.33.2
Pygments 2.19.1
python-dateutil 2.9.0.post0
python-dotenv 1.1.0
python-json-logger 3.3.0
python-multipart 0.0.20
pytz 2025.2
PyYAML 6.0.2
pyzmq 27.0.0
ray 2.47.0
referencing 0.36.2
regex 2024.11.6
requests 2.32.4
rich 14.0.0
rich-toolkit 0.14.7
rpds-py 0.25.1
safetensors 0.5.3
scipy 1.15.3
sentencepiece 0.2.0
sentry-sdk 2.30.0
setproctitle 1.3.6
setuptools 78.1.1
shellingham 1.5.4
six 1.17.0
smmap 5.0.2
sniffio 1.3.1
stack-data 0.6.3
starlette 0.46.2
sympy 1.14.0
tensorboard 2.19.0
tensorboard-data-server 0.7.2
tiktoken 0.9.0
tokenizers 0.21.1
tomli 2.2.1
torch 2.7.0
torch-tb-profiler 0.4.3
torchaudio 2.7.0
torchvision 0.22.0
tornado 6.5.1
tqdm 4.67.1
traitlets 5.14.3
transformers 4.52.4
triton 3.3.0
trl 0.14.0
typer 0.16.0
typing_extensions 4.14.0
typing-inspection 0.4.1
tzdata 2025.2
urllib3 2.4.0
uvicorn 0.34.3
uvloop 0.21.0
vllm 0.9.1
wandb 0.20.1
watchfiles 1.1.0
wcwidth 0.2.13
websockets 15.0.1
Werkzeug 3.1.3
wheel 0.45.1
widgetsnbextension 4.0.14
xformers 0.0.30
xgrammar 0.1.19
xxhash 3.5.0
yarl 1.20.1
zipp 3.23.0