We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 92f7b8a, commit f4fdb3c (Copy full SHA for f4fdb3c)
sota-implementations/grpo/grpo-sync.py
@@ -156,6 +156,11 @@ def train(
156
project="grpo-sync", exp_name="-".join(["grpo-sync"] + experiment_name)
157
)
158
159
+ # Wait for the replay buffer to be filled
160
+ while (replay_buffer.write_count < replay_buffer.batch_size):
161
+ torchrl_logger.info(f"Waiting for replay buffer to be filled, {replay_buffer.write_count=}")
162
+ time.sleep(1)
163
+
164
# Training loop
165
torchrl_logger.info("Starting training loop.")
166
pbar = tqdm.tqdm(collector)
0 commit comments