
Commit 1b6455e

Fixes for various warnings that appear in examples. (pyg-team#10357)
This PR addresses several warnings that appear in the example scripts, including:

- UserWarnings triggered by unsafe tensor operations or missing configurations.
- FutureWarnings indicating upcoming behavior changes in dependencies.

By resolving these issues, we aim to improve the clarity of the output and ensure better compatibility with future library versions. No changes to core logic or results are introduced.

```
/workspace/examples/gcn.py:84: UserWarning: Converting a tensor with requires_grad=True to a scalar may lead to unexpected behavior. Consider using tensor.detach() first. (Triggered internally at /opt/pytorch/pytorch/torch/csrc/autograd/generated/python_variable_methods.cpp:835.)
  return float(loss)
/workspace/examples/compile/gcn.py:65: UserWarning: Converting a tensor with requires_grad=True to a scalar may lead to unexpected behavior. Consider using tensor.detach() first. (Triggered internally at /opt/pytorch/pytorch/torch/csrc/autograd/generated/python_variable_methods.cpp:835.)
usr/local/lib/python3.12/dist-packages/torch/cuda/memory.py:491: FutureWarning: torch.cuda.reset_max_memory_allocated now calls torch.cuda.reset_peak_memory_stats, which resets /all/ peak memory stats.
  warnings.warn(
```

Co-authored-by: Rishi Puri <riship@nvidia.com>
1 parent 8fdde68 · commit 1b6455e
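The UserWarning fix boils down to detaching the loss tensor before casting it to a Python float. Below is a minimal, self-contained sketch of the pattern with a toy model and toy data; it is not code from the repository, only an illustration of the change applied in the examples:

```python
import torch
import torch.nn.functional as F

# Toy setup so the snippet runs standalone; the real examples use PyG models.
model = torch.nn.Linear(16, 4)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
x, y = torch.randn(32, 16), torch.randint(0, 4, (32,))


def train_step() -> float:
    model.train()
    optimizer.zero_grad()
    loss = F.cross_entropy(model(x), y)
    loss.backward()
    optimizer.step()
    # Detach before converting to a scalar so autograd does not warn about
    # implicitly discarding the graph of a tensor with requires_grad=True.
    return float(loss.detach())
```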

File tree: 5 files changed (+8 additions, -8 deletions)

examples/compile/gcn.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -62,7 +62,7 @@ def train():
     loss = F.cross_entropy(out[data.train_mask], data.y[data.train_mask])
     loss.backward()
     optimizer.step()
-    return float(loss)
+    return float(loss.detach())
 
 
 @torch.no_grad()
```

examples/compile/gin.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -73,7 +73,7 @@ def train():
         loss = F.cross_entropy(out, data.y)
         loss.backward()
         optimizer.step()
-        total_loss += float(loss) * data.num_graphs
+        total_loss += float(loss.detach()) * data.num_graphs
     return total_loss / len(train_loader.dataset)
 
 
```

examples/llm/g_retriever.py

Lines changed: 2 additions & 2 deletions
```diff
@@ -328,7 +328,7 @@ def adjust_learning_rate(param_group, LR, epoch):
                                      step / len(train_loader) + epoch)
 
             optimizer.step()
-            epoch_loss = epoch_loss + float(loss)
+            epoch_loss = epoch_loss + float(loss.detach())
 
             if (step + 1) % 2 == 0:
                 lr = optimizer.param_groups[0]['lr']
@@ -353,7 +353,7 @@ def adjust_learning_rate(param_group, LR, epoch):
 
     # Clean up memory
     torch.cuda.empty_cache()
-    torch.cuda.reset_max_memory_allocated()
+    torch.cuda.reset_peak_memory_stats()
 
     # Load best checkpoint if necessary
     if checkpointing and best_epoch != num_epochs - 1:
```
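The memory-tracking change is the same in every file: the deprecated `torch.cuda.reset_max_memory_allocated()` is swapped for `torch.cuda.reset_peak_memory_stats()`, which the former already delegates to. A rough sketch of the measurement pattern used between runs follows; the `run_model` callable is a hypothetical stand-in for the actual training or inference code, and a CUDA device is assumed:

```python
import torch


def peak_memory_gib(run_model) -> float:
    """Return the peak GPU memory (in GiB) used by one call to `run_model`."""
    torch.cuda.empty_cache()
    # Resets *all* peak memory stats, which is what the deprecated
    # reset_max_memory_allocated() now calls internally anyway.
    torch.cuda.reset_peak_memory_stats()
    run_model()
    return torch.cuda.max_memory_allocated() / (1024 ** 3)
```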

examples/llm/g_retriever_utils/minimal_demo.py

Lines changed: 3 additions & 3 deletions
```diff
@@ -212,7 +212,7 @@ def adjust_learning_rate(param_group, LR, epoch):
             best_epoch = epoch
             save_params_dict(model, f'{model_save_name}_best_val_loss_ckpt.pt')
     torch.cuda.empty_cache()
-    torch.cuda.reset_max_memory_allocated()
+    torch.cuda.reset_peak_memory_stats()
 
     if checkpointing and best_epoch != num_epochs - 1:
         print("Loading best checkpoint...")
@@ -343,7 +343,7 @@ def benchmark_models(models: List[Type[nn.Module]], model_names: List[str],
                 model=pure_llm,
                 dataset=dataset)
             torch.cuda.empty_cache()
-            torch.cuda.reset_max_memory_allocated()
+            torch.cuda.reset_peak_memory_stats()
             gc.collect()
             e2e_time = round(time.time() - since, 2)
             model_log["tuned_llm"]["prep_time"] = prep_time
@@ -386,7 +386,7 @@ def benchmark_models(models: List[Type[nn.Module]], model_names: List[str],
             tiny_llama=tiny_llama, dataset=dataset,
             model_save_name=root_dir + '/' + name, model=model)
         torch.cuda.empty_cache()
-        torch.cuda.reset_max_memory_allocated()
+        torch.cuda.reset_peak_memory_stats()
         gc.collect()
         e2e_time = round(time.time() - since, 2)
         model_log[name]["prep_time"] = prep_time
```

examples/llm/git_mol.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -108,7 +108,7 @@ def train(
                 f'gitmol_pretrain_epoch{best_epoch}_val_loss{best_val_loss:4f}_ckpt.pt' # noqa: E501
             )
     torch.cuda.empty_cache()
-    torch.cuda.reset_max_memory_allocated()
+    torch.cuda.reset_peak_memory_stats()
 
     # Test
     test_loss = eval(model, test_loader)
```
