explicitly call gc collect

kylesayrs · kylesayrs · commit 71820e191eef · 2025-06-03T15:25:17.000-04:00
Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
diff --git a/src/llmcompressor/transformers/sparsification/compressed_tensors_utils.py b/src/llmcompressor/transformers/sparsification/compressed_tensors_utils.py
@@ -56,10 +56,6 @@ def save_pretrained_compressed(save_pretrained_method):
         model_class = model_ref().__class__
         del save_pretrained_method
 
-        # hotfix: create a weak reference to the model to avoid circular dep
-        # TODO: determine why circular dep is not collected and how to clean up this fn
-        model_ref = weakref.ref(model)
-
         @wraps(original_save_pretrained)
         def save_pretrained_wrapper(
             save_directory: str,
@@ -99,11 +95,11 @@ def save_pretrained_wrapper(
             state_dict = kwargs.pop("state_dict", None)
             if state_dict is None:
                 logger.info("Fetching state_dict - this may take some time")
-                state_dict = get_state_dict_offloaded_model(model_ref())
+                state_dict = get_state_dict_offloaded_model(model)
 
             logger.info("Fetching compressor")
             compressor = get_model_compressor(
-                model=model_ref(),
+                model=model,
                 sparsity_config=sparsity_config,
                 quantization_format=quantization_format,
                 save_compressed=save_compressed,
@@ -115,7 +111,7 @@ def save_pretrained_wrapper(
             if compressor is None:
                 # model is not compressed or quantized, save as normal
                 original_save_pretrained_func = original_save_pretrained.__get__(
-                    model_ref(), model_class
+                    model, model_class
                 )
                 original_save_pretrained_func(
                     save_directory, state_dict=state_dict, **kwargs
@@ -125,10 +121,10 @@ def save_pretrained_wrapper(
             # make sure we're on the main process when saving
             if state_dict is not None and len(state_dict) > 0:
                 compressed_state_dict = compressor.compress(
-                    model_ref(), state_dict, show_progress=True
+                    model, state_dict, show_progress=True
                 )
                 logger.info("Saving compressed model to disk")
-                original_save_pretrained.__get__(model_ref(), model_class)(
+                original_save_pretrained.__get__(model, model_class)(
                     save_directory,
                     state_dict=compressed_state_dict,
                     safe_serialization=safe_serialization,
@@ -137,10 +133,10 @@ def save_pretrained_wrapper(
                 compressor.update_config(save_directory)
 
             # update existing recipe
-            update_and_save_recipe(model_ref().name_or_path, save_directory)
+            update_and_save_recipe(model.name_or_path, save_directory)
 
             # copy python files from cache dir to save_path if any
-            copy_python_files_from_model_cache(model_ref(), save_directory)
+            copy_python_files_from_model_cache(model, save_directory)
 
         save_pretrained_wrapper._overridden = True
         return save_pretrained_wrapper
diff --git a/tests/llmcompressor/conftest.py b/tests/llmcompressor/conftest.py
@@ -1,3 +1,4 @@
+import gc
 import os
 import shutil
 import tempfile
@@ -80,7 +81,7 @@ def check_for_created_files():
 
 
 @pytest.fixture(autouse=True, scope="function")
-def setup_fresh_session():
+def llm_compressor_setup_teardown():
     """
     setup any state tied to the execution of the given method in a
     class.  setup_method is invoked for every test method of a class.
@@ -92,3 +93,6 @@ def setup_fresh_session():
     yield
     # reset the session after each test
     reset_session()
+    # explicitly collect memory to catch memory bugs,
+    # see https://github.com/vllm-project/llm-compressor/pull/1503
+    gc.collect()
diff --git a/tests/llmcompressor/transformers/obcq/obcq_configs/sparse/gpu/llama_7b_sparse_second.yaml b/tests/llmcompressor/transformers/obcq/obcq_configs/sparse/gpu/llama_7b_sparse_second.yaml
@@ -0,0 +1,7 @@
+cadence: "nightly"
+test_type: "regression"
+model: "meta-llama/Llama-2-7b-hf"
+dataset: open_platypus
+recipe: "tests/llmcompressor/transformers/obcq/recipes/sparse.yaml"
+sparsity: 0.3
+device: "cuda:0"