Merge pull request #90 from OpenMOSS/update-source-dtype-fix

Hzfinfdu · web-flow · commit 743ff47ad5d9 · 2025-02-15T00:18:48.000+08:00
fix(activation): preserve tokens dtype by casting only hook-point activations
diff --git a/src/lm_saes/activation/processors/cached_activation.py b/src/lm_saes/activation/processors/cached_activation.py
@@ -237,7 +237,9 @@ def process(self, data: None = None, **kwargs) -> Iterable[dict[str, Any]]:
                 device=self.device,
             )
             if self.dtype is not None:
-                activations = {k: v.to(self.dtype) for k, v in activations.items()}
+                for k, v in activations.items():
+                    if k in self.hook_points:
+                        activations[k] = v.to(self.dtype)
             yield activations  # Use pin_memory to load data on cpu, then transfer them to cuda in the main process, as advised in https://discuss.pytorch.org/t/dataloader-multiprocessing-with-dataset-returning-a-cuda-tensor/151022/2.
             # I wrote this utils function as I notice it is used multiple times in this repo. Do we need to apply it elsewhere?
 
@@ -259,4 +261,4 @@ def __getitem__(self, chunk_idx):
         return self.activation_loader.load_chunk_for_hooks(
             chunk_idx,
             self.hook_chunks,
-        )
+        )