saprmarks
diff --git a/‎.github/workflows/build.yml
Lines changed: 90 additions & 0 deletions b/‎.github/workflows/build.yml
Lines changed: 90 additions & 0 deletions
diff --git a/‎.gitignore
Lines changed: 1 addition & 1 deletion b/‎.gitignore
Lines changed: 1 addition & 1 deletion
diff --git a/‎README.md
Lines changed: 1 addition & 5 deletions b/‎README.md
Lines changed: 1 addition & 5 deletions
diff --git a/‎__init__.py
Lines changed: 0 additions & 2 deletions b/‎__init__.py
Lines changed: 0 additions & 2 deletions
diff --git a/‎dictionary_learning/__init__.py
Lines changed: 6 additions & 0 deletions b/‎dictionary_learning/__init__.py
Lines changed: 6 additions & 0 deletions
diff --git a/‎buffer.py renamed to ‎dictionary_learning/buffer.py b/‎buffer.py renamed to ‎dictionary_learning/buffer.py
diff --git a/‎config.py renamed to ‎dictionary_learning/config.py b/‎config.py renamed to ‎dictionary_learning/config.py
diff --git a/‎dictionary.py renamed to ‎dictionary_learning/dictionary.py b/‎dictionary.py renamed to ‎dictionary_learning/dictionary.py
diff --git a/‎evaluation.py renamed to ‎dictionary_learning/evaluation.py b/‎evaluation.py renamed to ‎dictionary_learning/evaluation.py
diff --git a/‎grad_pursuit.py renamed to ‎dictionary_learning/grad_pursuit.py b/‎grad_pursuit.py renamed to ‎dictionary_learning/grad_pursuit.py
diff --git a/‎interp.py renamed to ‎dictionary_learning/interp.py
Lines changed: 1 addition & 1 deletion b/‎interp.py renamed to ‎dictionary_learning/interp.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎trainers/__init__.py renamed to ‎dictionary_learning/trainers/__init__.py
Lines changed: 12 additions & 0 deletions b/‎trainers/__init__.py renamed to ‎dictionary_learning/trainers/__init__.py
Lines changed: 12 additions & 0 deletions
diff --git a/‎trainers/batch_top_k.py renamed to ‎dictionary_learning/trainers/batch_top_k.py b/‎trainers/batch_top_k.py renamed to ‎dictionary_learning/trainers/batch_top_k.py
diff --git a/‎trainers/gated_anneal.py renamed to ‎dictionary_learning/trainers/gated_anneal.py b/‎trainers/gated_anneal.py renamed to ‎dictionary_learning/trainers/gated_anneal.py
diff --git a/‎trainers/gdm.py renamed to ‎dictionary_learning/trainers/gdm.py b/‎trainers/gdm.py renamed to ‎dictionary_learning/trainers/gdm.py
diff --git a/‎trainers/jumprelu.py renamed to ‎dictionary_learning/trainers/jumprelu.py b/‎trainers/jumprelu.py renamed to ‎dictionary_learning/trainers/jumprelu.py
diff --git a/‎trainers/matryoshka_batch_top_k.py renamed to ‎dictionary_learning/trainers/matryoshka_batch_top_k.py b/‎trainers/matryoshka_batch_top_k.py renamed to ‎dictionary_learning/trainers/matryoshka_batch_top_k.py
diff --git a/‎trainers/p_anneal.py renamed to ‎dictionary_learning/trainers/p_anneal.py b/‎trainers/p_anneal.py renamed to ‎dictionary_learning/trainers/p_anneal.py
diff --git a/‎trainers/standard.py renamed to ‎dictionary_learning/trainers/standard.py b/‎trainers/standard.py renamed to ‎dictionary_learning/trainers/standard.py
diff --git a/‎trainers/top_k.py renamed to ‎dictionary_learning/trainers/top_k.py b/‎trainers/top_k.py renamed to ‎dictionary_learning/trainers/top_k.py
diff --git a/‎trainers/trainer.py renamed to ‎dictionary_learning/trainers/trainer.py b/‎trainers/trainer.py renamed to ‎dictionary_learning/trainers/trainer.py
diff --git a/‎training.py renamed to ‎dictionary_learning/training.py b/‎training.py renamed to ‎dictionary_learning/training.py
diff --git a/‎utils.py renamed to ‎dictionary_learning/utils.py b/‎utils.py renamed to ‎dictionary_learning/utils.py
diff --git a/‎pyproject.toml
Lines changed: 45 additions & 0 deletions b/‎pyproject.toml
Lines changed: 45 additions & 0 deletions
diff --git a/‎requirements.txt
Lines changed: 0 additions & 13 deletions b/‎requirements.txt
Lines changed: 0 additions & 13 deletions
diff --git a/‎tests/test_end_to_end.py
Lines changed: 6 additions & 4 deletions b/‎tests/test_end_to_end.py
Lines changed: 6 additions & 4 deletions
diff --git a/‎tests/unit/test_dictionary.py
Lines changed: 136 additions & 0 deletions b/‎tests/unit/test_dictionary.py
Lines changed: 136 additions & 0 deletions
@@ -0,0 +1,90 @@
+name: build
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.10", "3.11", "3.12"]
+
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Cache Huggingface assets
+        uses: actions/cache@v4
+        with:
+          key: huggingface-0-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/pyproject.toml') }}
+          path: ~/.cache/huggingface
+          restore-keys: |
+            huggingface-0-${{ runner.os }}-${{ matrix.python-version }}-
+      - name: Load cached Poetry installation
+        id: cached-poetry
+        uses: actions/cache@v4
+        with:
+          path: ~/.local # the path depends on the OS
+          key: poetry-${{ runner.os }}-${{ matrix.python-version }}-1 # increment to reset cache
+      - name: Install Poetry
+        if: steps.cached-poetry.outputs.cache-hit != 'true'
+        uses: snok/install-poetry@v1
+        with:
+          virtualenvs-create: true
+          virtualenvs-in-project: true
+          installer-parallel: true
+      - name: Load cached venv
+        id: cached-poetry-dependencies
+        uses: actions/cache@v4
+        with:
+          path: .venv
+          key: venv-0-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/pyproject.toml') }}
+          restore-keys: |
+            venv-0-${{ runner.os }}-${{ matrix.python-version }}-
+      - name: Install dependencies
+        if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
+        run: poetry install --no-interaction
+      - name: Run Unit Tests
+        run: poetry run pytest tests/unit
+      - name: Build package
+        run: poetry build
+
+  release:
+    needs: build
+    permissions:
+      contents: write
+      id-token: write
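+    # Release only for pushes to main whose head commit message does not contain 'chore(release):'
+    # (presumably the commits semantic-release itself creates — confirm).
+    # Background: https://github.community/t/how-do-i-specify-job-dependency-running-in-another-workflow/16482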
+    if: github.event_name == 'push' && github.ref == 'refs/heads/main' && !contains(github.event.head_commit.message, 'chore(release):')
+    runs-on: ubuntu-latest
+    concurrency: release
+    environment:
+      name: pypi
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - name: Semantic Release
+        id: release
+        uses: python-semantic-release/python-semantic-release@v8.0.7
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+      - name: Publish package distributions to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        if: steps.release.outputs.released == 'true'
+      - name: Publish package distributions to GitHub Releases
+        uses: python-semantic-release/upload-to-gh-release@main
+        if: steps.release.outputs.released == 'true'
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
@@ -99,7 +99,7 @@ ipython_config.py
 #   This is especially recommended for binary packages to ensure reproducibility, and is more
 #   commonly ignored for libraries.
 #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
-#poetry.lock
+poetry.lock
 
 # pdm
 #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
 
@@ -8,13 +8,9 @@ Some dictionaries trained using this repository (and associated training checkpo
 
 Install the package from PyPI with pip; cloning the repository is no longer required.
 ```bash
-git clone https://github.com/saprmarks/dictionary_learning
-cd dictionary_learning
-pip install -r requirements.txt
+pip install dictionary-learning
 ```
 
-To use `dictionary_learning`, include it as a subdirectory in some project's directory and import it; see the examples below.
-
 We also provide a [demonstration](https://github.com/adamkarvonen/dictionary_learning_demo), which trains and evaluates 2 SAEs in ~30 minutes before plotting the results.
 
 # Using trained dictionaries
 
@@ -0,0 +1,6 @@
+__version__ = "0.1.0"
+
+from .dictionary import AutoEncoder, GatedAutoEncoder, JumpReluAutoEncoder
+from .buffer import ActivationBuffer
+
+__all__ = ["AutoEncoder", "GatedAutoEncoder", "JumpReluAutoEncoder", "ActivationBuffer"]
@@ -188,4 +188,4 @@ def feature_umap(
             hover_name=df.index,
             color=colors,
         )
-    raise ValueError("n_components must be 2 or 3")
+    raise ValueError("n_components must be 2 or 3")
@@ -5,3 +5,15 @@
 from .top_k import TopKTrainer
 from .jumprelu import JumpReluTrainer
 from .batch_top_k import BatchTopKTrainer, BatchTopKSAE
+
+
+__all__ = [
+    "StandardTrainer",
+    "GatedSAETrainer",
+    "PAnnealTrainer",
+    "GatedAnnealTrainer",
+    "TopKTrainer",
+    "JumpReluTrainer",
+    "BatchTopKTrainer",
+    "BatchTopKSAE",
+]
@@ -0,0 +1,45 @@
+[tool.poetry]
+name = "dictionary-learning"
+version = "0.1.0"
+description = "Dictionary learning via sparse autoencoders on neural network activations"
+authors = ["Samuel Marks", "Adam Karvonen", "Aaron Mueller"]
+packages = [{ include = "dictionary_learning" }]
+license = "MIT"
+readme = "README.md"
+keywords = [
+    "deep-learning",
+    "sparse-autoencoders",
+    "mechanistic-interpretability",
+    "PyTorch",
+]
+classifiers = ["Topic :: Scientific/Engineering :: Artificial Intelligence"]
+repository = "https://github.com/saprmarks/dictionary_learning"
+homepage = "https://github.com/saprmarks/dictionary_learning"
+
+
+[tool.poetry.dependencies]
+python = "^3.10"
+circuitsvis = ">=1.43.2"
+datasets = ">=2.18.0"
+einops = ">=0.7.0"
+nnsight = ">=0.3.0,<0.4.0"
+pandas = ">=2.2.1"
+plotly = ">=5.18.0"
+tqdm = ">=4.66.1"
+zstandard = ">=0.22.0"
+wandb = ">=0.12.0"
+umap-learn = ">=0.5.6"
+llvmlite = ">=0.40.0"
+
+[tool.poetry.group.dev.dependencies]
+pytest = "^8.3.4"
+
+[build-system]
+requires = ["poetry-core>=2.0.0,<3.0.0"]
+build-backend = "poetry.core.masonry.api"
+
+[tool.semantic_release]
+version_variables = ["dictionary_learning/__init__.py:__version__"]
+version_toml = ["pyproject.toml:tool.poetry.version"]
+branch = "main"
+build_command = "pip install poetry && poetry build"
@@ -7,7 +7,11 @@
 from dictionary_learning.training import trainSAE
 from dictionary_learning.trainers.standard import StandardTrainer
 from dictionary_learning.trainers.top_k import TopKTrainer, AutoEncoderTopK
-from dictionary_learning.utils import hf_dataset_to_generator, get_nested_folders, load_dictionary
+from dictionary_learning.utils import (
+    hf_dataset_to_generator,
+    get_nested_folders,
+    load_dictionary,
+)
 from dictionary_learning.buffer import ActivationBuffer
 from dictionary_learning.dictionary import (
     AutoEncoder,
@@ -62,10 +66,8 @@ def test_sae_training():
     """End to end test for training an SAE. Takes ~2 minutes on an RTX 3090.
     This isn't a nice suite of unit tests, but it's better than nothing.
     I have observed that results can slightly vary with library versions. For full determinism,
-    use pytorch 2.5.1 and nnsight 0.3.7.
+    use pytorch 2.5.1 and nnsight 0.3.7."""
 
-    NOTE: `dictionary_learning` is meant to be used as a submodule. Thus, to run this test, you need to use `dictionary_learning` as a submodule
-    and run the test from the root of the repository using `pytest -s`. Refer to https://github.com/adamkarvonen/dictionary_learning_demo for an example"""
     random.seed(RANDOM_SEED)
     t.manual_seed(RANDOM_SEED)
 
 
@@ -0,0 +1,136 @@
+import torch as t
+import pytest
+from dictionary_learning.dictionary import (
+    AutoEncoder,
+    GatedAutoEncoder,
+    AutoEncoderNew,
+    JumpReluAutoEncoder,
+)
+
+
+@pytest.mark.parametrize(
+    "sae_cls", [AutoEncoder, GatedAutoEncoder, JumpReluAutoEncoder]
+)
+def test_forward_equals_decode_encode(sae_cls: type) -> None:
+    """Check that ``forward`` agrees with ``decode(encode(x))`` for each SAE class.
+
+    Both call styles are covered: the plain call, and the
+    ``output_features=True`` call, which returns a pair of tensors
+    (output, features).
+    """
+    batch_size = 4
+    act_dim = 8
+    dict_size = 6
+    x = t.randn(batch_size, act_dim)
+
+    sae = sae_cls(activation_dim=act_dim, dict_size=dict_size)
+
+    # Reference values computed through the explicit encode/decode path.
+    encode_features = sae.encode(x)
+    encode_decode = sae.decode(encode_features)
+
+    # Test without output_features
+    forward_out = sae(x)
+    assert t.allclose(forward_out, encode_decode)
+
+    # Test with output_features: check BOTH returned tensors. The output
+    # returned on this path was previously bound but never asserted.
+    forward_out, features = sae(x, output_features=True)
+    assert t.allclose(forward_out, encode_decode)
+    assert t.allclose(features, encode_features)
+
+
+def test_simple_autoencoder() -> None:
+    """AutoEncoder with identity weights and zero biases: encode, then decode."""
+    sae = AutoEncoder(activation_dim=2, dict_size=2)
+
+    identity = [[1.0, 0.0], [0.0, 1.0]]
+    with t.no_grad():
+        # Identity encoder/decoder and zero biases keep the expected values easy to read.
+        sae.encoder.weight.data = t.tensor(identity)
+        sae.decoder.weight.data = t.tensor(identity)
+        sae.encoder.bias.data = t.zeros(2)
+        sae.bias.data = t.zeros(2)
+
+    inputs = t.tensor([[2.0, -1.0]])
+    expected = t.tensor([[2.0, 0.0]])
+
+    # Encoding: the negative component comes out as zero (ReLU clips negative value).
+    latent = sae.encode(inputs)
+    assert t.allclose(latent, expected)
+
+    # Decoding the encoded features gives back the same values.
+    reconstruction = sae.decode(latent)
+    assert t.allclose(reconstruction, expected)
+
+
+def test_simple_gated_autoencoder() -> None:
+    """GatedAutoEncoder with identity weights and zeroed gate/magnitude parameters."""
+    sae = GatedAutoEncoder(activation_dim=2, dict_size=2)
+
+    identity = [[1.0, 0.0], [0.0, 1.0]]
+    with t.no_grad():
+        sae.encoder.weight.data = t.tensor(identity)
+        sae.decoder.weight.data = t.tensor(identity)
+        # Zero the remaining parameters this test sets.
+        sae.gate_bias.data = t.zeros(2)
+        sae.mag_bias.data = t.zeros(2)
+        sae.r_mag.data = t.zeros(2)
+        sae.decoder_bias.data = t.zeros(2)
+
+    inputs = t.tensor([[2.0, -1.0]])
+    expected = t.tensor([[2.0, 0.0]])
+
+    # Only positive values pass through
+    latent = sae.encode(inputs)
+    assert t.allclose(latent, expected)
+
+
+def test_normalize_decoder() -> None:
+    """normalize_decoder should give unit-norm decoder weights and leave the output unchanged."""
+    sae = AutoEncoder(activation_dim=4, dict_size=3)
+    x = t.randn(2, 4)
+
+    # Output before normalization, used as the reference.
+    output_before = sae(x)
+
+    sae.normalize_decoder()
+
+    # Norms taken along dim 0 of the decoder weight must all be one.
+    column_norms = t.norm(sae.decoder.weight, dim=0)
+    assert t.allclose(column_norms, t.ones_like(column_norms))
+
+    # Same input must give the same output (within tolerance) afterwards.
+    output_after = sae(x)
+    assert t.allclose(output_before, output_after, atol=1e-4)
+
+
+def test_scale_biases() -> None:
+    """scale_biases should multiply the encoder bias and the ``bias`` parameter by the factor."""
+    sae = AutoEncoder(activation_dim=4, dict_size=3)
+
+    # Snapshot both biases before scaling.
+    encoder_bias_before = sae.encoder.bias.data.clone()
+    bias_before = sae.bias.data.clone()
+
+    factor = 2.0
+    sae.scale_biases(factor)
+
+    assert t.allclose(sae.encoder.bias.data, factor * encoder_bias_before)
+    assert t.allclose(sae.bias.data, factor * bias_before)
+
+
+@pytest.mark.parametrize(
+    "sae_cls", [AutoEncoder, GatedAutoEncoder, AutoEncoderNew, JumpReluAutoEncoder]
+)
+def test_output_shapes(sae_cls: type) -> None:
+    """encode, decode and forward should return tensors of the expected shapes."""
+    batch_size, act_dim, dict_size = 3, 4, 5
+    feature_shape = (batch_size, dict_size)
+    activation_shape = (batch_size, act_dim)
+    x = t.randn(batch_size, act_dim)
+
+    sae = sae_cls(activation_dim=act_dim, dict_size=dict_size)
+
+    # encode -> (batch_size, dict_size); decode -> (batch_size, act_dim)
+    encoded = sae.encode(x)
+    assert encoded.shape == feature_shape
+    assert sae.decode(encoded).shape == activation_shape
+
+    # forward, first without and then with the feature output
+    assert sae(x).shape == activation_shape
+
+    output, features = sae(x, output_features=True)
+    assert output.shape == activation_shape
+    assert features.shape == feature_shape
Original file line number	Diff line number	Diff line change
`@@ -188,4 +188,4 @@ def feature_umap(`
`188`	`188`	`hover_name=df.index,`
`189`	`189`	`color=colors,`
`190`	`190`	`)`
`191`		`- raise ValueError("n_components must be 2 or 3")`
	`191`	`+ raise ValueError("n_components must be 2 or 3")`