
Run CI on Modal, upgrade Bitsandbytes #641


Merged
58 commits merged on Mar 15, 2025

Commits
fc52696
Run CI on Modal, upgrade Bitsandbytes
mryab Feb 10, 2025
58f3d44
Add docs configuration
mryab Feb 10, 2025
6d36cd1
Fix formatting
mryab Feb 10, 2025
ab714bd
Configure concurrency for Modal tests
mryab Feb 10, 2025
c840ab9
Sort imports
mryab Feb 10, 2025
f717bf6
Set up the timeout
mryab Feb 10, 2025
0dca5a2
Set up concurrency for other actions as well
mryab Feb 10, 2025
11feccf
Remove concurrency limits
mryab Feb 10, 2025
cbf4450
Add concurrency, update bitsandbytes in dependencies
mryab Feb 10, 2025
4f303bd
Add cache, bump CI versions
mryab Feb 10, 2025
6a5ec5e
Skip test_allreduce_protocol for the time being
mryab Feb 10, 2025
ba3e386
Reduce the number of CPUs
mryab Feb 10, 2025
1fb8dec
Decrease the limits in test_dht_connection_successful
mryab Feb 10, 2025
67e040f
Restore the limits in test_dht_connection_successful
mryab Feb 10, 2025
c0af379
Clear the blacklist before attempting store
mryab Feb 10, 2025
6116570
Increase the wait in test_load_state_from_peers
mryab Feb 10, 2025
801bb4f
Parametrize tests by Python version, upload Codecov coverage
mryab Feb 11, 2025
fd69b64
Check out and build a specific version of bitsandbytes
mryab Feb 11, 2025
22739f5
Increase the timeouts to account for image builds
mryab Feb 11, 2025
635879f
Introduce timeouts
mryab Feb 22, 2025
8fbd9dd
Increase the number of CPUs for tests
mryab Feb 22, 2025
d70b4b9
Make tests more robust
mryab Feb 23, 2025
4254468
Make tests more robust
mryab Feb 23, 2025
1753bae
Reformat the code
mryab Feb 23, 2025
4753fef
Mark test_client_disconnect as flaky
mryab Feb 23, 2025
9705318
Build and test p2pd separately
mryab Feb 23, 2025
ae5ed98
Install Go only for a specific image
mryab Feb 23, 2025
11eb277
Don't use uv when building p2pd
mryab Feb 23, 2025
9d37fe9
Mark test_dhtnode_blacklist as flaky
mryab Feb 23, 2025
7abc9f0
Increase timeouts
mryab Feb 23, 2025
5b69835
Make test_averaging_trigger more robust
mryab Feb 23, 2025
9e37679
Download codecov with wget
mryab Feb 23, 2025
aa20215
Skip all training tests for the time being
mryab Feb 23, 2025
a03288e
Skip test_allgather for the time being
mryab Feb 23, 2025
a614a02
Mark test_performance_ema_threadsafe and test_remote_expert_worker_ru…
mryab Feb 23, 2025
2cfc94a
Reduce timeouts, mark test_background_server_identity_path as flaky
mryab Feb 23, 2025
df048db
Mention sponsorship by Prime Intellect
mryab Feb 23, 2025
e388e07
Fix missing import
mryab Feb 23, 2025
98e6a38
Mark flaky tests
mryab Feb 23, 2025
b317b29
Modify the codecov command
mryab Feb 23, 2025
66c9187
Pass extra environment variables to codecov
mryab Feb 23, 2025
93460aa
Remove --dist from codecov run
mryab Feb 23, 2025
75529a1
Pass GITHUB_EVENT_PULL_REQUEST_HEAD_SHA when running the test
mryab Feb 23, 2025
83b53bb
Mark test_fault_tolerance as flaky
mryab Feb 23, 2025
5984bad
Mark test_cli_run_server_identity_path as flaky
mryab Feb 23, 2025
2f67c52
Disable parallel execution for codecov management
mryab Feb 23, 2025
e8efb66
Increase codecov run timeout to 15 minutes
mryab Feb 23, 2025
f8ad2a8
Pass GITHUB_EVENT_PULL_REQUEST_HEAD_SHA to the workflow
mryab Feb 23, 2025
225439e
Pass additional secrets
mryab Feb 23, 2025
3695813
Mark one more test as flaky
mryab Feb 23, 2025
6bac780
Mark another test as flaky
mryab Feb 23, 2025
3228dfd
Pass codecov values explicitly
mryab Feb 23, 2025
0a9347d
Pass --no-use-pep517 to uv pip install
mryab Feb 23, 2025
87f0ece
Change uv pip to pip
mryab Feb 23, 2025
46fa9f5
Extract the blocksize for quantization into a constant
mryab Mar 15, 2025
717dd34
Fix missing newline
mryab Mar 15, 2025
0fcd2ba
Rewrite test_averaging_trigger with time.monotonic
mryab Mar 15, 2025
cfa51d2
Replace os.unlink with os.remove
mryab Mar 15, 2025
12 changes: 8 additions & 4 deletions .github/workflows/check-style.yml
@@ -5,20 +5,24 @@ on:
branches: [ master ]
pull_request:

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true

jobs:
black:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: psf/black@stable
with:
options: "--check --diff"
version: "22.3.0"
isort:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v3
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: 3.11
- uses: isort/isort-action@master
@@ -28,7 +32,7 @@ jobs:
codespell:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: codespell-project/actions-codespell@v1
with:
only_warn: 1
6 changes: 5 additions & 1 deletion .github/workflows/push-docker-image.yml
@@ -8,13 +8,17 @@ on:
pull_request:
branches: [ master ]

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true

jobs:
build:
runs-on: ubuntu-latest

steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: Docker meta
id: meta
12 changes: 8 additions & 4 deletions .github/workflows/run-benchmarks.yml
@@ -5,19 +5,23 @@ on:
branches: [ master ]
pull_request:

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true

jobs:
run_benchmarks:

runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v3
uses: actions/setup-python@v5
with:
python-version: 3.11
- name: Cache dependencies
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: ~/.cache/pip
key: Key-v1-3.11-${{ hashFiles('requirements.txt') }}-${{ hashFiles('requirements-dev.txt') }}
@@ -28,7 +32,7 @@ jobs:
pip install -r requirements-dev.txt
- name: Build bitsandbytes
run: |
pip install bitsandbytes==0.41.1
pip install bitsandbytes==0.45.2
- name: Build hivemind
run: |
pip install .
112 changes: 112 additions & 0 deletions .github/workflows/run-tests-on-modal.yml
@@ -0,0 +1,112 @@
name: Modal tests

on:
push:
branches: [master]
pull_request:

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true

jobs:
run_tests:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.9", "3.10", "3.11", "3.12"]
fail-fast: false
env:
MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
PYTHON_VERSION: ${{ matrix.python-version }}
timeout-minutes: 15
steps:
- name: Checkout Repository
uses: actions/checkout@v4

- name: Install Python
uses: actions/setup-python@v5
with:
python-version: "3.12"

- name: Cache dependencies
uses: actions/cache@v4
with:
path: ~/.cache/pip
key: Key-v1-3.12-modal

- name: Install build dependencies
run: |
python -m pip install --upgrade pip
pip install modal==0.73.32

- name: Run tests
run: |
modal run modal_ci.py::run_tests

measure_coverage:
runs-on: ubuntu-latest
env:
MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
GITHUB_EVENT_NAME: ${{ github.event_name }}
GITHUB_EVENT_NUMBER: ${{ github.event.number }}
GITHUB_EVENT_PULL_REQUEST_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
PYTHON_VERSION: "3.11"
timeout-minutes: 15
steps:
- name: Checkout Repository
uses: actions/checkout@v4

- name: Install Python
uses: actions/setup-python@v5
with:
python-version: "3.12"

- name: Cache dependencies
uses: actions/cache@v4
with:
path: ~/.cache/pip
key: Key-v1-3.12-modal

- name: Install build dependencies
run: |
python -m pip install --upgrade pip
pip install modal==0.73.32

- name: Measure and upload coverage
run: |
modal run modal_ci.py::run_codecov

build_and_test_p2pd:
runs-on: ubuntu-latest
env:
MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
PYTHON_VERSION: "3.11"
timeout-minutes: 10
steps:
- name: Checkout Repository
uses: actions/checkout@v4

- name: Install Python
uses: actions/setup-python@v5
with:
python-version: "3.12"

- name: Cache dependencies
uses: actions/cache@v4
with:
path: ~/.cache/pip
key: Key-v1-3.12-modal

- name: Install build dependencies
run: |
python -m pip install --upgrade pip
pip install modal==0.73.32

- name: Run p2pd tests
run: |
modal run modal_ci.py::build_and_test_p2pd
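
All three jobs above delegate the actual work to functions defined in modal_ci.py (`modal run modal_ci.py::run_tests` and so on), but that file does not appear in this excerpt of the diff. For orientation, here is a minimal sketch of what such a Modal entrypoint could look like; the image definition, resource settings, and the way the repository checkout reaches the container are illustrative assumptions, not the PR's actual code.

```python
# Hypothetical sketch of modal_ci.py (not the PR's actual file).
import os
import subprocess

import modal

# Build a container image with the project's dependencies; PYTHON_VERSION is
# forwarded from the workflow's env, so the job matrix controls the interpreter.
image = (
    modal.Image.debian_slim(python_version=os.environ.get("PYTHON_VERSION", "3.11"))
    .pip_install_from_requirements("requirements.txt")
    .pip_install_from_requirements("requirements-dev.txt")
)

app = modal.App("hivemind-ci", image=image)


@app.function(cpu=8, timeout=900)  # 15 minutes, matching the workflow's timeout-minutes
def run_tests() -> None:
    # How the repository checkout is made available inside the container
    # (baked into the image, mounted, etc.) is omitted in this sketch.
    result = subprocess.run(["pytest", "--durations=0", "tests"], check=False)
    if result.returncode != 0:
        raise RuntimeError(f"pytest exited with code {result.returncode}")
```

On the GitHub runner, `modal run modal_ci.py::run_tests` then executes the function remotely on Modal, authenticated via the MODAL_TOKEN_ID / MODAL_TOKEN_SECRET secrets exported in the job env.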
20 changes: 11 additions & 9 deletions .github/workflows/run-tests.yml
@@ -1,9 +1,11 @@
name: Tests

on:
push:
branches: [ master ]
pull_request:
# Tests in GHA only run manually, see run-tests-on-modal.yml for the same tests in CI
on: workflow_dispatch

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true

jobs:
run_tests:
@@ -15,13 +17,13 @@ jobs:
fail-fast: false
timeout-minutes: 15
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v3
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Cache dependencies
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: ~/.cache/pip
key: Key-v1-${{ matrix.python-version }}-${{ hashFiles('requirements.txt') }}-${{ hashFiles('requirements-dev.txt') }}
@@ -32,7 +34,7 @@ jobs:
pip install -r requirements-dev.txt
- name: Build bitsandbytes
run: |
pip install bitsandbytes==0.41.1
pip install bitsandbytes==0.45.2
- name: Build hivemind
run: |
pip install .
@@ -94,7 +96,7 @@ jobs:
pip install -r requirements-dev.txt
- name: Build bitsandbytes
run: |
pip install bitsandbytes==0.41.1
pip install bitsandbytes==0.45.2
- name: Build hivemind
run: |
pip install -e . --no-use-pep517
1 change: 1 addition & 0 deletions .readthedocs.yml
@@ -2,6 +2,7 @@ version: 2

sphinx:
fail_on_warning: true
configuration: docs/conf.py

python:
install:
4 changes: 4 additions & 0 deletions README.md
@@ -118,6 +118,10 @@ the [contributing guidelines](https://github.com/learning-at-home/hivemind/blob/
more about other ways to contribute, read
our [guide](https://learning-at-home.readthedocs.io/en/latest/user/contributing.html).

## Collaborators and Sponsorship

* [Prime Intellect](https://www.primeintellect.ai/) sponsors compute resources on [Modal](https://modal.com/) for CI

## Citation

If you found hivemind or its underlying algorithms useful for your research, please cite the following source:
6 changes: 3 additions & 3 deletions hivemind/compression/base.py
@@ -107,14 +107,14 @@ def extract(self, serialized_tensor: runtime_pb2.Tensor) -> torch.Tensor:
if serialized_tensor.dtype == "bfloat16":
numel = shape.numel()
if numel > 0 and len(serialized_tensor.buffer) // numel == 4:
array = np.frombuffer(serialized_tensor.buffer, dtype=np.float32)
array = np.frombuffer(bytearray(serialized_tensor.buffer), dtype=np.float32)
tensor = torch.as_tensor(array, dtype=torch.bfloat16)
else:
array = np.frombuffer(serialized_tensor.buffer, dtype=np.int16)
array = np.frombuffer(bytearray(serialized_tensor.buffer), dtype=np.int16)
# reinterpret_cast from an arbitrary 2-byte type supported by numpy
tensor = torch.as_tensor(array).view(torch.bfloat16)
else:
array = np.frombuffer(serialized_tensor.buffer, dtype=np.dtype(serialized_tensor.dtype))
array = np.frombuffer(bytearray(serialized_tensor.buffer), dtype=np.dtype(serialized_tensor.dtype))
tensor = torch.as_tensor(array)
return tensor.reshape(shape)

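A note on the bytearray() wrapping above: this is a reading of the change rather than something stated in the diff. np.frombuffer over the immutable bytes of a protobuf field returns a read-only array, and torch.as_tensor on a read-only NumPy array warns that the array is not writable (and would share memory with the buffer). Copying into a bytearray first yields a writable array. A minimal sketch:

```python
# Minimal illustration (an assumption about the motivation, not taken from the PR).
import numpy as np
import torch

buf = torch.arange(4, dtype=torch.float32).numpy().tobytes()  # immutable bytes

readonly = np.frombuffer(buf, dtype=np.float32)              # backed by bytes -> not writable
writable = np.frombuffer(bytearray(buf), dtype=np.float32)   # bytearray copy -> writable

print(readonly.flags.writeable, writable.flags.writeable)  # False True
# torch.as_tensor(readonly) emits a "The given NumPy array is not writable" UserWarning;
# torch.as_tensor(writable) converts silently.
tensor = torch.as_tensor(writable)
```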
16 changes: 13 additions & 3 deletions hivemind/compression/quantization.py
@@ -14,6 +14,7 @@
warnings.filterwarnings("ignore", module="bitsandbytes", category=UserWarning)

EXECUTOR = ThreadPoolExecutor(max_workers=int(os.environ.get("QUANTIZATION_THREADS", 128)))
_BLOCKWISE_QUANTIZATION_BLOCKSIZE = 4096


class Quantization(CompressionBase, ABC):
@@ -140,8 +141,15 @@ def quantize(
except ImportError:
raise ImportError(BNB_MISSING_MESSAGE)

quantized, (absmax, codebook, *extra_params) = quantize_blockwise(tensor, blocksize=4096, nested=False)
assert tuple(extra_params) == self.EXTRA_PARAMS # blocksize, nested, dtype, offset, state2
assert tensor.dtype == torch.float32

quantized, quant_state = quantize_blockwise(tensor, blocksize=_BLOCKWISE_QUANTIZATION_BLOCKSIZE, nested=False)
absmax, codebook = quant_state.absmax, quant_state.code
assert quant_state.blocksize == _BLOCKWISE_QUANTIZATION_BLOCKSIZE
assert quant_state.nested is False
assert quant_state.dtype == self.EXTRA_PARAMS[2]
assert quant_state.offset == self.EXTRA_PARAMS[3]
assert quant_state.state2 == self.EXTRA_PARAMS[4]
return quantized.numpy(), (absmax.numpy(), codebook.numpy())

def compress(self, tensor: torch.Tensor, info: CompressionInfo, allow_inplace: bool = False) -> runtime_pb2.Tensor:
@@ -187,5 +195,7 @@ def extract(self, serialized_tensor: runtime_pb2.Tensor) -> torch.Tensor:
absmax = torch.as_tensor(absmax)
codebook = torch.as_tensor(codebook)
quantized = torch.as_tensor(quantized).reshape(tuple(serialized_tensor.size))
result = dequantize_blockwise(quantized, (absmax, codebook, *self.EXTRA_PARAMS))
result = dequantize_blockwise(
quantized, absmax=absmax, code=codebook, blocksize=_BLOCKWISE_QUANTIZATION_BLOCKSIZE, nested=False
)
return result.to(getattr(torch, serialized_tensor.dtype)).requires_grad_(serialized_tensor.requires_grad)
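
The rewrite above tracks the bitsandbytes >= 0.45 interface, where quantize_blockwise returns a QuantState object instead of a plain tuple and dequantize_blockwise accepts absmax, code, and blocksize as keyword arguments. A small round-trip sketch under that assumption (the random input and printed error are illustrative only):

```python
# Round-trip sketch against the bitsandbytes>=0.45 blockwise API used above.
import torch
from bitsandbytes.functional import dequantize_blockwise, quantize_blockwise

BLOCKSIZE = 4096  # mirrors _BLOCKWISE_QUANTIZATION_BLOCKSIZE

tensor = torch.randn(16384, dtype=torch.float32)

# New API: returns the 8-bit payload plus a QuantState carrying absmax, code, blocksize, ...
quantized, quant_state = quantize_blockwise(tensor, blocksize=BLOCKSIZE, nested=False)

restored = dequantize_blockwise(
    quantized,
    absmax=quant_state.absmax,
    code=quant_state.code,
    blocksize=BLOCKSIZE,
    nested=False,
)

assert restored.shape == tensor.shape
print((restored - tensor).abs().max())  # small quantization error from 8-bit blockwise compression
```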