
Commit ad8b50c

Merge pull request #10 from argmaxinc/coreml-converter-helpers
Add CoreML conversion helpers
2 parents 84e44fd + f748a99 commit ad8b50c

File tree

9 files changed (+156, -40 lines)

README.md

Lines changed: 2 additions & 2 deletions
@@ -48,13 +48,13 @@ huggingface-cli login --token YOUR_HF_HUB_TOKEN
**Step 3:** Prepare the denoise model (MMDiT) Core ML model files (`.mlpackage`)

```shell
-python -m tests.torch2coreml.test_mmdit --sd3-ckpt-path stabilityai/stable-diffusion-3-medium --model-version 2b -o <output-mlpackages-directory> --latent-size {64, 128}
+python -m python.src.diffusionkit.tests.torch2coreml.test_mmdit --sd3-ckpt-path stabilityai/stable-diffusion-3-medium --model-version 2b -o <output-mlpackages-directory> --latent-size {64, 128}
```

**Step 4:** Prepare the VAE Decoder Core ML model files (`.mlpackage`)

```shell
-python -m tests.torch2coreml.test_vae --sd3-ckpt-path stabilityai/stable-diffusion-3-medium -o <output-mlpackages-directory> --latent-size {64, 128}
+python -m python.src.diffusionkit.tests.torch2coreml.test_vae --sd3-ckpt-path stabilityai/stable-diffusion-3-medium -o <output-mlpackages-directory> --latent-size {64, 128}
```

Note:

python/src/diffusionkit/tests/__init__.py

Whitespace-only changes.

python/src/diffusionkit/tests/mlx/__init__.py

Whitespace-only changes.

tests/mlx/test_diffusion_pipeline.py renamed to python/src/diffusionkit/tests/mlx/test_diffusion_pipeline.py

Lines changed: 2 additions & 3 deletions
@@ -10,12 +10,11 @@
import mlx.core as mx
import numpy as np
from argmaxtools.utils import get_logger
+from diffusionkit.mlx import DiffusionPipeline
+from diffusionkit.utils import image_psnr
from huggingface_hub import hf_hub_download
from PIL import Image

-from python.src.diffusionkit.mlx import DiffusionPipeline
-from python.src.diffusionkit.utils import image_psnr
-
logger = get_logger(__name__)

W16 = False
python/src/diffusionkit/tests/torch2coreml/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
+from .test_mmdit import convert_mmdit_to_mlpackage
+from .test_vae import convert_vae_to_mlpackage
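The new torch2coreml test package `__init__.py` re-exports the two conversion entry points added in this PR, so conversions can be triggered from Python instead of the CLI. A minimal sketch of such a call; the import path assumes the tests ship inside the installed `diffusionkit` package (per the `find_packages(where="python/src")` change in setup.py), and the repo, latent size, and output directory below are placeholder values:

```python
# Sketch only: programmatic use of the new Core ML conversion helpers.
# Import path assumes the tests are packaged under diffusionkit.tests;
# all argument values are illustrative placeholders.
from diffusionkit.tests.torch2coreml import (
    convert_mmdit_to_mlpackage,
    convert_vae_to_mlpackage,
)

# Convert the MMDiT and the VAE decoder for 512x512 generation (64x64 latents).
mmdit_path = convert_mmdit_to_mlpackage(
    model_version="stabilityai/stable-diffusion-3-medium",  # forwarded to TEST_SD3_HF_REPO
    latent_h=64,
    latent_w=64,
    output_dir="external/mlpackages",  # persisted instead of a temporary cache dir
)
vae_path = convert_vae_to_mlpackage(
    model_version="stabilityai/stable-diffusion-3-medium",
    latent_h=64,
    latent_w=64,
    output_dir="external/mlpackages",
)
print(mmdit_path, vae_path)  # paths to the generated .mlpackage bundles
```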

tests/torch2coreml/test_mmdit.py renamed to python/src/diffusionkit/tests/torch2coreml/test_mmdit.py

Lines changed: 68 additions & 16 deletions
@@ -11,11 +11,10 @@
import torch
from argmaxtools import test_utils as argmaxtools_test_utils
from argmaxtools.utils import get_fastest_device, get_logger
+from diffusionkit.torch import mmdit
+from diffusionkit.torch.model_io import _load_mmdit_weights
from huggingface_hub import hf_hub_download

-from python.src.diffusionkit.torch import mmdit
-from python.src.diffusionkit.torch.model_io import _load_mmdit_weights
-
torch.set_grad_enabled(False)
logger = get_logger(__name__)

@@ -27,24 +26,38 @@
TEST_TORCH_DTYPE = torch.float32
TEST_PSNR_THR = 35
TEST_LATENT_SIZE = 64  # 64 latent -> 512 image, 128 latent -> 1024 image
-
-# Test configuration
-argmaxtools_test_utils.TEST_MIN_SPEEDUP_VS_CPU = 3.0
-argmaxtools_test_utils.TEST_COREML_PRECISION = ct.precision.FLOAT32
-argmaxtools_test_utils.TEST_COMPUTE_UNIT = ct.ComputeUnit.CPU_AND_GPU
-argmaxtools_test_utils.TEST_COMPRESSION_MIN_SPEEDUP = 0.2
-argmaxtools_test_utils.TEST_DEFAULT_NBITS = None
-argmaxtools_test_utils.TEST_SKIP_SPEED_TESTS = True
+TEST_LATENT_HEIGHT = TEST_LATENT_SIZE
+TEST_LATENT_WIDTH = TEST_LATENT_SIZE

TEST_MODELS = {
    "2b": mmdit.SD3_2b,
    "8b": mmdit.SD3_8b,
}


+def setup_test_config(
+    min_speedup_vs_cpu=3.0,
+    compute_precision=ct.precision.FLOAT32,
+    compute_unit=ct.ComputeUnit.CPU_AND_GPU,
+    compression_min_speedup=0.2,
+    default_nbits=None,
+    skip_speed_tests=True,
+    compile_coreml=False,
+):
+    argmaxtools_test_utils.TEST_MIN_SPEEDUP_VS_CPU = min_speedup_vs_cpu
+    argmaxtools_test_utils.TEST_COREML_PRECISION = compute_precision
+    argmaxtools_test_utils.TEST_COMPUTE_UNIT = compute_unit
+    argmaxtools_test_utils.TEST_COMPRESSION_MIN_SPEEDUP = compression_min_speedup
+    argmaxtools_test_utils.TEST_DEFAULT_NBITS = default_nbits
+    argmaxtools_test_utils.TEST_SKIP_SPEED_TESTS = skip_speed_tests
+    argmaxtools_test_utils.TEST_COMPILE_COREML = compile_coreml
+
+
class TestSD3MMDiT(argmaxtools_test_utils.CoreMLTestsMixin, unittest.TestCase):
    """Unit tests for stable_duffusion_3.mmdit.MMDiT module"""

+    model_version = "2b"
+
    @classmethod
    def setUpClass(cls):
        global TEST_SD3_CKPT_PATH

@@ -55,7 +68,7 @@ def setUpClass(cls):
        # Base test model
        logger.info("Initializing SD3 model")
        cls.test_torch_model = (
-            mmdit.MMDiT(TEST_MODELS[args.model_version])
+            mmdit.MMDiT(TEST_MODELS[cls.model_version])
            .to(TEST_DEV)
            .to(TEST_TORCH_DTYPE)
            .eval()

@@ -75,7 +88,7 @@ def setUpClass(cls):

        # Sample inputs
        # TODO(atiorh): CLI configurable model version
-        cls.test_torch_inputs = get_test_inputs(TEST_MODELS[args.model_version])
+        cls.test_torch_inputs = get_test_inputs(TEST_MODELS[cls.model_version])

        super().setUpClass()

@@ -89,13 +102,14 @@ def tearDownClass(cls):
def get_test_inputs(cfg: mmdit.MMDiTConfig) -> Dict[str, torch.Tensor]:
    """Generate random inputs for the SD3 MMDiT model"""
    batch_size = 2  # classifier-free guidance
-    assert TEST_LATENT_SIZE < cfg.max_latent_resolution
+    assert TEST_LATENT_HEIGHT <= cfg.max_latent_resolution
+    assert TEST_LATENT_WIDTH <= cfg.max_latent_resolution

    latent_image_embeddings_dims = (
        batch_size,
        cfg.vae_latent_dim,
-        TEST_LATENT_SIZE,
-        TEST_LATENT_SIZE,
+        TEST_LATENT_HEIGHT,
+        TEST_LATENT_WIDTH,
    )
    pooled_text_embeddings_dims = (batch_size, cfg.pooled_text_embed_dim, 1, 1)
    token_level_text_embeddings_dims = (

@@ -118,6 +132,42 @@ def get_test_inputs(cfg: mmdit.MMDiTConfig) -> Dict[str, torch.Tensor]:
    }


+def convert_mmdit_to_mlpackage(
+    model_version: str,
+    latent_h: int,
+    latent_w: int,
+    output_dir: str = None,
+    **test_config_kwargs,
+) -> str:
+    """Converts a MMDiT model to a CoreML package.
+
+    Returns:
+        `str`: path to the converted model.
+    """
+    global TEST_SD3_CKPT_PATH, TEST_SD3_HF_REPO, TEST_LATENT_WIDTH, TEST_LATENT_HEIGHT, TEST_CACHE_DIR
+
+    # Convert to CoreML
+    TEST_SD3_HF_REPO = model_version
+    TEST_LATENT_HEIGHT = latent_h or TEST_LATENT_SIZE
+    TEST_LATENT_WIDTH = latent_w or TEST_LATENT_SIZE
+
+    setup_test_config(compile_coreml=False, **test_config_kwargs)
+
+    with argmaxtools_test_utils._get_test_cache_dir(
+        persistent_cache_dir=output_dir
+    ) as TEST_CACHE_DIR:
+        suite = unittest.TestSuite()
+        suite.addTest(TestSD3MMDiT("test_torch2coreml_correctness_and_speedup"))
+
+        if os.getenv("DEBUG", False):
+            suite.debug()
+        else:
+            runner = unittest.TextTestRunner()
+            runner.run(suite)
+
+    return os.path.join(TEST_CACHE_DIR, f"{TestSD3MMDiT.model_name}.mlpackage")
+
+
if __name__ == "__main__":
    import argparse

@@ -142,6 +192,8 @@ def get_test_inputs(cfg: mmdit.MMDiTConfig) -> Dict[str, torch.Tensor]:
    TEST_LATENT_SIZE = args.latent_size
    TEST_CKPT_FILE_NAME = args.ckpt_file_name

+    setup_test_config()
+
    with argmaxtools_test_utils._get_test_cache_dir(args.o) as TEST_CACHE_DIR:
        suite = unittest.TestSuite()
        suite.addTest(TestSD3MMDiT("test_torch2coreml_correctness_and_speedup"))
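Because `convert_mmdit_to_mlpackage` forwards `**test_config_kwargs` to the new `setup_test_config`, callers can override the conversion settings that were previously hard-coded module globals. A hedged sketch: the keyword names come from the `setup_test_config` signature above, the argument values are illustrative, and whether a given compute unit or precision suits this model is not claimed here.

```python
# Sketch: overriding the Core ML conversion settings exposed by setup_test_config().
# Keyword names match the function signature in the diff; values are illustrative.
import coremltools as ct

from diffusionkit.tests.torch2coreml.test_mmdit import convert_mmdit_to_mlpackage

mlpackage_path = convert_mmdit_to_mlpackage(
    model_version="stabilityai/stable-diffusion-3-medium",
    latent_h=128,  # 128x128 latents -> 1024x1024 images
    latent_w=128,
    output_dir="external/mlpackages",
    compute_precision=ct.precision.FLOAT16,  # MMDiT default in this file is FLOAT32
    compute_unit=ct.ComputeUnit.CPU_AND_NE,  # default is CPU_AND_GPU
    skip_speed_tests=True,
)
```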

tests/torch2coreml/test_vae.py renamed to python/src/diffusionkit/tests/torch2coreml/test_vae.py

Lines changed: 62 additions & 14 deletions
@@ -11,11 +11,10 @@
import torch
from argmaxtools import test_utils as argmaxtools_test_utils
from argmaxtools.utils import get_fastest_device, get_logger
+from diffusionkit.torch import vae
+from diffusionkit.torch.model_io import _load_vae_decoder_weights
from huggingface_hub import hf_hub_download

-from python.src.diffusionkit.torch import vae
-from python.src.diffusionkit.torch.model_io import _load_vae_decoder_weights
-
torch.set_grad_enabled(False)
logger = get_logger(__name__)

@@ -26,20 +25,31 @@
TEST_TORCH_DTYPE = torch.float32
TEST_PSNR_THR = 35
TEST_LATENT_SIZE = 64  # 64 latent -> 512 image, 128 latent -> 1024 image
-
-# Test configuration
-# argmaxtools_test_utils.TEST_DEFAULT_NBITS = 8
-argmaxtools_test_utils.TEST_MIN_SPEEDUP_VS_CPU = 3.0
-argmaxtools_test_utils.TEST_COREML_PRECISION = ct.precision.FLOAT16
-argmaxtools_test_utils.TEST_COMPRESSION_MIN_SPEEDUP = 0.5
-argmaxtools_test_utils.TEST_COMPUTE_UNIT = ct.ComputeUnit.CPU_AND_GPU
-argmaxtools_test_utils.TEST_SKIP_SPEED_TESTS = True
-
+TEST_LATENT_HEIGHT = TEST_LATENT_SIZE
+TEST_LATENT_WIDTH = TEST_LATENT_SIZE

SD3_8b = vae.VAEDecoderConfig(resolution=1024)
SD3_2b = vae.VAEDecoderConfig(resolution=512)


+def setup_test_config(
+    min_speedup_vs_cpu=3.0,
+    compute_precision=ct.precision.FLOAT16,
+    compute_unit=ct.ComputeUnit.CPU_AND_GPU,
+    compression_min_speedup=0.5,
+    default_nbits=None,
+    skip_speed_tests=True,
+    compile_coreml=False,
+):
+    argmaxtools_test_utils.TEST_MIN_SPEEDUP_VS_CPU = min_speedup_vs_cpu
+    argmaxtools_test_utils.TEST_COREML_PRECISION = compute_precision
+    argmaxtools_test_utils.TEST_COMPUTE_UNIT = compute_unit
+    argmaxtools_test_utils.TEST_COMPRESSION_MIN_SPEEDUP = compression_min_speedup
+    argmaxtools_test_utils.TEST_DEFAULT_NBITS = default_nbits
+    argmaxtools_test_utils.TEST_SKIP_SPEED_TESTS = skip_speed_tests
+    argmaxtools_test_utils.TEST_COMPILE_COREML = compile_coreml
+
+
class TestSD3VAEDecoder(argmaxtools_test_utils.CoreMLTestsMixin, unittest.TestCase):
    """Unit tests for stable_duffusion_3.vae.VAEDecoder module"""

@@ -90,13 +100,49 @@ def get_test_inputs(config: vae.VAEDecoderConfig) -> Dict[str, torch.Tensor]:
    if TEST_LATENT_SIZE != config_expected_latent_resolution:
        logger.warning(
            f"TEST_LATENT_SIZE ({TEST_LATENT_SIZE}) does not match the implied "
-            "latent resolution from the model config "
+            f"latent resolution ({config_expected_latent_resolution}) from the model config "
        )

-    z_dims = (1, config.in_channels, TEST_LATENT_SIZE, TEST_LATENT_SIZE)
+    z_dims = (1, config.in_channels, TEST_LATENT_HEIGHT, TEST_LATENT_WIDTH)
    return {"z": torch.randn(*z_dims).to(TEST_DEV).to(TEST_TORCH_DTYPE)}


+def convert_vae_to_mlpackage(
+    model_version: str,
+    latent_h: int,
+    latent_w: int,
+    output_dir: str = None,
+    **test_config_kwargs,
+) -> str:
+    """Converts a VAE decoder model to a CoreML package.
+
+    Returns:
+        `str`: path to the converted model.
+    """
+    global TEST_SD3_CKPT_PATH, TEST_SD3_HF_REPO, TEST_LATENT_WIDTH, TEST_LATENT_HEIGHT, TEST_CACHE_DIR
+
+    # Convert to CoreML
+    TEST_SD3_HF_REPO = model_version
+    TEST_LATENT_HEIGHT = latent_h or TEST_LATENT_SIZE
+    TEST_LATENT_WIDTH = latent_w or TEST_LATENT_SIZE
+
+    setup_test_config(compile_coreml=False, **test_config_kwargs)
+
+    with argmaxtools_test_utils._get_test_cache_dir(
+        persistent_cache_dir=output_dir
+    ) as TEST_CACHE_DIR:
+        suite = unittest.TestSuite()
+        suite.addTest(TestSD3VAEDecoder("test_torch2coreml_correctness_and_speedup"))
+
+        if os.getenv("DEBUG", False):
+            suite.debug()
+        else:
+            runner = unittest.TextTestRunner()
+            runner.run(suite)
+
+    return os.path.join(TEST_CACHE_DIR, f"{TestSD3VAEDecoder.model_name}.mlpackage")
+
+
if __name__ == "__main__":
    import argparse

@@ -112,6 +158,8 @@ def get_test_inputs(config: vae.VAEDecoderConfig) -> Dict[str, torch.Tensor]:
    TEST_SD3_HF_REPO = args.sd3_ckpt_path
    TEST_LATENT_SIZE = args.latent_size

+    setup_test_config()
+
    with argmaxtools_test_utils._get_test_cache_dir(args.o) as TEST_CACHE_DIR:
        suite = unittest.TestSuite()
        suite.addTest(TestSD3VAEDecoder("test_torch2coreml_correctness_and_speedup"))
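The switch from a single `TEST_LATENT_SIZE` to separate `TEST_LATENT_HEIGHT`/`TEST_LATENT_WIDTH` globals means `get_test_inputs` can trace a non-square latent grid. A sketch under that assumption, with placeholder values; whether the resulting Core ML package is useful end-to-end for non-square generation is not claimed here. The comment uses the 8x latent-to-pixel ratio implied by the `64 latent -> 512 image` note above.

```python
# Sketch: converting the VAE decoder with a non-square latent grid,
# which the new TEST_LATENT_HEIGHT / TEST_LATENT_WIDTH globals allow.
# Values are illustrative; 64x128 latents correspond to roughly 512x1024 pixels.
from diffusionkit.tests.torch2coreml.test_vae import convert_vae_to_mlpackage

vae_path = convert_vae_to_mlpackage(
    model_version="stabilityai/stable-diffusion-3-medium",
    latent_h=64,
    latent_w=128,
    output_dir="external/mlpackages",
)
print(vae_path)
```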

requirements.txt

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-argmaxtools
+argmaxtools>=0.1.13
torch
safetensors
mlx

setup.py

Lines changed: 19 additions & 4 deletions
@@ -1,6 +1,18 @@
+import os
+
from setuptools import find_packages, setup
+from setuptools.command.install import install
+
+VERSION = "0.2.16"
+
+
+class VersionInstallCommand(install):
+    def run(self):
+        install.run(self)
+        version_file = os.path.join(self.install_lib, "diffusionkit", "version.py")
+        with open(version_file, "w") as f:
+            f.write(f"__version__ = '{VERSION}'\n")

-VERSION = "0.2.0"

with open("README.md") as f:
    readme = f.read()

@@ -14,7 +26,7 @@
    long_description_content_type="text/markdown",
    author="Argmax, Inc.",
    install_requires=[
-        "argmaxtools",
+        "argmaxtools>=0.1.13",
        "torch",
        "safetensors",
        "mlx",

@@ -23,13 +35,16 @@
        "pillow",
        "sentencepiece",
    ],
-    packages=["diffusionkit"],
-    package_dir={"": "python/src", "diffusionkit": "python/src/diffusionkit"},
+    packages=find_packages(where="python/src"),
+    package_dir={"": "python/src"},
    entry_points={
        "console_scripts": [
            "diffusionkit-cli=diffusionkit.mlx.scripts.generate_images:cli",
        ],
    },
+    cmdclass={
+        "install": VersionInstallCommand,
+    },
    classifiers=[
        "Development Status :: 4 - Beta",
        "Intended Audience :: Developers",
