Skip to content

Commit 31d9903

Browse files
committed
add precomputed covariance for the CLIP used in SD 1.5, thanks to @BradVidler!
1 parent eccb221 commit 31d9903

File tree

4 files changed

+39
-13
lines changed

4 files changed

+39
-13
lines changed

README.md

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ It seems they successfully applied the Rank-1 editing technique from a <a href="
1414

1515
- Yoad Tewel for the multiple code reviews and clarifying emails
1616

17+
- <a href="https://github.com/BradVidler">Brad Vidler</a> for precomputing the covariance matrix for the CLIP used in Stable Diffusion 1.5!
18+
1719
- All the maintainers at <a href="https://github.com/mlfoundations/open_clip">OpenClip</a>, for their SOTA open sourced contrastive learning text-image models
1820

1921
## Install
@@ -33,17 +35,13 @@ from perfusion_pytorch import Rank1EditModule
3335
to_keys = nn.Linear(768, 320, bias = False)
3436
to_values = nn.Linear(768, 320, bias = False)
3537

36-
input_covariance = torch.randn(768, 768)
37-
3838
wrapped_to_keys = Rank1EditModule(
3939
to_keys,
40-
C = input_covariance,
4140
is_key_proj = True
4241
)
4342

4443
wrapped_to_values = Rank1EditModule(
45-
to_values,
46-
C = input_covariance
44+
to_values
4745
)
4846

4947
text_enc = torch.randn(4, 77, 768) # regular input
@@ -76,10 +74,10 @@ values = wrapped_to_values(text_enc)
7674
## Todo
7775

7876
- [ ] wire up with SD 1.5, starting with xiao's dreambooth-sd
79-
- [ ] embedding wrapper should take care of substituting with super class token id and return embedding with super class
8077
- [ ] show example in readme for inference with multiple concepts
8178
- [ ] automatically infer where keys and values projection are if not specified for the `make_key_value_proj_rank1_edit_modules_` function
8279

80+
- [x] embedding wrapper should take care of substituting with super class token id and return embedding with super class
8381
- [x] review multiple concepts - thanks to Yoad
8482
- [x] offer a function that wires up the cross attention
8583
- [x] handle multiple concepts in one prompt at inference - summation of the sigmoid term + outputs
2.25 MB
Binary file not shown.

perfusion_pytorch/perfusion.py

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from math import ceil
22
from copy import deepcopy
3+
from pathlib import Path
34

45
from beartype import beartype
56
from beartype.typing import Union, List, Optional, Tuple
@@ -15,6 +16,24 @@
1516

1617
from perfusion_pytorch.open_clip import OpenClipAdapter
1718

19+
# constants
20+
21+
IndicesTensor = Union[LongTensor, IntTensor]
22+
23+
# precomputed covariance paths
24+
# will add more models going forward, if the paper checks out
25+
26+
CURRENT_DIR = Path(__file__).parents[0]
27+
DATA_DIR = CURRENT_DIR / 'data'
28+
29+
assert DATA_DIR.is_dir()
30+
31+
COVARIANCE_FILENAME_BY_TEXT_IMAGE_MODEL = dict(
32+
SD15 = DATA_DIR / 'covariance_CLIP_VIT-L-14.pt'
33+
)
34+
35+
assert all([filepath.exists() for filepath in COVARIANCE_FILENAME_BY_TEXT_IMAGE_MODEL.values()])
36+
1837
# helpers
1938

2039
def exists(val):
@@ -23,8 +42,6 @@ def exists(val):
2342
def is_all_unique(arr):
2443
return len(set(arr)) == len(arr)
2544

26-
IndicesTensor = Union[LongTensor, IntTensor]
27-
2845
# function for calculating C - input covariance
2946

3047
@beartype
@@ -35,8 +52,6 @@ def calculate_input_covariance(
3552
batch_size = 32,
3653
**cov_kwargs
3754
):
38-
embeds, mask = clip.embed_texts(texts)
39-
4055
num_batches = ceil(len(texts) / batch_size)
4156

4257
all_embeds = []
@@ -126,7 +141,8 @@ def __init__(
126141
key_or_values_proj: nn.Linear,
127142
*,
128143
num_concepts: int = 1,
129-
C: Tensor, # covariance of input, precomputed from 100K laion text
144+
C: Optional[Tensor] = None, # covariance of input, precomputed from 100K laion text
145+
default_model = 'SD15',
130146
text_seq_len: int = 77,
131147
is_key_proj: bool = False,
132148
input_decay = 0.99,
@@ -172,7 +188,18 @@ def __init__(
172188

173189
self.concept_outputs = nn.Parameter(torch.zeros(num_concepts, dim_output), requires_grad = not is_key_proj)
174190

175-
# C in the paper, inverse precomputed
191+
# input covariance C in the paper, inverse precomputed
192+
# if covariance was not passed in, then use default for SD1.5, precomputed by @BradVidler
193+
194+
if not exists(C):
195+
covariance_filepath = COVARIANCE_FILENAME_BY_TEXT_IMAGE_MODEL.get(default_model, None)
196+
197+
assert exists(covariance_filepath), f'{default_model} not found in the list of precomputed covariances {tuple(COVARIANCE_FILENAME_BY_TEXT_IMAGE_MODEL.keys())}'
198+
199+
C = torch.load(str(covariance_filepath))
200+
print(f'precomputed covariance loaded from {str(covariance_filepath)}')
201+
202+
# calculate C_inv
176203

177204
C_inv = torch.inverse(C)
178205
self.register_buffer('C_inv', C_inv)

setup.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
setup(
44
name = 'perfusion-pytorch',
55
packages = find_packages(exclude=[]),
6-
version = '0.1.4',
6+
version = '0.1.6',
77
license='MIT',
88
description = 'Perfusion - Pytorch',
99
author = 'Phil Wang',
@@ -23,6 +23,7 @@
2323
'opt-einsum',
2424
'torch>=2.0'
2525
],
26+
include_package_data = True,
2627
classifiers=[
2728
'Development Status :: 4 - Beta',
2829
'Intended Audience :: Developers',

0 commit comments

Comments
 (0)