Skip to content

Commit 31d9903

Browse files
committed
add precomputed covariance for the CLIP used in SD 1.5, thanks to @BradVidler!
1 parent eccb221 commit 31d9903

File tree

4 files changed

+39
-13
lines changed

4 files changed

+39
-13
lines changed

README.md

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ It seems they successfully applied the Rank-1 editing technique from a <a href="
1414

1515
- Yoad Tewel for the multiple code reviews and clarifying emails
1616

17+
- <a href="https://github.com/BradVidler">Brad Vidler</a> for precomputing the covariance matrix for the CLIP used in Stable Diffusion 1.5!
18+
1719
- All the maintainers at <a href="https://github.com/mlfoundations/open_clip">OpenClip</a>, for their SOTA open sourced contrastive learning text-image models
1820

1921
## Install
@@ -33,17 +35,13 @@ from perfusion_pytorch import Rank1EditModule
3335
to_keys = nn.Linear(768, 320, bias = False)
3436
to_values = nn.Linear(768, 320, bias = False)
3537

36-
input_covariance = torch.randn(768, 768)
37-
3838
wrapped_to_keys = Rank1EditModule(
3939
to_keys,
40-
C = input_covariance,
4140
is_key_proj = True
4241
)
4342

4443
wrapped_to_values = Rank1EditModule(
45-
to_values,
46-
C = input_covariance
44+
to_values
4745
)
4846

4947
text_enc = torch.randn(4, 77, 768) # regular input
@@ -76,10 +74,10 @@ values = wrapped_to_values(text_enc)
7674
## Todo
7775

7876
- [ ] wire up with SD 1.5, starting with xiao's dreambooth-sd
79-
- [ ] embedding wrapper should take care of substituting with super class token id and return embedding with super class
8077
- [ ] show example in readme for inference with multiple concepts
8178
- [ ] automatically infer where keys and values projection are if not specified for the `make_key_value_proj_rank1_edit_modules_` function
8279

80+
- [x] embedding wrapper should take care of substituting with super class token id and return embedding with super class
8381
- [x] review multiple concepts - thanks to Yoad
8482
- [x] offer a function that wires up the cross attention
8583
- [x] handle multiple concepts in one prompt at inference - summation of the sigmoid term + outputs
2.25 MB
Binary file not shown.

perfusion_pytorch/perfusion.py

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from math import ceil
22
from copy import deepcopy
3+
from pathlib import Path
34

45
from beartype import beartype
56
from beartype.typing import Union, List, Optional, Tuple
@@ -15,6 +16,24 @@
1516

1617
from perfusion_pytorch.open_clip import OpenClipAdapter
1718

19+
# constants
20+
21+
IndicesTensor = Union[LongTensor, IntTensor]
22+
23+
# precomputed covariance paths
24+
# will add more models going forward, if the paper checks out
25+
26+
CURRENT_DIR = Path(__file__).parents[0]
27+
DATA_DIR = CURRENT_DIR / 'data'
28+
29+
assert DATA_DIR.is_dir()
30+
31+
COVARIANCE_FILENAME_BY_TEXT_IMAGE_MODEL = dict(
32+
SD15 = DATA_DIR / 'covariance_CLIP_VIT-L-14.pt'
33+
)
34+
35+
assert all([filepath.exists() for filepath in COVARIANCE_FILENAME_BY_TEXT_IMAGE_MODEL.values()])
36+
1837
# helpers
1938

2039
def exists(val):
@@ -23,8 +42,6 @@ def exists(val):
2342
def is_all_unique(arr):
2443
return len(set(arr)) == len(arr)
2544

26-
IndicesTensor = Union[LongTensor, IntTensor]
27-
2845
# function for calculating C - input covariance
2946

3047
@beartype
@@ -35,8 +52,6 @@ def calculate_input_covariance(
3552
batch_size = 32,
3653
**cov_kwargs
3754
):
38-
embeds, mask = clip.embed_texts(texts)
39-
4055
num_batches = ceil(len(texts) / batch_size)
4156

4257
all_embeds = []
@@ -126,7 +141,8 @@ def __init__(
126141
key_or_values_proj: nn.Linear,
127142
*,
128143
num_concepts: int = 1,
129-
C: Tensor, # covariance of input, precomputed from 100K laion text
144+
C: Optional[Tensor] = None, # covariance of input, precomputed from 100K laion text
145+
default_model = 'SD15',
130146
text_seq_len: int = 77,
131147
is_key_proj: bool = False,
132148
input_decay = 0.99,
@@ -172,7 +188,18 @@ def __init__(
172188

173189
self.concept_outputs = nn.Parameter(torch.zeros(num_concepts, dim_output), requires_grad = not is_key_proj)
174190

175-
# C in the paper, inverse precomputed
191+
# input covariance C in the paper, inverse precomputed
192+
# if covariance was not passed in, then use default for SD1.5, precomputed by @BradVidler
193+
194+
if not exists(C):
195+
covariance_filepath = COVARIANCE_FILENAME_BY_TEXT_IMAGE_MODEL.get(default_model, None)
196+
197+
assert exists(covariance_filepath), f'{default_model} not found in the list of precomputed covariances {tuple(COVARIANCE_FILENAME_BY_TEXT_IMAGE_MODEL.keys())}'
198+
199+
C = torch.load(str(covariance_filepath))
200+
print(f'precomputed covariance loaded from {str(covariance_filepath)}')
201+
202+
# calculate C_inv
176203

177204
C_inv = torch.inverse(C)
178205
self.register_buffer('C_inv', C_inv)

setup.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
setup(
44
name = 'perfusion-pytorch',
55
packages = find_packages(exclude=[]),
6-
version = '0.1.4',
6+
version = '0.1.6',
77
license='MIT',
88
description = 'Perfusion - Pytorch',
99
author = 'Phil Wang',
@@ -23,6 +23,7 @@
2323
'opt-einsum',
2424
'torch>=2.0'
2525
],
26+
include_package_data = True,
2627
classifiers=[
2728
'Development Status :: 4 - Beta',
2829
'Intended Audience :: Developers',

0 commit comments

Comments
 (0)