perfusion_pytorch/perfusion.py
+13 −3 (13 additions, 3 deletions)
@@ -57,14 +57,15 @@ def __init__(
         key_or_values_proj: nn.Linear,
         *,
         num_finetune_prompts: int,
-        C: Tensor,                          # covariance of input, precomputed from 100K laion text
+        C: Tensor,                          # covariance of input, precomputed from 100K laion text
         text_seq_len: int = 77,
         is_key_proj: bool = False,
         input_decay = 0.99,
         train_beta = 0.75,
         train_temperature = 0.1,
-        eval_beta = 0.70,                   # in paper, specified a range (0.6 - 0.75) for local-key lock, and (0.4 - 0.6) for global-key lock
-        eval_temperature = 0.15
+        eval_beta = 0.70,                   # in paper, specified a range (0.6 - 0.75) for local-key lock, and (0.4 - 0.6) for global-key lock
+        eval_temperature = 0.15,
+        frac_gradient_concept_embed = 0.1   # they use a slower learning rate for the embed - this can be achieved by a trick to reduce the gradients going backwards through an operation
     ):
         super().__init__()
         assert not exists(key_or_values_proj.bias), 'key value projection in attention should not have bias'
@@ -81,6 +82,11 @@ def __init__(

         self.text_seq_len = text_seq_len

+        # for the lowered learning rate on the concept embed (0.006 vs 0.03 or something)
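The new frac_gradient_concept_embed argument refers to the gradient-reduction trick described in the comment above: the concept embedding keeps its full value on the forward pass, but only a fraction of the gradient flows back into it, which behaves like a lower learning rate for that parameter without a separate optimizer parameter group. A minimal sketch of how such a trick can be implemented (the helper name frac_gradient and the usage below are illustrative, not taken from the repository):

import torch
from torch import nn

def frac_gradient(t: torch.Tensor, frac: float = 0.1) -> torch.Tensor:
    # forward output equals t exactly; on the backward pass only `frac`
    # of the gradient reaches t, emulating a smaller learning rate for it
    return t * frac + t.detach() * (1. - frac)

# illustrative usage: dampen gradients flowing into a concept embedding
concept_embed = nn.Parameter(torch.randn(1, 768))
loss = frac_gradient(concept_embed, frac = 0.1).sum()
loss.backward()
# concept_embed.grad is now 0.1x the gradient it would receive otherwise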