
Commit 5ffe91e

Fact. embedding: improve interface
1 parent: 8b1022e

2 files changed: +20 additions, −20 deletions


tltorch/factorized_layers/factorized_embedding.py

Lines changed: 17 additions & 13 deletions
@@ -18,9 +18,9 @@ class FactorizedEmbedding(nn.Module):
         number of entries in the lookup table
     embedding_dim : int
         number of dimensions per entry
-    auto_reshape : bool
+    auto_tensorize : bool
         whether to use automatic reshaping for the embedding dimensions
-    d : int or int tuple
+    n_tensorized_modes : int or int tuple
         number of reshape dimensions for both embedding table dimension
     tensorized_num_embeddings : int tuple
         tensorized shape of the first embedding table dimension
@@ -34,8 +34,8 @@ class FactorizedEmbedding(nn.Module):
     def __init__(self,
                  num_embeddings,
                  embedding_dim,
-                 auto_reshape=True,
-                 d=3,
+                 auto_tensorize=True,
+                 n_tensorized_modes=3,
                  tensorized_num_embeddings=None,
                  tensorized_embedding_dim=None,
                  factorization='blocktt',
@@ -45,14 +45,14 @@ def __init__(self,
                  dtype=None):
         super().__init__()

-        if auto_reshape:
+        if auto_tensorize:

             if tensorized_num_embeddings is not None and tensorized_embedding_dim is not None:
                 raise ValueError(
-                    "Either use auto_reshape or specify tensorized_num_embeddings and tensorized_embedding_dim."
+                    "Either use auto_tensorize or specify tensorized_num_embeddings and tensorized_embedding_dim."
                 )

-            tensorized_num_embeddings, tensorized_embedding_dim = get_tensorized_shape(in_features=num_embeddings, out_features=embedding_dim, order=d, min_dim=2, verbose=False)
+            tensorized_num_embeddings, tensorized_embedding_dim = get_tensorized_shape(in_features=num_embeddings, out_features=embedding_dim, order=n_tensorized_modes, min_dim=2, verbose=False)

         else:
             #check that dimensions match factorization
@@ -121,8 +121,9 @@ def from_embedding(cls,
                        embedding_layer,
                        rank=8,
                        factorization='blocktt',
+                       n_tensorized_modes=2,
                        decompose_weights=True,
-                       auto_reshape=True,
+                       auto_tensorize=True,
                        decomposition_kwargs=dict(),
                        **kwargs):
         """
@@ -137,7 +138,7 @@ def from_embedding(cls,
             tensor type
         decompose_weights: bool
             whether to decompose weights and use for initialization
-        auto_reshape: bool
+        auto_tensorize: bool
             if True, automatically reshape dimensions for TensorizedTensor
         decomposition_kwargs: dict
             specify kwargs for the decomposition
@@ -146,8 +147,9 @@ def from_embedding(cls,

         instance = cls(num_embeddings,
                        embedding_dim,
-                       auto_reshape=auto_reshape,
+                       auto_tensorize=auto_tensorize,
                        factorization=factorization,
+                       n_tensorized_modes=n_tensorized_modes,
                        rank=rank,
                        **kwargs)

@@ -166,8 +168,9 @@ def from_embedding_list(cls,
                             embedding_layer_list,
                             rank=8,
                             factorization='blocktt',
+                            n_tensorized_modes=2,
                             decompose_weights=True,
-                            auto_reshape=True,
+                            auto_tensorize=True,
                             decomposition_kwargs=dict(),
                             **kwargs):
         """
@@ -182,7 +185,7 @@ def from_embedding_list(cls,
             tensor decomposition to use
         decompose_weights: bool
             decompose weights and use for initialization
-        auto_reshape: bool
+        auto_tensorize: bool
             automatically reshape dimensions for TensorizedTensor
         decomposition_kwargs: dict
             specify kwargs for the decomposition
@@ -207,7 +210,8 @@ def from_embedding_list(cls,

         instance = cls(num_embeddings,
                        embedding_dim,
-                       auto_reshape=auto_reshape,
+                       n_tensorized_modes=n_tensorized_modes,
+                       auto_tensorize=auto_tensorize,
                        factorization=factorization,
                        rank=rank,
                        n_layers=n_layers,
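In summary, this commit renames the FactorizedEmbedding constructor arguments auto_reshape to auto_tensorize and d to n_tensorized_modes, and exposes n_tensorized_modes through the from_embedding and from_embedding_list classmethods. A minimal sketch of the renamed constructor interface follows; the import path and the nn.Embedding-style call are assumptions based on the file layout and the test file below, not part of this diff:

import torch
from tltorch.factorized_layers import FactorizedEmbedding

# Old call (before this commit):
#   FactorizedEmbedding(1000, 32, auto_reshape=True, d=3)
# New call:
embedding = FactorizedEmbedding(1000, 32,
                                auto_tensorize=True,   # renamed from auto_reshape
                                n_tensorized_modes=3,  # renamed from d
                                factorization='blocktt')

# Index lookup, analogous to torch.nn.Embedding (assumption; see the test below)
indices = torch.randint(0, 1000, (4, 7))
out = embedding(indices)  # expected shape: (4, 7, 32)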

tltorch/factorized_layers/tests/test_factorized_embedding.py

Lines changed: 3 additions & 7 deletions
@@ -11,17 +11,13 @@
 @pytest.mark.parametrize('factorization', ['CP','Tucker', 'BlockTT'])
 @pytest.mark.parametrize('dims', [(256,16), (1000,32)])
 def test_FactorizedEmbedding(factorization,dims):
-
-
-
-    NUM_EMBEDDINGS,EMBEDDING_DIM=dims
-    BATCH_SIZE = 3
+    NUM_EMBEDDINGS, EMBEDDING_DIM = dims

     #create factorized embedding
-    factorized_embedding = FactorizedEmbedding(NUM_EMBEDDINGS,EMBEDDING_DIM,factorization=factorization)
+    factorized_embedding = FactorizedEmbedding(NUM_EMBEDDINGS, EMBEDDING_DIM, factorization=factorization)

     #make test embedding of same shape and same weight
-    test_embedding = torch.nn.Embedding(factorized_embedding.weight.shape[0],factorized_embedding.weight.shape[1])
+    test_embedding = torch.nn.Embedding(factorized_embedding.weight.shape[0], factorized_embedding.weight.shape[1])
     test_embedding.weight.data.copy_(factorized_embedding.weight.to_matrix().detach())

     #create batch and test using all entries (shuffled since entries may not be sorted)
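The classmethods gain the same keyword. A hedged sketch of the updated from_embedding call, using only the parameter names and defaults visible in the diff above and making those defaults explicit (the import path is an assumption based on the file layout):

import torch
from tltorch.factorized_layers import FactorizedEmbedding

pretrained = torch.nn.Embedding(256, 16)
factorized = FactorizedEmbedding.from_embedding(pretrained,
                                                rank=8,
                                                factorization='blocktt',
                                                n_tensorized_modes=2,  # new keyword in this commit
                                                auto_tensorize=True,   # renamed from auto_reshape
                                                decompose_weights=True)

# Same check the test performs: reconstruct the dense table from the factors
approx = factorized.weight.to_matrix()
print(approx.shape)  # expected torch.Size([256, 16]); may be padded if the auto reshape rounds up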
