Treat meta as CPU device, so we can run the meta on CPU with MetaSparseDispatchMode (#1617)

houseroad · facebook-github-bot · commit 9493c2c88708 · 2023-02-27T21:47:38.000-08:00
Summary: Pull Request resolved: #1617 When we use the meta device, it is not treated as a CPU tensor. But the use case is that we want to process the weights on CPU first. So, when the device is meta, we set `use_cpu` in `IntNBitTableBatchedEmbeddingBagsCodegen` to true if every embedding location in `embedding_specs` is `EmbeddingLocation.HOST`. Reviewed By: jianyuh, nanoax Differential Revision: D43633336 fbshipit-source-id: bfb5f52c17f6ecf8bedb4f47a85aabcf35b87416
diff --git a/fbgemm_gpu/fbgemm_gpu/split_table_batched_embeddings_ops.py b/fbgemm_gpu/fbgemm_gpu/split_table_batched_embeddings_ops.py
@@ -1799,6 +1799,10 @@ def __init__(
         dims: List[int] = [e[2] for e in embedding_specs]
         weights_tys: List[SparseType] = [e[3] for e in embedding_specs]
         locations: List[EmbeddingLocation] = [e[4] for e in embedding_specs]
+        # if target device is meta then we set use_cpu based on the embedding location
+        # information in embedding_specs.
+        if self.current_device.type == "meta":
+            self.use_cpu = all(loc == EmbeddingLocation.HOST for loc in locations)
 
         if row_alignment is None:
             self.row_alignment: int = 1 if self.use_cpu else 16