Call prefetch of underlying module when using prefetch (#2077)
Summary:
Pull Request resolved: #2077

Right now, we can't use the prefetch pipeline with a model that has a table sharded with data parallel, because the data-parallel lookup is wrapped in `DistributedDataParallel`, which does not expose the underlying module's `prefetch`.

With this change, prefetch unwraps the DDP wrapper and calls `prefetch` on the underlying lookup module, which is well guarded with `if hasattr(emb_op.emb_module, "prefetch"):`.
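For illustration, a minimal, self-contained sketch of the failure mode and the unwrap pattern. The `Lookup` class, the single-process gloo process group, and the call arguments are hypothetical stand-ins for this sketch, not TorchRec code:

import os

import torch
import torch.distributed as dist
import torch.nn as nn
from torch.nn.parallel import DistributedDataParallel

# Single-process gloo process group so DDP can be constructed on CPU
# (assumption for the sketch; real jobs already have a process group).
os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
os.environ.setdefault("MASTER_PORT", "29500")
dist.init_process_group("gloo", rank=0, world_size=1)


class Lookup(nn.Module):
    """Hypothetical stand-in for an embedding lookup module with a prefetch method."""

    def __init__(self) -> None:
        super().__init__()
        self.weight = nn.Parameter(torch.zeros(4, 4))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return x @ self.weight

    def prefetch(self, sparse_features=None, forward_stream=None) -> None:
        print("prefetch called on the underlying lookup")


ddp_lookup = DistributedDataParallel(Lookup())

# DDP does not forward arbitrary attribute access to the wrapped module, so
# calling ddp_lookup.prefetch(...) directly raises AttributeError.

# The fix: unwrap (possibly nested) DDP wrappers before calling prefetch.
emb_lookup = ddp_lookup
while isinstance(emb_lookup, DistributedDataParallel):
    emb_lookup = emb_lookup.module
emb_lookup.prefetch(sparse_features=None, forward_stream=None)

dist.destroy_process_group()

The sketch mirrors the committed change: a `while` loop rather than a single unwrap, presumably so that nested wrapping is also handled.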

Reviewed By: joshuadeng

Differential Revision: D58213342

fbshipit-source-id: e1ad02b95487dd2488b23b37f1fc4e81431fb30a
henrylhtsang authored and facebook-github-bot committed Jun 6, 2024
1 parent 8393202 commit 5cf175a
Showing 1 changed file with 3 additions and 0 deletions.
3 changes: 3 additions & 0 deletions torchrec/distributed/embedding_types.py
@@ -16,6 +16,7 @@
 from fbgemm_gpu.split_table_batched_embeddings_ops_training import EmbeddingLocation
 from torch import fx, nn
 from torch.nn.modules.module import _addindent
+from torch.nn.parallel import DistributedDataParallel
 from torchrec.distributed.types import (
     get_tensor_size_bytes,
     ModuleSharder,
@@ -337,6 +338,8 @@ def prefetch(
         """

         for feature, emb_lookup in zip(dist_input, self._lookups):
+            while isinstance(emb_lookup, DistributedDataParallel):
+                emb_lookup = emb_lookup.module
             emb_lookup.prefetch(sparse_features=feature, forward_stream=forward_stream)

     def extra_repr(self) -> str:
