Skip to content

Commit 64bfa30

Browse files
nipung90 authored and facebook-github-bot committed
Remove torch._running_with_deploy() from fbcode, Fix exception handling for torch.ops.load_libraries (pytorch#3213)
Summary: (none provided). Rollback Plan: (none provided). Differential Revision: D78583233
1 parent 332b8b4 commit 64bfa30

File tree

11 files changed: +198 additions, -195 deletions

torchrec/distributed/comm_ops.py

Lines changed: 180 additions & 173 deletions
Large diffs are not rendered by default.

torchrec/distributed/dist_data.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,8 @@
3838
torch.ops.load_library(
3939
"//deeplearning/fbgemm/fbgemm_gpu:merge_pooled_embeddings_cpu"
4040
)
41-
except OSError:
42-
pass
41+
except (OSError, RuntimeError):
42+
from fbgemm_gpu import sparse_ops # noqa: F401, E402
4343

4444
# OSS
4545
try:

torchrec/distributed/embedding.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,8 +105,8 @@
105105
try:
106106
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")
107107
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_cpu")
108-
except OSError:
109-
pass
108+
except (OSError, RuntimeError):
109+
from fbgemm_gpu import sparse_ops # noqa: F401, E402
110110

111111

112112
logger: logging.Logger = logging.getLogger(__name__)

torchrec/distributed/embeddingbag.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,8 +113,8 @@
113113
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")
114114
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_cpu")
115115
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu/codegen:index_select_ops")
116-
except OSError:
117-
pass
116+
except (OSError, RuntimeError):
117+
from fbgemm_gpu import sparse_ops # noqa: F401, E402
118118

119119

120120
def _pin_and_move(tensor: torch.Tensor, device: torch.device) -> torch.Tensor:

torchrec/distributed/model_parallel.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,8 @@
5858
try:
5959
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")
6060
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_cpu")
61-
except OSError:
62-
pass
61+
except (OSError, RuntimeError):
62+
from fbgemm_gpu import sparse_ops # noqa: F401, E402
6363

6464

6565
_DDP_STATE_DICT_PREFIX = "module."

torchrec/distributed/quant_embedding.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,8 +100,8 @@
100100
try:
101101
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")
102102
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_cpu")
103-
except OSError:
104-
pass
103+
except (OSError, RuntimeError):
104+
from fbgemm_gpu import sparse_ops # noqa: F401, E402
105105

106106

107107
logger: logging.Logger = logging.getLogger(__name__)

torchrec/distributed/train_pipeline/tracing.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,9 @@
1313

1414
import torch
1515

16-
if not torch._running_with_deploy():
17-
from torch.distributed._composable.fsdp.fully_shard import FSDPModule as FSDP2
18-
else:
1916

20-
class FSDP2:
21-
pass
17+
class FSDP2:
18+
pass
2219

2320

2421
from torch.distributed.fsdp import FullyShardedDataParallel as FSDP

torchrec/distributed/train_pipeline/train_pipelines.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,7 @@
8080
except ImportError:
8181
logger.warning("torchrec_use_sync_collectives is not available")
8282

83-
if not torch._running_with_deploy():
84-
torch.ops.import_module("fbgemm_gpu.sparse_ops")
83+
torch.ops.import_module("fbgemm_gpu.sparse_ops")
8584

8685

8786
# Note: doesn't make much sense but better than throwing.

torchrec/modules/itep_modules.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
torch.ops.load_library(
3030
"//deeplearning/fbgemm/fbgemm_gpu:intraining_embedding_pruning_gpu"
3131
)
32-
except OSError:
32+
except (OSError, RuntimeError):
3333
pass
3434

3535
logger: logging.Logger = logging.getLogger(__name__)

torchrec/quant/embedding_modules.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,8 @@
7979
try:
8080
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")
8181
torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_cpu")
82-
except OSError:
83-
pass
82+
except (OSError, RuntimeError):
83+
from fbgemm_gpu import sparse_ops # noqa: F401, E402
8484

8585
# OSS
8686
try:

0 commit comments

Comments
 (0)