Fix quantize_bench in OSS (#4463)

cthi · facebook-github-bot · commit 5d24e24ca983 · 2025-07-09T14:10:24.000-07:00
Summary: Pull Request resolved: #4463 X-link: facebookresearch/FBGEMM#1522 Currently quantize_bench is broken in OSS because we slightly changed how the Python files are installed in the CMake build. Right now they are installed to `fbgemm_gpu/experimental/bench`, which breaks the import in OSS because the directory structure doesn't match. I think the simplest thing is to install them into `fbgemm_gpu/experimental/gen_ai/bench`, and then add the `bench` component back to the import path in fbcode. Reviewed By: jiawenliu64 Differential Revision: D78025117 fbshipit-source-id: e9fe5c83b5bdf3cc1e461d28bc8e5293833a8a6c
diff --git a/fbgemm_gpu/experimental/gen_ai/CMakeLists.txt b/fbgemm_gpu/experimental/gen_ai/CMakeLists.txt
@@ -174,7 +174,7 @@ add_to_package(
 
 install(
   DIRECTORY bench
-  DESTINATION fbgemm_gpu/experimental)
+  DESTINATION fbgemm_gpu/experimental/gen_ai)
 
 install(
   DIRECTORY gen_ai
diff --git a/fbgemm_gpu/experimental/gen_ai/bench/quantize_bench.py b/fbgemm_gpu/experimental/gen_ai/bench/quantize_bench.py
@@ -29,7 +29,10 @@ def __init__(self, *args, **kwargs):
             super().__init__()
 
 
-from fbgemm_gpu.experimental.gen_ai.quantize_ops import get_quantize_ops, QuantizeOpBase
+from fbgemm_gpu.experimental.gen_ai.bench.quantize_ops import (
+    get_quantize_ops,
+    QuantizeOpBase,
+)
 
 
 def generate_group_tensor(G, M):
diff --git a/fbgemm_gpu/experimental/gen_ai/bench/quantize_ops.py b/fbgemm_gpu/experimental/gen_ai/bench/quantize_ops.py
@@ -41,8 +41,12 @@
     quantize_int4_preshuffle,
 )
 
-from gen_ai.llm_inference.fb.llm.kernel.rms_norm import rms_norm
-from gen_ai.llm_inference.fb.llm.kernel.silu_mul import silu_mul
+try:
+    from gen_ai.llm_inference.fb.llm.kernel.rms_norm import rms_norm
+    from gen_ai.llm_inference.fb.llm.kernel.silu_mul import silu_mul
+except ImportError:
+    # The imports above are only used for some experiments; the quantize ops do not rely on them, so it is okay to skip them.
+    pass
 
 try:
     from tinygemm.utils import group_quantize_tensor