We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 4589b94 · commit 813e0b8 — Copy full SHA for 813e0b8
vllm/v1/worker/tpu_worker.py
@@ -101,7 +101,10 @@ def init_device(self):
101
# fix this. It will be removed after the bug in XLA compiler is fixed.
102
os.environ["LIBTPU_INIT_ARGS"] = (
103
os.environ.get("LIBTPU_INIT_ARGS", "") +
104
- " --xla_tpu_force_1d_allreduce_at_chunk_count=1")
+ " --xla_tpu_force_1d_allreduce_at_chunk_count=1"
105
+ " --xla_jf_conv_input_fusion=False")
106
+ # --xla_jf_conv_input_fusion=False is used to improve the perf of
107
+ # quantized matmul.
108
torch.set_grad_enabled(False)
109
torch.set_default_dtype(self.model_config.dtype)
110
0 commit comments