
Commit 90c8cbd

Fix inference_mode (#885)
Summary: Fixes #875

Test Plan: tested locally with tutorials/quantize_vit/run_vit_b_quant.py using:

```
with torch.inference_mode():
    benchmark_model(model, 20, inputs)
```

but the issue could not be reproduced in unit tests.

Reviewers:

Subscribers:

Tasks:

Tags:
1 parent 3fa38aa commit 90c8cbd
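Why registering `aten.linear.default` helps, as far as the diff shows: `torch.nn.functional.linear` is a composite op that is normally decomposed before reaching a tensor subclass's `__torch_dispatch__`, but under `torch.inference_mode()` it can arrive whole as `aten.linear.default`, so a dispatch table keyed only on `torch.nn.functional.linear` misses it. A minimal probe to observe this, assuming a recent PyTorch; the exact op names printed can vary across versions:

```python
import torch
from torch.utils._python_dispatch import TorchDispatchMode

# Log every aten op that reaches the Python dispatch layer.
class LogOps(TorchDispatchMode):
    def __torch_dispatch__(self, func, types, args=(), kwargs=None):
        print(func)
        return func(*args, **(kwargs or {}))

x, w = torch.randn(2, 4), torch.randn(3, 4)

with LogOps():
    torch.nn.functional.linear(x, w)   # typically already decomposed (e.g. aten.t, aten.mm)

with torch.inference_mode(), LogOps():
    torch.nn.functional.linear(x, w)   # can arrive un-decomposed as aten.linear.default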

File tree

2 files changed (+2, -2 lines)


torchao/dtypes/affine_quantized_tensor.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -1483,7 +1483,7 @@ def _register_aqt_quantized_linear_dispatches():
 
 _register_aqt_quantized_linear_dispatches()
 
-@implements(torch.nn.functional.linear)
+@implements([torch.nn.functional.linear, aten.linear.default])
 def _(func, types, args, kwargs):
     input_tensor, weight_tensor, bias = (
         args[0],
```
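Both files use torchao's `implements` decorator to map a handler onto torch ops; the fix passes a list so one handler covers both entry points. A rough sketch of that registration pattern, with a hypothetical `_DISPATCH` table standing in for torchao's actual bookkeeping:

```python
# Illustrative sketch of a list-accepting `implements` decorator; torchao's
# real version hangs the table off the tensor subclass, not a module global.
_DISPATCH = {}

def implements(torch_ops):
    # Accept a single op or a list of ops.
    if not isinstance(torch_ops, (list, tuple)):
        torch_ops = [torch_ops]

    def decorator(fn):
        for op in torch_ops:
            _DISPATCH[op] = fn  # register the same handler for every listed op
        return fn

    return decorator
```

With both `torch.nn.functional.linear` and `aten.linear.default` keyed to the same handler, the quantized path is found whether the call is intercepted at the `__torch_function__` level or arrives un-decomposed at `__torch_dispatch__`.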

torchao/quantization/linear_activation_quantized_tensor.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -91,7 +91,7 @@ def to(self, *args, **kwargs):
 
 implements = LinearActivationQuantizedTensor.implements
 
-@implements(torch.nn.functional.linear)
+@implements([torch.nn.functional.linear, aten.linear.default])
 def _(func, types, args, kwargs):
     input_tensor, weight_tensor, bias = (
         args[0],
```
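A quick end-to-end check in the spirit of the test plan, using a toy model instead of the ViT tutorial; the `quantize_` / `int8_weight_only` names reflect torchao's API around the time of this commit and are assumptions here, not part of the change:

```python
import torch
from torchao.quantization import quantize_, int8_weight_only

# Toy stand-in for the ViT benchmark: a quantized linear layer that,
# after this fix, should dispatch correctly under inference_mode.
model = torch.nn.Sequential(torch.nn.Linear(128, 128)).eval()
quantize_(model, int8_weight_only())

x = torch.randn(1, 128)
with torch.inference_mode():
    out = model(x)  # previously failed to find a quantized-linear dispatch
print(out.shape)
```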
