Commit cb8f410

Remove FP8 Patch (#1585)
## Purpose ##

* Remove dead code

## Background ##

The `new_dtype_byte_size` patch was added because transformers' `dtype_byte_size` function lacked support for FP8. The corresponding fix was merged into transformers main over a year ago, so this patch is now safe to remove.

## Changes ##

* Remove the `new_dtype_byte_size` patch

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
1 parent ec6345d commit cb8f410
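
For context, here is a minimal sketch (not part of this commit) of the parsing issue behind the patch: the bit width is extracted from the dtype's string form with a regex, and an end-anchored pattern cannot parse float8 names such as `torch.float8_e4m3fn`, while the pattern used by the removed `new_dtype_byte_size` patch (and by the upstream fix in huggingface/transformers#30488) matches the first digit group instead. The end-anchored pattern shown for the older behavior is an assumption, and `byte_size` is a hypothetical helper for illustration only.

```python
import re

import torch


# Illustrative sketch only (not from this commit or from transformers itself).
# byte_size is a hypothetical helper showing the dtype-name parsing at issue.
def byte_size(dtype: torch.dtype, pattern: str) -> int:
    """Extract the bit width from a dtype's string form and convert to bytes."""
    match = re.search(pattern, str(dtype))
    if match is None:
        raise ValueError(f"`dtype` is not a valid dtype: {dtype}.")
    return int(match.groups()[0]) // 8


# The patched-style pattern matches the first digit group, so it handles
# float8 names such as "torch.float8_e4m3fn".
print(byte_size(torch.float16, r"[^\d](\d+)_?"))        # 2
print(byte_size(torch.float8_e4m3fn, r"[^\d](\d+)_?"))  # 1

# An end-anchored pattern (assumed here to approximate older transformers
# behavior) finds no digits at the end of "torch.float8_e4m3fn" and raises.
try:
    byte_size(torch.float8_e4m3fn, r"[^\d](\d+)$")
except ValueError as err:
    print(err)
```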

File tree

1 file changed: +0 additions, −19 deletions


src/llmcompressor/transformers/sparsification/compressed_tensors_utils.py

Lines changed: 0 additions & 19 deletions
```diff
@@ -1,11 +1,9 @@
 import os
-import re
 import weakref
 from functools import wraps
 from typing import Optional

 import torch
-import transformers
 from accelerate.accelerator import get_state_dict_offloaded_model
 from compressed_tensors import (
     CompressionFormat,
@@ -86,11 +84,6 @@ def save_pretrained_wrapper(
             :param kwargs: additional kwargs to pass on to model.save_pretrained
             """

-            # HACK: Override the dtype_byte_size function in transformers to
-            # support float8 types. Fix is posted upstream
-            # https://github.com/huggingface/transformers/pull/30488
-            transformers.modeling_utils.dtype_byte_size = new_dtype_byte_size
-
             # compress model using compressor
             compressor = get_model_compressor(
                 model=model,
@@ -128,18 +121,6 @@ def save_pretrained_wrapper(
     model.save_pretrained = save_pretrained_compressed(model.save_pretrained)


-# HACK: Override the dtype_byte_size function in transformers to support float8 types
-# Fix is posted upstream https://github.com/huggingface/transformers/pull/30488
-def new_dtype_byte_size(dtype):
-    if dtype == torch.bool:
-        return 1 / 8
-    bit_search = re.search(r"[^\d](\d+)_?", str(dtype))
-    if bit_search is None:
-        raise ValueError(f"`dtype` is not a valid dtype: {dtype}.")
-    bit_size = int(bit_search.groups()[0])
-    return bit_size // 8
-
-
 def patch_tied_tensors_bug(model: torch.nn.Module):
     """
     Patches bug where HF transformers will fail to untie weights under specific
```
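
As a follow-up, here is a minimal sanity-check sketch, assuming the installed transformers release includes huggingface/transformers#30488 and still exposes `dtype_byte_size` under `transformers.modeling_utils`:

```python
import torch
import transformers

# Assumption: the installed transformers release includes the upstream fix
# (huggingface/transformers#30488) and still exposes dtype_byte_size at this
# location. With that fix, float8 dtypes resolve to 1 byte without any patch.
assert transformers.modeling_utils.dtype_byte_size(torch.float8_e4m3fn) == 1
assert transformers.modeling_utils.dtype_byte_size(torch.float16) == 2
assert transformers.modeling_utils.dtype_byte_size(torch.bool) == 1 / 8
```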
