Skip to content

gh-132983: Introduce compression package and move _compression module #133018

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged: 8 commits were merged on Apr 27, 2025.
8 changes: 4 additions & 4 deletions Lib/bz2.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@
__author__ = "Nadeem Vawda <nadeem.vawda@gmail.com>"

from builtins import open as _builtin_open
from compression._common import _streams
import io
import os
import _compression

from _bz2 import BZ2Compressor, BZ2Decompressor

Expand All @@ -23,7 +23,7 @@
_MODE_WRITE = 3


class BZ2File(_compression.BaseStream):
class BZ2File(_streams.BaseStream):

"""A file object providing transparent bzip2 (de)compression.

Expand Down Expand Up @@ -88,7 +88,7 @@ def __init__(self, filename, mode="r", *, compresslevel=9):
raise TypeError("filename must be a str, bytes, file or PathLike object")

if self._mode == _MODE_READ:
raw = _compression.DecompressReader(self._fp,
raw = _streams.DecompressReader(self._fp,
BZ2Decompressor, trailing_error=OSError)
self._buffer = io.BufferedReader(raw)
else:
Expand Down Expand Up @@ -248,7 +248,7 @@ def writelines(self, seq):

Line separators are not added between the written byte strings.
"""
return _compression.BaseStream.writelines(self, seq)
return _streams.BaseStream.writelines(self, seq)

def seek(self, offset, whence=io.SEEK_SET):
"""Change the file position.
Expand Down
Empty file added Lib/compression/__init__.py
Empty file.
Lib/_compression.py → Lib/compression/_common/_streams.py (renamed)
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Internal classes used by the gzip, lzma and bz2 modules"""
"""Internal classes used by compression modules"""

import io
import sys
Expand Down
5 changes: 5 additions & 0 deletions Lib/compression/bz2/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import bz2
__doc__ = bz2.__doc__
del bz2

from bz2 import *
5 changes: 5 additions & 0 deletions Lib/compression/gzip/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import gzip
__doc__ = gzip.__doc__
del gzip

from gzip import *
5 changes: 5 additions & 0 deletions Lib/compression/lzma/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import lzma
__doc__ = lzma.__doc__
del lzma

from lzma import *
5 changes: 5 additions & 0 deletions Lib/compression/zlib/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import zlib
__doc__ = zlib.__doc__
del zlib

from zlib import *
6 changes: 3 additions & 3 deletions Lib/gzip.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

# based on Andrew Kuchling's minigzip.py distributed with the zlib module

import _compression
import builtins
import io
import os
Expand All @@ -14,6 +13,7 @@
import time
import weakref
import zlib
from compression._common import _streams

__all__ = ["BadGzipFile", "GzipFile", "open", "compress", "decompress"]

Expand Down Expand Up @@ -144,7 +144,7 @@ def writable(self):
return True


class GzipFile(_compression.BaseStream):
class GzipFile(_streams.BaseStream):
"""The GzipFile class simulates most of the methods of a file object with
the exception of the truncate() method.

Expand Down Expand Up @@ -523,7 +523,7 @@ def _read_gzip_header(fp):
return last_mtime


class _GzipReader(_compression.DecompressReader):
class _GzipReader(_streams.DecompressReader):
def __init__(self, fp):
super().__init__(_PaddedFile(fp), zlib._ZlibDecompressor,
wbits=-zlib.MAX_WBITS)
Expand Down
6 changes: 3 additions & 3 deletions Lib/lzma.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@
import builtins
import io
import os
from compression._common import _streams
from _lzma import *
from _lzma import _encode_filter_properties, _decode_filter_properties # noqa: F401
import _compression


# Value 0 no longer used
Expand All @@ -35,7 +35,7 @@
_MODE_WRITE = 3


class LZMAFile(_compression.BaseStream):
class LZMAFile(_streams.BaseStream):

"""A file object providing transparent LZMA (de)compression.

Expand Down Expand Up @@ -127,7 +127,7 @@ def __init__(self, filename=None, mode="r", *,
raise TypeError("filename must be a str, bytes, file or PathLike object")

if self._mode == _MODE_READ:
raw = _compression.DecompressReader(self._fp, LZMADecompressor,
raw = _streams.DecompressReader(self._fp, LZMADecompressor,
trailing_error=LZMAError, format=format, filters=filters)
self._buffer = io.BufferedReader(raw)

Expand Down
10 changes: 5 additions & 5 deletions Lib/test/test_bz2.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from test.support import import_helper
from test.support import threading_helper
from test.support.os_helper import unlink, FakePath
import _compression
from compression._common import _streams
import sys


Expand Down Expand Up @@ -126,15 +126,15 @@ def testReadMultiStream(self):
def testReadMonkeyMultiStream(self):
# Test BZ2File.read() on a multi-stream archive where a stream
# boundary coincides with the end of the raw read buffer.
buffer_size = _compression.BUFFER_SIZE
_compression.BUFFER_SIZE = len(self.DATA)
buffer_size = _streams.BUFFER_SIZE
_streams.BUFFER_SIZE = len(self.DATA)
try:
self.createTempFile(streams=5)
with BZ2File(self.filename) as bz2f:
self.assertRaises(TypeError, bz2f.read, float())
self.assertEqual(bz2f.read(), self.TEXT * 5)
finally:
_compression.BUFFER_SIZE = buffer_size
_streams.BUFFER_SIZE = buffer_size

def testReadTrailingJunk(self):
self.createTempFile(suffix=self.BAD_DATA)
Expand Down Expand Up @@ -742,7 +742,7 @@ def testOpenPathLikeFilename(self):
def testDecompressLimited(self):
"""Decompressed data buffering should be limited"""
bomb = bz2.compress(b'\0' * int(2e6), compresslevel=9)
self.assertLess(len(bomb), _compression.BUFFER_SIZE)
self.assertLess(len(bomb), _streams.BUFFER_SIZE)

decomp = BZ2File(BytesIO(bomb))
self.assertEqual(decomp.read(1), b'\0')
Expand Down
10 changes: 5 additions & 5 deletions Lib/test/test_lzma.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import _compression
import array
from io import BytesIO, UnsupportedOperation, DEFAULT_BUFFER_SIZE
import os
Expand All @@ -7,6 +6,7 @@
import sys
from test import support
import unittest
from compression._common import _streams

from test.support import _4G, bigmemtest
from test.support.import_helper import import_module
Expand Down Expand Up @@ -861,13 +861,13 @@ def test_read_multistream(self):
def test_read_multistream_buffer_size_aligned(self):
# Test the case where a stream boundary coincides with the end
# of the raw read buffer.
saved_buffer_size = _compression.BUFFER_SIZE
_compression.BUFFER_SIZE = len(COMPRESSED_XZ)
saved_buffer_size = _streams.BUFFER_SIZE
_streams.BUFFER_SIZE = len(COMPRESSED_XZ)
try:
with LZMAFile(BytesIO(COMPRESSED_XZ * 5)) as f:
self.assertEqual(f.read(), INPUT * 5)
finally:
_compression.BUFFER_SIZE = saved_buffer_size
_streams.BUFFER_SIZE = saved_buffer_size

def test_read_trailing_junk(self):
with LZMAFile(BytesIO(COMPRESSED_XZ + COMPRESSED_BOGUS)) as f:
Expand Down Expand Up @@ -1066,7 +1066,7 @@ def test_readlines(self):
def test_decompress_limited(self):
"""Decompressed data buffering should be limited"""
bomb = lzma.compress(b'\0' * int(2e6), preset=6)
self.assertLess(len(bomb), _compression.BUFFER_SIZE)
self.assertLess(len(bomb), _streams.BUFFER_SIZE)

decomp = LZMAFile(BytesIO(bomb))
self.assertEqual(decomp.read(1), b'\0')
Expand Down
2 changes: 1 addition & 1 deletion Python/stdlib_module_names.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading