diff --git a/.gitignore b/.gitignore
index a602abea..6a9a394d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -38,6 +38,7 @@ parts/
 pyvenv.cfg
 var/
 local.cfg
+.python-version
 
 # mxdev
 /instance/
diff --git a/news/181.bugfix b/news/181.bugfix
new file mode 100644
index 00000000..2d1ea98e
--- /dev/null
+++ b/news/181.bugfix
@@ -0,0 +1 @@
+Support for extracting metadata from WebP images without loading the entire image into memory, as already done for other formats. [mamico]
diff --git a/plone/namedfile/tests/900.webp b/plone/namedfile/tests/900.webp
new file mode 100644
index 00000000..c2c4bfdb
Binary files /dev/null and b/plone/namedfile/tests/900.webp differ
diff --git a/plone/namedfile/tests/image.bmp b/plone/namedfile/tests/image.bmp
new file mode 100644
index 00000000..00494d53
Binary files /dev/null and b/plone/namedfile/tests/image.bmp differ
diff --git a/plone/namedfile/tests/image_loseless.webp b/plone/namedfile/tests/image_loseless.webp
new file mode 100644
index 00000000..db5fcb16
Binary files /dev/null and b/plone/namedfile/tests/image_loseless.webp differ
diff --git a/plone/namedfile/tests/image_lossy.webp b/plone/namedfile/tests/image_lossy.webp
new file mode 100644
index 00000000..ef78d2e6
Binary files /dev/null and b/plone/namedfile/tests/image_lossy.webp differ
diff --git a/plone/namedfile/tests/test_storable.py b/plone/namedfile/tests/test_storable.py
index bf4dc35b..f1630836 100644
--- a/plone/namedfile/tests/test_storable.py
+++ b/plone/namedfile/tests/test_storable.py
@@ -25,7 +25,10 @@
 from ZODB.blob import Blob
 from ZODB.blob import BlobFile
 
+import io
 import os
+import piexif
+import PIL.Image
 import tempfile
 import unittest
 
@@ -128,3 +131,80 @@ def count_reads(self, size=-1):
             self.assertEqual(
                 read_bytes, 0, "Validation is reading the whole blob in memory"
             )
+
+    def test_large_webp_storable(self):
+        # ensure we don't read the whole WebP file into memory
+
+        old_open = Blob.open
+        blob_read = 0
+        blob_write = 0
+        old_read = BlobFile.read
+        read_bytes = 0
+
+        def count_open(self, mode="r"):
+            nonlocal blob_read, blob_write
+            blob_read += 1 if "r" in mode else 0
+            blob_write += 1 if "w" in mode else 0
+            return old_open(self, mode)
+
+        def count_reads(self, size=-1):
+            nonlocal read_bytes
+            res = old_read(self, size)
+            read_bytes += len(res)
+            return res
+
+        with patch.object(Blob, "open", count_open), patch.object(
+            BlobFile, "read", count_reads
+        ):
+            fi = NamedBlobImage(getFile("900.webp"), filename="900.webp")
+            self.assertEqual((900, 900), fi.getImageSize())
+            self.assertLess(
+                read_bytes,
+                fi.getSize(),
+                "Images should not need to read all data to get exif, dimensions",
+            )
+            self.assertEqual(
+                blob_read, 3, "blob opening for getsize, get_exif and getImageInfo only"
+            )
+
+    def test_rotate(self):
+        # Create a 100x400 (width x height) white image
+        img = PIL.Image.new("RGB", (100, 400), "white")
+        # Set the top-left pixel to black so the applied transform can be tracked
+        img.putpixel((0, 0), (0, 0, 0))
+
+        # EXIF orientation 6: the marker pixel must end up in the top-right corner
+        # Create EXIF dict
+        exif_dict = {"0th": {}, "Exif": {}, "GPS": {}, "1st": {}, "thumbnail": None}
+        exif_dict["0th"][piexif.ImageIFD.Orientation] = 6
+        exif_bytes = piexif.dump(exif_dict)
+        # Save image as JPEG with EXIF data
+        out = io.BytesIO()
+        img.save(out, format="JPEG", exif=exif_bytes)
+        fi_jpg = NamedBlobImage(out.getvalue(), filename="image.jpg")
+        img_jpg = PIL.Image.open(io.BytesIO(fi_jpg.data))
+        self.assertEqual(fi_jpg.getImageSize(), (400, 100))
+        self.assertEqual(img_jpg.getpixel((0, 0)), (255, 255, 255))
+        # The pixel is not exactly black (RGB 0,0,0) due to quantization errors and
+        # compression artifacts of the JPEG encoder, so only require it to be dark.
+        self.assertLess(max(img_jpg.getpixel((399, 0))), 32)
+        self.assertEqual(img_jpg.getpixel((0, 99)), (255, 255, 255))
+        self.assertEqual(img_jpg.getpixel((399, 99)), (255, 255, 255))
+
+        # EXIF orientation 2: flip left to right
+        # Create EXIF dict
+        exif_dict = {"0th": {}, "Exif": {}, "GPS": {}, "1st": {}, "thumbnail": None}
+        exif_dict["0th"][piexif.ImageIFD.Orientation] = 2
+        exif_bytes = piexif.dump(exif_dict)
+        # Save image as JPEG with EXIF data
+        out = io.BytesIO()
+        img.save(out, format="JPEG", exif=exif_bytes)
+        fi_jpg = NamedBlobImage(out.getvalue(), filename="image.jpg")
+        img_jpg = PIL.Image.open(io.BytesIO(fi_jpg.data))
+        self.assertEqual(fi_jpg.getImageSize(), (100, 400))
+        self.assertEqual(img_jpg.getpixel((0, 0)), (255, 255, 255))
+        # The pixel is not exactly black (RGB 0,0,0) due to quantization errors and
+        # compression artifacts of the JPEG encoder, so only require it to be dark.
+        self.assertLess(max(img_jpg.getpixel((99, 0))), 32)
+        self.assertEqual(img_jpg.getpixel((0, 399)), (255, 255, 255))
+        self.assertEqual(img_jpg.getpixel((99, 399)), (255, 255, 255))
diff --git a/plone/namedfile/tests/test_utils.py b/plone/namedfile/tests/test_utils.py
index 4ef9bff7..b74f4128 100644
--- a/plone/namedfile/tests/test_utils.py
+++ b/plone/namedfile/tests/test_utils.py
@@ -1,6 +1,7 @@
 from plone.namedfile.file import NamedImage
 from plone.namedfile.tests import getFile
 from plone.namedfile.utils import get_contenttype
+from plone.namedfile.utils import getImageInfo
 
 import unittest
 
@@ -62,3 +63,29 @@ def test_get_contenttype(self):
             get_contenttype(filename="nothing.plonenamedfile"),
             "application/octet-stream",
         )
+
+    def test_get_image_info(self):
+
+        # WebP lossy (VP8 chunk)
+        self.assertEqual(
+            getImageInfo(getFile("image_lossy.webp")),
+            ("image/webp", 500, 200),
+        )
+
+        # WebP lossless (VP8L chunk)
+        self.assertEqual(
+            getImageInfo(getFile("image_loseless.webp")),
+            ("image/webp", 200, 200),
+        )
+
+        # PNG
+        self.assertEqual(
+            getImageInfo(getFile("image.png")),
+            ("image/png", 200, 200),
+        )
+
+        # BMP3
+        self.assertEqual(
+            getImageInfo(getFile("image.bmp")),
+            ("image/x-ms-bmp", 200, 200),
+        )
diff --git a/plone/namedfile/utils/__init__.py b/plone/namedfile/utils/__init__.py
index 54eb0267..b240ed71 100644
--- a/plone/namedfile/utils/__init__.py
+++ b/plone/namedfile/utils/__init__.py
@@ -203,11 +203,37 @@ def getImageInfo(data):
         content_type = "image/x-ms-bmp"
         width, height = struct.unpack("<LL", data[18:26])
 
+    elif size > 30 and data[:4] == b"RIFF" and data[8:12] == b"WEBP":
+        # handle WEBPs: the FourCC at bytes 12-15 selects the header layout
+        content_type = "image/webp"
+        chunk_type = data[12:16]
+        if chunk_type == b"VP8 ":
+            # WebP lossy (VP8): 14-bit width/height in bytes 26-27 and 28-29 (little endian)
+            width = int.from_bytes(data[26:28], "little") & 0x3FFF
+            height = int.from_bytes(data[28:30], "little") & 0x3FFF
+        elif chunk_type == b"VP8L":
+            # WebP lossless (VP8L): 14-bit (width - 1) and (height - 1) packed in bytes 21-24
+            b = data[21:25]
+            b0, b1, b2, b3 = b
+            width = ((b1 & 0x3F) << 8 | b0) + 1
+            height = ((b3 & 0xF) << 10 | b2 << 2 | (b1 >> 6)) + 1
+        elif chunk_type == b"VP8X":
+            # Extended WebP (VP8X)
+            # Width: bytes 24-26 (little endian, minus 1)
+            # Height: bytes 27-29 (little endian, minus 1)
+            width_bytes = data[24:27]
+            height_bytes = data[27:30]
+            width = int.from_bytes(width_bytes, "little") + 1
+            height = int.from_bytes(height_bytes, "little") + 1
+        else:
+            # Unknown first chunk: keep size at -1 so the PIL fallback below runs
+            pass
+
     elif size and b"http://www.w3.org/2000/svg" in data:
         # handle SVGs
         content_type, width, height = process_svg(data)
 
-    else:
+    if (width, height) == (-1, -1):
         # Use PIL / Pillow to determ Image Information
         try:
             img = PIL.Image.open(BytesIO(data))