plone · davisagli · May 11, 2025 · Apr 26, 2025 · Apr 27, 2025 · Apr 27, 2025
diff --git a/.gitignore b/.gitignore
@@ -38,6 +38,7 @@ parts/
 pyvenv.cfg
 var/
 local.cfg
+.python-version
 
 # mxdev
 /instance/

diff --git a/news/181.bugfix b/news/181.bugfix
@@ -0,0 +1 @@
+Support for extracting metadata from WebP images without loading the entire image into memory, as already done for other formats. [mamico]
diff --git a/plone/namedfile/tests/900.webp b/plone/namedfile/tests/900.webp
diff --git a/plone/namedfile/tests/image.bmp b/plone/namedfile/tests/image.bmp
diff --git a/plone/namedfile/tests/image_loseless.webp b/plone/namedfile/tests/image_loseless.webp
diff --git a/plone/namedfile/tests/image_lossy.webp b/plone/namedfile/tests/image_lossy.webp
diff --git a/plone/namedfile/tests/test_storable.py b/plone/namedfile/tests/test_storable.py
@@ -25,7 +25,10 @@
 from ZODB.blob import Blob
 from ZODB.blob import BlobFile
 
+import io
 import os
+import piexif
+import PIL
 import tempfile
 import unittest
 
@@ -128,3 +131,80 @@ def count_reads(self, size=-1):
             self.assertEqual(
                 read_bytes, 0, "Validation is reading the whole blob in memory"
             )
+
+    def test_large_webp_storable(self):
+        # ensure we don't read the whole file into memory
+
+        old_open = Blob.open
+        blob_read = 0
+        blob_write = 0
+        old_read = BlobFile.read
+        read_bytes = 0
+
+        def count_open(self, mode="r"):
+            nonlocal blob_read, blob_write
+            blob_read += 1 if "r" in mode else 0
+            blob_write += 1 if "w" in mode else 0
+            return old_open(self, mode)
+
+        def count_reads(self, size=-1):
+            nonlocal read_bytes
+            res = old_read(self, size)
+            read_bytes += len(res)
+            return res
+
+        with patch.object(Blob, "open", count_open), patch.object(
+            BlobFile, "read", count_reads
+        ):
+            fi = NamedBlobImage(getFile("900.webp"), filename="900.webp")
+            self.assertEqual((900, 900), fi.getImageSize())
+            self.assertLess(
+                read_bytes,
+                fi.getSize(),
+                "Images should not need to read all data to get exif, dimensions",
+            )
+            self.assertEqual(
+                blob_read, 3, "blob opening for getsize, get_exif and getImageInfo only"
+            )
+
+    def test_rotate(self):
+        # Create a 100x400 white image
+        img = PIL.Image.new("RGB", (100, 400), "white")
+        # Set the top-left pixel to black
+        img.putpixel((0, 0), (0, 0, 0))
+
+        # EXIF orientation 6 (270 degree rotation)
+        # Create EXIF dict
+        exif_dict = {"0th": {}, "Exif": {}, "GPS": {}, "1st": {}, "thumbnail": None}
+        exif_dict["0th"][piexif.ImageIFD.Orientation] = 6
+        exif_bytes = piexif.dump(exif_dict)
+        # Save image as JPEG with EXIF data
+        out = io.BytesIO()
+        img.save(out, format="JPEG", exif=exif_bytes)
+        fi_jpg = NamedBlobImage(out.getvalue(), filename="image.jpg")
+        img_jpg = PIL.Image.open(io.BytesIO(fi_jpg.data))
+        self.assertEqual(fi_jpg.getImageSize(), (400, 100))
+        self.assertEqual(img_jpg.getpixel((0, 0)), (255, 255, 255))
+        # The pixel is not exactly black (RGB 0,0,0) due to quantization errors and
+        # compression artifacts introduced by the JPEG encoding process.
+        self.assertEqual(img_jpg.getpixel((399, 0)), (10, 10, 10))
+        self.assertEqual(img_jpg.getpixel((0, 99)), (255, 255, 255))
+        self.assertEqual(img_jpg.getpixel((399, 99)), (255, 255, 255))
+
+        # EXIF orientation 2 (flip left to right)
+        # Create EXIF dict
+        exif_dict = {"0th": {}, "Exif": {}, "GPS": {}, "1st": {}, "thumbnail": None}
+        exif_dict["0th"][piexif.ImageIFD.Orientation] = 2
+        exif_bytes = piexif.dump(exif_dict)
+        # Save image as JPEG with EXIF data
+        out = io.BytesIO()
+        img.save(out, format="JPEG", exif=exif_bytes)
+        fi_jpg = NamedBlobImage(out.getvalue(), filename="image.jpg")
+        img_jpg = PIL.Image.open(io.BytesIO(fi_jpg.data))
+        self.assertEqual(fi_jpg.getImageSize(), (100, 400))
+        self.assertEqual(img_jpg.getpixel((0, 0)), (255, 255, 255))
+        # The pixel is not exactly black (RGB 0,0,0) due to quantization errors and
+        # compression artifacts introduced by the JPEG encoding process.
+        self.assertEqual(img_jpg.getpixel((99, 0)), (8, 8, 8))
+        self.assertEqual(img_jpg.getpixel((0, 399)), (255, 255, 255))
+        self.assertEqual(img_jpg.getpixel((99, 399)), (255, 255, 255))
diff --git a/plone/namedfile/tests/test_utils.py b/plone/namedfile/tests/test_utils.py
@@ -1,6 +1,7 @@
 from plone.namedfile.file import NamedImage
 from plone.namedfile.tests import getFile
 from plone.namedfile.utils import get_contenttype
+from plone.namedfile.utils import getImageInfo
 
 import unittest
 
@@ -62,3 +63,29 @@ def test_get_contenttype(self):
             get_contenttype(filename="nothing.plonenamedfile"),
             "application/octet-stream",
         )
+
+    def test_get_image_info(self):
+
+        # WEBP VP8 (lossy)
+        self.assertEqual(
+            getImageInfo(getFile("image_lossy.webp")),
+            ("image/webp", 500, 200),
+        )
+
+        # WEBP VP8L (lossless)
+        self.assertEqual(
+            getImageInfo(getFile("image_loseless.webp")),
+            ("image/webp", 200, 200),
+        )
+
+        # PNG
+        self.assertEqual(
+            getImageInfo(getFile("image.png")),
+            ("image/png", 200, 200),
+        )
+
+        # BMP3
+        self.assertEqual(
+            getImageInfo(getFile("image.bmp")),
+            ("image/x-ms-bmp", 200, 200),
+        )
diff --git a/plone/namedfile/utils/__init__.py b/plone/namedfile/utils/__init__.py
@@ -203,11 +203,37 @@ def getImageInfo(data):
             content_type = "image/x-ms-bmp"
             width, height = struct.unpack("<LL", data[18:26])
 
+    elif size > 30 and data[:4] == b"RIFF" and data[8:12] == b"WEBP":
+        # handle WEBPs
+        content_type = "image/webp"
+        chunk_type = data[12:16]
+        if chunk_type == b"VP8 ":
+            # WebP lossy (VP8): width and height are at offset 26–30 (2 bytes each, little endian)
+            width = int.from_bytes(data[26:28], "little") & 0x3FFF
+            height = int.from_bytes(data[28:30], "little") & 0x3FFF
+        elif chunk_type == b"VP8L":
+            # WebP lossless (VP8L): dimensions are encoded in 4 bytes at offset 21–25
+            b = data[21:25]
+            b0, b1, b2, b3 = b
+            width = ((b1 & 0x3F) << 8 | b0) + 1
+            height = ((b3 & 0xF) << 10 | b2 << 2 | (b1 >> 6)) + 1
+        elif chunk_type == b"VP8X":
+            # Extended WebP (VP8X)
+            # Width: bytes 24-26 (little endian, stored as width - 1)
+            # Height: bytes 27-29 (little endian, stored as height - 1)
+            width_bytes = data[24:27]
+            height_bytes = data[27:30]
+            width = int.from_bytes(width_bytes, "little") + 1
+            height = int.from_bytes(height_bytes, "little") + 1
+        else:
+            # Unrecognized first chunk: fall through to the PIL-based detection below.
+            pass
+
     elif size and b"http://www.w3.org/2000/svg" in data:
         # handle SVGs
         content_type, width, height = process_svg(data)
 
-    else:
+    if (width, height) == (-1, -1):
         # Use PIL / Pillow to determ Image Information
         try:
             img = PIL.Image.open(BytesIO(data))
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Support for extracting metadata from WebP images without loading the entire image into memory, as already done for other formats. [mamico]