Skip to content

webp imageinfo #181

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ parts/
pyvenv.cfg
var/
local.cfg
.python-version

# mxdev
/instance/
Expand Down
1 change: 1 addition & 0 deletions news/181.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Support for extracting metadata from WebP images without loading the entire image into memory, as already done for other formats. [mamico]
Binary file added plone/namedfile/tests/900.webp
Binary file not shown.
Binary file added plone/namedfile/tests/image.bmp
Binary file not shown.
Binary file added plone/namedfile/tests/image_loseless.webp
Binary file not shown.
Binary file added plone/namedfile/tests/image_lossy.webp
Binary file not shown.
80 changes: 80 additions & 0 deletions plone/namedfile/tests/test_storable.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@
from ZODB.blob import Blob
from ZODB.blob import BlobFile

import io
import os
import piexif
import PIL
import tempfile
import unittest

Expand Down Expand Up @@ -128,3 +131,80 @@ def count_reads(self, size=-1):
self.assertEqual(
read_bytes, 0, "Validation is reading the whole blob in memory"
)

def test_large_webp_storable(self):
# ensure we don't read the whole file into memory
#
# Builds a NamedBlobImage from the "900.webp" fixture while Blob.open and
# BlobFile.read are replaced by counting wrappers, then checks the reported
# image size, the number of bytes read and the number of read-mode opens.
#
# NOTE(review): indentation is not visible in this view of the file, so it
# is unclear whether the assertions at the end run inside the ``with``
# block (methods still patched) or after it - confirm against the
# repository before restructuring.

# Keep references to the unpatched methods so the wrappers can delegate.
old_open = Blob.open
blob_read = 0
# blob_write is tallied by count_open but never asserted on in this test.
blob_write = 0
old_read = BlobFile.read
read_bytes = 0

# Stand-in for Blob.open: counts opens whose mode contains "r" / "w",
# then calls the original method.
def count_open(self, mode="r"):
nonlocal blob_read, blob_write
blob_read += 1 if "r" in mode else 0
blob_write += 1 if "w" in mode else 0
return old_open(self, mode)

# Stand-in for BlobFile.read: adds the length of every returned chunk to
# read_bytes and passes the data through unchanged.
def count_reads(self, size=-1):
nonlocal read_bytes
res = old_read(self, size)
read_bytes += len(res)
return res

with patch.object(Blob, "open", count_open), patch.object(
BlobFile, "read", count_reads
):
fi = NamedBlobImage(getFile("900.webp"), filename="900.webp")
# The fixture is expected to be reported as 900x900.
self.assertEqual((900, 900), fi.getImageSize())
# Fewer bytes must have been read than the blob's total size.
self.assertLess(
read_bytes,
fi.getSize(),
"Images should not need to read all data to get exif, dimensions",
)
# Exactly three read-mode opens are expected (see the assertion message).
self.assertEqual(
blob_read, 3, "blob opening for getsize, get_exif and getImageInfo only"
)

def test_rotate(self):
    """Check size and corner pixels of JPEGs stored with an EXIF orientation.

    A 100x400 white image with a black top-left pixel is saved as JPEG
    twice, once with orientation 6 and once with orientation 2, and wrapped
    in a NamedBlobImage. The reported image size and the four corner pixels
    of the stored data are then compared with the expected values.
    """
    # 100x400 (width x height) white canvas; the single black pixel in the
    # top-left corner marks where that corner ends up after storing.
    canvas = PIL.Image.new("RGB", (100, 400), "white")
    canvas.putpixel((0, 0), (0, 0, 0))
    white = (255, 255, 255)

    def store_with_orientation(orientation):
        """Save the canvas as JPEG with the given EXIF orientation and store it.

        Returns the NamedBlobImage and a PIL image opened from its data.
        """
        exif_dict = {"0th": {}, "Exif": {}, "GPS": {}, "1st": {}, "thumbnail": None}
        exif_dict["0th"][piexif.ImageIFD.Orientation] = orientation
        exif_bytes = piexif.dump(exif_dict)
        buffer = io.BytesIO()
        canvas.save(buffer, format="JPEG", exif=exif_bytes)
        stored = NamedBlobImage(buffer.getvalue(), filename="image.jpg")
        return stored, PIL.Image.open(io.BytesIO(stored.data))

    # Orientation 6 (270 degree rotation): the size is expected to be
    # swapped to 400x100 and the marker to sit in the top-right corner.
    stored, decoded = store_with_orientation(6)
    self.assertEqual(stored.getImageSize(), (400, 100))
    self.assertEqual(decoded.getpixel((0, 0)), white)
    # The marker is not exactly black (RGB 0,0,0) because of quantization
    # errors and compression artifacts introduced by JPEG encoding.
    self.assertEqual(decoded.getpixel((399, 0)), (10, 10, 10))
    self.assertEqual(decoded.getpixel((0, 99)), white)
    self.assertEqual(decoded.getpixel((399, 99)), white)

    # Orientation 2 (flip left to right): the size stays 100x400 and the
    # marker is expected in the top-right corner.
    stored, decoded = store_with_orientation(2)
    self.assertEqual(stored.getImageSize(), (100, 400))
    self.assertEqual(decoded.getpixel((0, 0)), white)
    # Again only near-black, for the same JPEG-related reasons as above.
    self.assertEqual(decoded.getpixel((99, 0)), (8, 8, 8))
    self.assertEqual(decoded.getpixel((0, 399)), white)
    self.assertEqual(decoded.getpixel((99, 399)), white)
27 changes: 27 additions & 0 deletions plone/namedfile/tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from plone.namedfile.file import NamedImage
from plone.namedfile.tests import getFile
from plone.namedfile.utils import get_contenttype
from plone.namedfile.utils import getImageInfo

import unittest

Expand Down Expand Up @@ -62,3 +63,29 @@ def test_get_contenttype(self):
get_contenttype(filename="nothing.plonenamedfile"),
"application/octet-stream",
)

def test_get_image_info(self):
    """getImageInfo returns (content type, width, height) for each fixture."""
    expected_by_fixture = (
        # WebP, lossy (VP8 chunk)
        ("image_lossy.webp", ("image/webp", 500, 200)),
        # WebP, lossless (VP8L chunk)
        ("image_loseless.webp", ("image/webp", 200, 200)),
        # PNG
        ("image.png", ("image/png", 200, 200)),
        # BMP3
        ("image.bmp", ("image/x-ms-bmp", 200, 200)),
    )
    # Checked in the listed order; the first mismatch fails the test.
    for fixture_name, expected_info in expected_by_fixture:
        self.assertEqual(getImageInfo(getFile(fixture_name)), expected_info)
28 changes: 27 additions & 1 deletion plone/namedfile/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,11 +203,37 @@ def getImageInfo(data):
content_type = "image/x-ms-bmp"
width, height = struct.unpack("<LL", data[18:26])

elif size > 30 and data[:4] == b"RIFF" and data[8:12] == b"WEBP":
# handle WEBPs
content_type = "image/webp"
chunk_type = data[12:16]
if chunk_type == b"VP8 ":
# WebP lossy (VP8): width and height are at offset 26–30 (2 bytes each, little endian)
width = int.from_bytes(data[26:28], "little") & 0x3FFF
height = int.from_bytes(data[28:30], "little") & 0x3FFF
elif chunk_type == b"VP8L":
# WebP lossless (VP8L): dimensions are encoded in 4 bytes at offset 21–25
b = data[21:25]
b0, b1, b2, b3 = b
width = ((b1 & 0x3F) << 8 | b0) + 1
height = ((b3 & 0xF) << 10 | b2 << 2 | (b1 >> 6)) + 1
elif chunk_type == b"VP8X":
# Extended WebP (VP8X)
# Width: bytes 24-26 (little endian, minus 1)
# Height: bytes 27-29 (little endian, minus 1)
width_bytes = data[24:27]
height_bytes = data[27:30]
width = int.from_bytes(width_bytes, "little") + 1
height = int.from_bytes(height_bytes, "little") + 1
else:
# Not supported: could be animation, etc.
pass

elif size and b"http://www.w3.org/2000/svg" in data:
# handle SVGs
content_type, width, height = process_svg(data)

else:
if (width, height) == (-1, -1):
# Use PIL / Pillow to determ Image Information
try:
img = PIL.Image.open(BytesIO(data))
Expand Down