From 527f63ab2ff01f8dd829e794d3d1de436875cde1 Mon Sep 17 00:00:00 2001 From: Dylan Jay Date: Wed, 29 Nov 2023 10:27:30 +0700 Subject: [PATCH 1/7] avoid reading in whole blob during NamedBlobFile validation --- plone/namedfile/field.py | 16 +++++++-- plone/namedfile/tests/test_storable.py | 46 ++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 3 deletions(-) diff --git a/plone/namedfile/field.py b/plone/namedfile/field.py index 12dc7892..50ba0141 100644 --- a/plone/namedfile/field.py +++ b/plone/namedfile/field.py @@ -20,7 +20,7 @@ from zope.interface import Interface from zope.schema import Object from zope.schema import ValidationError - +from zope.schema._bootstrapinterfaces import SchemaNotProvided _ = MessageFactory("plone") @@ -106,7 +106,12 @@ def __init__(self, **kw): super().__init__(schema=self.schema, **kw) def _validate(self, value): - super()._validate(value) + # we don't want default validation of super()._validate(value) + # as that will read the whole file into memory + # schema has to be provided by value + if not self.schema.providedBy(value): + raise SchemaNotProvided(self.schema, value).with_field_and_value( + self, value) validate_file_field(self, value) @@ -123,5 +128,10 @@ def __init__(self, **kw): super().__init__(schema=self.schema, **kw) def _validate(self, value): - super()._validate(value) + # we don't want default validation of super()._validate(value) + # as that will read the whole file into memory + # schema has to be provided by value + if not self.schema.providedBy(value): + raise SchemaNotProvided(self.schema, value).with_field_and_value( + self, value) validate_image_field(self, value) diff --git a/plone/namedfile/tests/test_storable.py b/plone/namedfile/tests/test_storable.py index 8e8309c7..a3252641 100644 --- a/plone/namedfile/tests/test_storable.py +++ b/plone/namedfile/tests/test_storable.py @@ -18,6 +18,8 @@ from OFS.Image import Pdata from plone.namedfile.file import FileChunk from plone.namedfile.file import NamedBlobImage +from plone.namedfile.file import NamedBlobFile +from plone.namedfile import field from plone.namedfile.testing import PLONE_NAMEDFILE_FUNCTIONAL_TESTING from plone.namedfile.tests import getFile @@ -56,3 +58,47 @@ def test_opened_file_storable(self): if os.path.exists(path): os.remove(path) self.assertEqual(303, fi.getSize()) + + def test_upload_no_read(self): + # ensure we don't read the whole file into memory + + import ZODB.blob + + old_open = ZODB.blob.Blob.open + blob_read = 0 + blob_write = 0 + + def count_open(self, mode="r"): + nonlocal blob_read, blob_write + blob_read += 1 if "r" in mode else 0 + blob_write += 1 if "w" in mode else 0 + return old_open(self, mode) + + ZODB.blob.Blob.open = count_open + + data = getFile("image.gif") + f = tempfile.NamedTemporaryFile(delete=False) + try: + path = f.name + f.write(data) + f.close() + with open(path, "rb") as f: + fi = NamedBlobFile(f, filename="image.gif") + finally: + if os.path.exists(path): + os.remove(path) + self.assertEqual(303, fi.getSize()) + self.assertEqual(blob_read, 1, "blob should have only been opened to get size") + self.assertEqual( + blob_write, + 1, + "Slow write to blob instead of os rename. Should be only 1 on init", + ) + blob_read = 0 + + blob_field = field.NamedBlobFile() + blob_field.validate(fi) + + self.assertEqual(blob_read, 0, "Validation is reading the whole blob in memory") + + ZODB.blob.Blob.open = old_open From 832688ade45e17bf095ed54f34e7c1ea1d816987 Mon Sep 17 00:00:00 2001 From: Dylan Jay Date: Mon, 15 Jan 2024 10:33:04 +0700 Subject: [PATCH 2/7] make test patch more isolated --- plone/namedfile/tests/test_storable.py | 53 ++++++++++++-------------- 1 file changed, 25 insertions(+), 28 deletions(-) diff --git a/plone/namedfile/tests/test_storable.py b/plone/namedfile/tests/test_storable.py index a3252641..7e4f214c 100644 --- a/plone/namedfile/tests/test_storable.py +++ b/plone/namedfile/tests/test_storable.py @@ -74,31 +74,28 @@ def count_open(self, mode="r"): blob_write += 1 if "w" in mode else 0 return old_open(self, mode) - ZODB.blob.Blob.open = count_open - - data = getFile("image.gif") - f = tempfile.NamedTemporaryFile(delete=False) - try: - path = f.name - f.write(data) - f.close() - with open(path, "rb") as f: - fi = NamedBlobFile(f, filename="image.gif") - finally: - if os.path.exists(path): - os.remove(path) - self.assertEqual(303, fi.getSize()) - self.assertEqual(blob_read, 1, "blob should have only been opened to get size") - self.assertEqual( - blob_write, - 1, - "Slow write to blob instead of os rename. Should be only 1 on init", - ) - blob_read = 0 - - blob_field = field.NamedBlobFile() - blob_field.validate(fi) - - self.assertEqual(blob_read, 0, "Validation is reading the whole blob in memory") - - ZODB.blob.Blob.open = old_open + with unittest.mock.patch.object(ZODB.blob.Blob, 'open', count_open): + data = getFile("image.gif") + f = tempfile.NamedTemporaryFile(delete=False) + try: + path = f.name + f.write(data) + f.close() + with open(path, "rb") as f: + fi = NamedBlobFile(f, filename="image.gif") + finally: + if os.path.exists(path): + os.remove(path) + self.assertEqual(303, fi.getSize()) + self.assertEqual(blob_read, 1, "blob should have only been opened to get size") + self.assertEqual( + blob_write, + 1, + "Slow write to blob instead of os rename. Should be only 1 on init", + ) + blob_read = 0 + + blob_field = field.NamedBlobFile() + blob_field.validate(fi) + + self.assertEqual(blob_read, 0, "Validation is reading the whole blob in memory") From 945d88d2a3c6ff1f9113b0f88a7ea07a42e98975 Mon Sep 17 00:00:00 2001 From: Dylan Jay Date: Mon, 15 Jan 2024 11:22:26 +0700 Subject: [PATCH 3/7] rework file interfaces so data doesnt get read in for blobs --- plone/namedfile/field.py | 16 ++-------------- plone/namedfile/interfaces.py | 15 ++++++++++++--- 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/plone/namedfile/field.py b/plone/namedfile/field.py index 50ba0141..fbd29e41 100644 --- a/plone/namedfile/field.py +++ b/plone/namedfile/field.py @@ -106,13 +106,7 @@ def __init__(self, **kw): super().__init__(schema=self.schema, **kw) def _validate(self, value): - # we don't want default validation of super()._validate(value) - # as that will read the whole file into memory - # schema has to be provided by value - if not self.schema.providedBy(value): - raise SchemaNotProvided(self.schema, value).with_field_and_value( - self, value) - validate_file_field(self, value) + super()._validate(value) @implementer(INamedBlobImageField) @@ -128,10 +122,4 @@ def __init__(self, **kw): super().__init__(schema=self.schema, **kw) def _validate(self, value): - # we don't want default validation of super()._validate(value) - # as that will read the whole file into memory - # schema has to be provided by value - if not self.schema.providedBy(value): - raise SchemaNotProvided(self.schema, value).with_field_and_value( - self, value) - validate_image_field(self, value) + super()._validate(value) diff --git a/plone/namedfile/interfaces.py b/plone/namedfile/interfaces.py index d7e86ae9..630a3033 100644 --- a/plone/namedfile/interfaces.py +++ b/plone/namedfile/interfaces.py @@ -6,8 +6,8 @@ HAVE_BLOBS = True -class IFile(Interface): +class ITyped(Interface): contentType = schema.NativeStringLine( title="Content Type", description="The content type identifies the type of data.", @@ -16,6 +16,8 @@ class IFile(Interface): missing_value="", ) +class IFile(Interface): + data = schema.Bytes( title="Data", description="The actual content of the object.", @@ -79,6 +81,11 @@ class INamed(Interface): filename = schema.TextLine(title="Filename", required=False, default=None) +class INamedTyped(INamed, ITyped): + "" + pass + + class INamedFile(INamed, IFile): """A non-BLOB file with a filename""" @@ -122,12 +129,14 @@ class IBlobby(Interface): """Marker interface for objects that support blobs.""" -class INamedBlobFile(INamedFile, IBlobby): +class INamedBlobFile(INamedTyped, IBlobby): """A BLOB file with a filename""" + # Note: Doesn't inherit from IFile because default validation will read whole file into memory -class INamedBlobImage(INamedImage, IBlobby): +class INamedBlobImage(INamedTyped, IBlobby): """A BLOB image with a filename""" + # Note: Doesn't inherit from IFile because default validation will read whole file into memory # Fields From 1c0050011523b53991e47af3d103e73bfa8443e3 Mon Sep 17 00:00:00 2001 From: Dylan Jay Date: Mon, 15 Jan 2024 14:50:27 +0700 Subject: [PATCH 4/7] change file fields to validate against INamedTyped --- plone/namedfile/field.py | 6 +++--- plone/namedfile/interfaces.py | 13 +++++-------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/plone/namedfile/field.py b/plone/namedfile/field.py index fbd29e41..3e1e6994 100644 --- a/plone/namedfile/field.py +++ b/plone/namedfile/field.py @@ -2,7 +2,7 @@ from plone.namedfile.file import NamedBlobImage as BlobImageValueType from plone.namedfile.file import NamedFile as FileValueType from plone.namedfile.file import NamedImage as ImageValueType -from plone.namedfile.interfaces import INamedBlobFile +from plone.namedfile.interfaces import INamedBlobFile, INamedTyped from plone.namedfile.interfaces import INamedBlobFileField from plone.namedfile.interfaces import INamedBlobImage from plone.namedfile.interfaces import INamedBlobImageField @@ -98,7 +98,7 @@ class NamedBlobFile(Object): """A NamedBlobFile field""" _type = BlobFileValueType - schema = INamedBlobFile + schema = INamedTyped # Note: Don't validate against IFile as will read in whole file def __init__(self, **kw): if "schema" in kw: @@ -114,7 +114,7 @@ class NamedBlobImage(Object): """A NamedBlobImage field""" _type = BlobImageValueType - schema = INamedBlobImage + schema = INamedTyped # Note: Don't validate against IFile as will read in whole file def __init__(self, **kw): if "schema" in kw: diff --git a/plone/namedfile/interfaces.py b/plone/namedfile/interfaces.py index 630a3033..314e9063 100644 --- a/plone/namedfile/interfaces.py +++ b/plone/namedfile/interfaces.py @@ -82,15 +82,14 @@ class INamed(Interface): class INamedTyped(INamed, ITyped): - "" - pass + """An item with a filename and contentType""" -class INamedFile(INamed, IFile): +class INamedFile(INamedTyped, IFile): """A non-BLOB file with a filename""" -class INamedImage(INamed, IImage): +class INamedImage(INamedTyped, IImage): """A non-BLOB image with a filename""" @@ -129,14 +128,12 @@ class IBlobby(Interface): """Marker interface for objects that support blobs.""" -class INamedBlobFile(INamedTyped, IBlobby): +class INamedBlobFile(INamedFile, IBlobby): """A BLOB file with a filename""" - # Note: Doesn't inherit from IFile because default validation will read whole file into memory -class INamedBlobImage(INamedTyped, IBlobby): +class INamedBlobImage(INamedImage, IBlobby): """A BLOB image with a filename""" - # Note: Doesn't inherit from IFile because default validation will read whole file into memory # Fields From bf174b644fdc65caa8148fd719ac973edc18deb3 Mon Sep 17 00:00:00 2001 From: Dylan Jay Date: Mon, 15 Jan 2024 17:45:05 +0700 Subject: [PATCH 5/7] add changelog --- news/155.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 news/155.bugfix diff --git a/news/155.bugfix b/news/155.bugfix new file mode 100644 index 00000000..35c6453f --- /dev/null +++ b/news/155.bugfix @@ -0,0 +1 @@ +Speed up uploads by not reading blob into memory during NamedBlobFile field validation @djay75 \ No newline at end of file From a0b787a8eabd341af500fd228158ef94e9d6d7d4 Mon Sep 17 00:00:00 2001 From: Dylan Jay Date: Wed, 17 Jan 2024 01:45:34 +0700 Subject: [PATCH 6/7] validation against a different schema --- plone/namedfile/field.py | 94 +++++++++++++++++++++++++--------------- 1 file changed, 58 insertions(+), 36 deletions(-) diff --git a/plone/namedfile/field.py b/plone/namedfile/field.py index 3e1e6994..e5cf7401 100644 --- a/plone/namedfile/field.py +++ b/plone/namedfile/field.py @@ -20,7 +20,10 @@ from zope.interface import Interface from zope.schema import Object from zope.schema import ValidationError +from zope.schema import Field from zope.schema._bootstrapinterfaces import SchemaNotProvided +from zope.schema._bootstrapinterfaces import SchemaNotCorrectlyImplemented +from zope.schema._bootstrapfields import get_validation_errors _ = MessageFactory("plone") @@ -59,12 +62,7 @@ def validate_file_field(field, value): validate_binary_field(IPluggableFileFieldValidation, field, value) -@implementer(INamedFileField) -class NamedFile(Object): - """A NamedFile field""" - - _type = FileValueType - schema = INamedFile +class CustomValidator(object): def __init__(self, **kw): if "schema" in kw: @@ -72,54 +70,78 @@ def __init__(self, **kw): super().__init__(schema=self.schema, **kw) def _validate(self, value): - super()._validate(value) - validate_file_field(self, value) + super(Object, self)._validate(value) + + # schema has to be provided by value + if not self.validation_schema.providedBy(value): + raise SchemaNotProvided(self.validation_schema, value).with_field_and_value( + self, value) + + # check the value against schema + schema_error_dict, invariant_errors = get_validation_errors( + self.validation_schema, + value, + self.validate_invariants + ) + + if schema_error_dict or invariant_errors: + errors = list(schema_error_dict.values()) + invariant_errors + exception = SchemaNotCorrectlyImplemented( + errors, + self.__name__, + schema_error_dict, + invariant_errors + ).with_field_and_value(self, value) + + try: + raise exception + finally: + # Break cycles + del exception + del invariant_errors + del schema_error_dict + del errors + + self.extra_validator(value) + + +@implementer(INamedFileField) +class NamedFile(CustomValidator, Object): + """A NamedFile field""" + + _type = FileValueType + schema = INamedFile + validation_schema = INamedFile + extra_validator = validate_file_field @implementer(INamedImageField) -class NamedImage(Object): +class NamedImage(CustomValidator, Object): """A NamedImage field""" _type = ImageValueType schema = INamedImage - - def __init__(self, **kw): - if "schema" in kw: - self.schema = kw.pop("schema") - super().__init__(schema=self.schema, **kw) - - def _validate(self, value): - super()._validate(value) - validate_image_field(self, value) + validation_schema = INamedImage + extra_validator = validate_image_field @implementer(INamedBlobFileField) -class NamedBlobFile(Object): +class NamedBlobFile(CustomValidator, Object): """A NamedBlobFile field""" _type = BlobFileValueType - schema = INamedTyped # Note: Don't validate against IFile as will read in whole file + schema = INamedFile + validation_schema = INamedTyped # Note: Don't validate data as will read in whole file + extra_validator = validate_file_field - def __init__(self, **kw): - if "schema" in kw: - self.schema = kw.pop("schema") - super().__init__(schema=self.schema, **kw) - - def _validate(self, value): - super()._validate(value) @implementer(INamedBlobImageField) -class NamedBlobImage(Object): +class NamedBlobImage(CustomValidator, Object): """A NamedBlobImage field""" _type = BlobImageValueType - schema = INamedTyped # Note: Don't validate against IFile as will read in whole file + schema = INamedImage + validation_schema = INamedTyped # Note: Don't validate data as will read in whole file + extra_validator = validate_image_field - def __init__(self, **kw): - if "schema" in kw: - self.schema = kw.pop("schema") - super().__init__(schema=self.schema, **kw) - - def _validate(self, value): - super()._validate(value) From 7f576b0d1163c2b5333dec2de5bc29030faa1c15 Mon Sep 17 00:00:00 2001 From: Dylan Jay Date: Wed, 17 Jan 2024 01:56:42 +0700 Subject: [PATCH 7/7] comments --- plone/namedfile/field.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/plone/namedfile/field.py b/plone/namedfile/field.py index e5cf7401..6f3e7543 100644 --- a/plone/namedfile/field.py +++ b/plone/namedfile/field.py @@ -63,6 +63,10 @@ def validate_file_field(field, value): class CustomValidator(object): + """ Mixin that overrides Object._validate with the same code except it will + use self.validation_schema instead of self.schema. It will also execute + self.extra_validator if it exists. + """ def __init__(self, **kw): if "schema" in kw: @@ -101,8 +105,9 @@ def _validate(self, value): del invariant_errors del schema_error_dict del errors - - self.extra_validator(value) + + if hasattr(self, 'extra_validator'): + self.extra_validator(value) @implementer(INamedFileField) @@ -130,18 +135,17 @@ class NamedBlobFile(CustomValidator, Object): """A NamedBlobFile field""" _type = BlobFileValueType - schema = INamedFile + schema = INamedFile validation_schema = INamedTyped # Note: Don't validate data as will read in whole file extra_validator = validate_file_field - @implementer(INamedBlobImageField) class NamedBlobImage(CustomValidator, Object): """A NamedBlobImage field""" _type = BlobImageValueType - schema = INamedImage + schema = INamedImage validation_schema = INamedTyped # Note: Don't validate data as will read in whole file extra_validator = validate_image_field