diff --git a/back/README.md b/back/README.md index 9a51b7967..3611c6651 100755 --- a/back/README.md +++ b/back/README.md @@ -136,6 +136,7 @@ Mind the following perks of peewee: 1. When creating a model instance referencing another model via a foreign key, use the ID of the FK model instance instead of a model instance, e.g. `Location(base=1)`. 1. If you want to retrieve only the ID of a foreign key field, access it with the "magic" suffix `_id`, e.g. `location.base_id`. This avoids overhead of an additional select query issued by peewee when using `location.base.id`. +1. peewee will cache the results of a select operation, leading to large memory consumption for large result sets. This can be avoided by attaching `.iterator()` to the select call. See [docs for more info](http://docs.peewee-orm.com/en/latest/peewee/querying.html#iterating-over-large-result-sets) and also this [insightful question](https://stackoverflow.com/questions/77564291/debug-peewee-cache-hits/77571546#77571546). 1. You can activate peewee's logging to gain insight into the generated SQL queries: ```python diff --git a/back/boxtribute_server/graph_ql/loaders.py b/back/boxtribute_server/graph_ql/loaders.py index 2d68da196..35ac9c068 100644 --- a/back/boxtribute_server/graph_ql/loaders.py +++ b/back/boxtribute_server/graph_ql/loaders.py @@ -56,7 +56,9 @@ async def batch_load_fn(self, ids): permission = f"{resource}:read" authorize(permission=permission) - rows = {r.id: r for r in self.model.select().where(self.model.id << ids)} + rows = { + r.id: r for r in self.model.select().where(self.model.id << ids).iterator() + } return [rows.get(i) for i in ids] @@ -119,10 +121,12 @@ class ShipmentLoader(DataLoader): async def batch_load_fn(self, keys): shipments = { s.id: s - for s in Shipment.select().orwhere( + for s in Shipment.select() + .orwhere( authorized_bases_filter(Shipment, base_fk_field_name="source_base_id"), authorized_bases_filter(Shipment, base_fk_field_name="target_base_id"), ) + 
.iterator() } return [shipments.get(i) for i in keys] @@ -132,11 +136,12 @@ async def batch_load_fn(self, agreement_ids): # Select all shipments with given agreement IDs that the user is authorized for, # and group them by agreement ID shipments = defaultdict(list) - for shipment in Shipment.select().where( + result = Shipment.select().where( Shipment.transfer_agreement << agreement_ids, authorized_bases_filter(Shipment, base_fk_field_name="source_base") | authorized_bases_filter(Shipment, base_fk_field_name="target_base"), - ): + ) + for shipment in result.iterator(): shipments[shipment.transfer_agreement_id].append(shipment) # Return empty list if agreement has no shipments attached return [shipments.get(i, []) for i in agreement_ids] @@ -146,7 +151,7 @@ class TagsForBoxLoader(DataLoader): async def batch_load_fn(self, keys): tags = defaultdict(list) # maybe need different join type - for relation in TagsRelation.select( + result = TagsRelation.select( TagsRelation.object_type, TagsRelation.object_id, Tag ).join( Tag, @@ -157,7 +162,8 @@ async def batch_load_fn(self, keys): & (TagsRelation.deleted_on.is_null()) & (authorized_bases_filter(Tag)) ), - ): + ) + for relation in result.iterator(): tags[relation.object_id].append(relation.tag) # Keys are in fact box IDs. 
Return empty list if box has no tags assigned @@ -434,7 +440,7 @@ async def batch_load_fn(self, box_ids): # Construct mapping of box IDs and their history information box_histories = defaultdict(list) - for row in result.dicts(): + for row in result.dicts().iterator(): box_histories[row["record_id"]].extend( [ DbChangeHistory(id=i, user=u, changes=c, change_date=d) @@ -467,6 +473,7 @@ async def batch_load_fn(self, shipment_ids): ShipmentDetail.select(ShipmentDetail, Shipment) .join(Shipment) .where(ShipmentDetail.shipment << shipment_ids) + .iterator() ): details[detail.shipment_id].append(detail) # Return empty list if shipment has no details attached @@ -477,12 +484,14 @@ class ShipmentDetailForBoxLoader(DataLoader): async def batch_load_fn(self, keys): details = { detail.box_id: detail - for detail in ShipmentDetail.select().where( + for detail in ShipmentDetail.select() + .where( ShipmentDetail.box << keys, ShipmentDetail.removed_on.is_null(), ShipmentDetail.lost_on.is_null(), ShipmentDetail.received_on.is_null(), ) + .iterator() } # Keys are in fact box IDs. Return None if box has no shipment detail associated return [details.get(i) for i in keys] @@ -502,6 +511,7 @@ async def batch_load_fn(self, product_ids): (Box.deleted_on.is_null() | ~Box.deleted_on), ) .group_by(Box.product) + .iterator() } return [counts.get(i, 0) for i in product_ids] @@ -511,7 +521,7 @@ async def batch_load_fn(self, keys): authorize(permission="size:read") # Mapping of size range ID to list of sizes sizes = defaultdict(list) - for size in Size.select(): + for size in Size.select().iterator(): sizes[size.size_range_id].append(size) # Keys are in fact size range IDs. Return empty list if size range has no sizes return [sizes.get(i, []) for i in keys]