Skip to content

Commit 0630c4f

Browse files
authored
Merge pull request #241 from fnattino/fix/generate_subcatalogs
Fix unexpected behaviour of `generate_subcatalogs`
2 parents c3685d4 + 3ab4095 commit 0630c4f

File tree

2 files changed

+104
-5
lines changed

2 files changed

+104
-5
lines changed

pystac/catalog.py

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -520,7 +520,7 @@ def fn():
520520

521521
return self
522522

523-
def generate_subcatalogs(self, template, defaults=None, **kwargs):
523+
def generate_subcatalogs(self, template, defaults=None, parent_ids=None, **kwargs):
524524
"""Walks through the catalog and generates subcatalogs
525525
for items based on the template string. See :class:`~pystac.layout.LayoutTemplate`
526526
for details on the construction of template strings. This template string
@@ -533,29 +533,43 @@ def generate_subcatalogs(self, template, defaults=None, **kwargs):
533533
defaults (dict): Default values for the template variables
534534
that will be used if the property cannot be found on
535535
the item.
536+
parent_ids (List[str]): Optional list of the parent catalogs'
537+
identifiers. If the bottom-most subcatalags already match the
538+
template, no subcatalog is added.
536539
537540
Returns:
538541
[catalog]: List of new catalogs created
539542
"""
540543
result = []
544+
parent_ids = parent_ids or list()
545+
parent_ids.append(self.id)
541546
for child in self.get_children():
542-
result.extend(child.generate_subcatalogs(template, defaults=defaults))
547+
result.extend(
548+
child.generate_subcatalogs(template,
549+
defaults=defaults,
550+
parent_ids=parent_ids.copy()))
543551

544552
layout_template = LayoutTemplate(template, defaults=defaults)
545-
subcat_id_to_cat = {}
553+
546554
items = list(self.get_items())
547555
for item in items:
548556
item_parts = layout_template.get_template_values(item)
557+
id_iter = reversed(parent_ids)
558+
if all(['{}'.format(id) == next(id_iter, None)
559+
for id in reversed(item_parts.values())]):
560+
# Skip items for which the sub-catalog structure already
561+
# matches the template. The list of parent IDs can include more
562+
# elements on the root side, so compare the reversed sequences.
563+
continue
549564
curr_parent = self
550565
for k, v in item_parts.items():
551566
subcat_id = '{}'.format(v)
552-
subcat = subcat_id_to_cat.get(subcat_id)
567+
subcat = curr_parent.get_child(subcat_id)
553568
if subcat is None:
554569
subcat_desc = 'Catalog of items from {} with {} of {}'.format(
555570
curr_parent.id, k, v)
556571
subcat = pystac.Catalog(id=subcat_id, description=subcat_desc)
557572
curr_parent.add_child(subcat)
558-
subcat_id_to_cat[subcat_id] = subcat
559573
result.append(subcat)
560574
curr_parent = subcat
561575
self.remove_item(item.id)

tests/test_catalog.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,91 @@ def test_generate_subcatalogs_does_not_change_item_count(self):
329329
expected = item_counts[child.id]
330330
self.assertEqual(actual, expected, msg=" for child '{}'".format(child.id))
331331

332+
def test_generate_subcatalogs_can_be_applied_multiple_times(self):
333+
catalog = TestCases.test_case_8()
334+
335+
_ = catalog.generate_subcatalogs('${year}/${month}')
336+
catalog.normalize_hrefs('/tmp')
337+
expected_hrefs = {item.id: item.get_self_href() for item in catalog.get_all_items()}
338+
339+
result = catalog.generate_subcatalogs('${year}/${month}')
340+
self.assertEqual(len(result), 0)
341+
catalog.normalize_hrefs('/tmp')
342+
for item in catalog.get_all_items():
343+
self.assertEqual(item.get_self_href(),
344+
expected_hrefs[item.id],
345+
msg=" for item '{}'".format(item.id))
346+
347+
def test_generate_subcatalogs_works_after_adding_more_items(self):
348+
catalog = Catalog(id='test', description='Test')
349+
properties = dict(property1='A', property2=1)
350+
catalog.add_item(
351+
Item(id='item1',
352+
geometry=RANDOM_GEOM,
353+
bbox=RANDOM_BBOX,
354+
datetime=datetime.utcnow(),
355+
properties=properties))
356+
catalog.generate_subcatalogs('${property1}/${property2}')
357+
catalog.add_item(
358+
Item(id='item2',
359+
geometry=RANDOM_GEOM,
360+
bbox=RANDOM_BBOX,
361+
datetime=datetime.utcnow(),
362+
properties=properties))
363+
catalog.generate_subcatalogs('${property1}/${property2}')
364+
365+
catalog.normalize_hrefs('/tmp')
366+
item1_parent = catalog.get_item('item1', recursive=True).get_parent()
367+
item2_parent = catalog.get_item('item2', recursive=True).get_parent()
368+
self.assertEqual(item1_parent.get_self_href(), item2_parent.get_self_href())
369+
370+
def test_generate_subcatalogs_works_for_branched_subcatalogs(self):
371+
catalog = Catalog(id='test', description='Test')
372+
item_properties = [
373+
dict(property1='A', property2=1, property3='i'), # add 3 subcats
374+
dict(property1='A', property2=1, property3='j'), # add 1 more
375+
dict(property1='A', property2=2, property3='i'), # add 2 more
376+
dict(property1='B', property2=1, property3='i'), # add 3 more
377+
]
378+
for ni, properties in enumerate(item_properties):
379+
catalog.add_item(
380+
Item(id='item{}'.format(ni),
381+
geometry=RANDOM_GEOM,
382+
bbox=RANDOM_BBOX,
383+
datetime=datetime.utcnow(),
384+
properties=properties))
385+
result = catalog.generate_subcatalogs('${property1}/${property2}/${property3}')
386+
self.assertEqual(len(result), 9)
387+
388+
actual_subcats = set([cat.id for cat in result])
389+
expected_subcats = {'A', 'B', '1', '2', 'i', 'j'}
390+
self.assertSetEqual(actual_subcats, expected_subcats)
391+
392+
def test_generate_subcatalogs_works_for_subcatalogs_with_same_ids(self):
393+
catalog = Catalog(id='test', description='Test')
394+
item_properties = [
395+
dict(property1=1, property2=1), # add 2 subcats
396+
dict(property1=1, property2=2), # add 1 more
397+
dict(property1=2, property2=1), # add 2 more
398+
dict(property1=2, property2=2), # add 1 more
399+
]
400+
for ni, properties in enumerate(item_properties):
401+
catalog.add_item(
402+
Item(id='item{}'.format(ni),
403+
geometry=RANDOM_GEOM,
404+
bbox=RANDOM_BBOX,
405+
datetime=datetime.utcnow(),
406+
properties=properties))
407+
result = catalog.generate_subcatalogs('${property1}/${property2}')
408+
self.assertEqual(len(result), 6)
409+
410+
catalog.normalize_hrefs('/')
411+
for item in catalog.get_all_items():
412+
parent_href = item.get_parent().get_self_href()
413+
path_to_parent, _ = os.path.split(parent_href)
414+
subcats = [el for el in path_to_parent.split('/') if el]
415+
self.assertEqual(len(subcats), 2, msg=" for item '{}'".format(item.id))
416+
332417
def test_map_items(self):
333418
def item_mapper(item):
334419
item.properties['ITEM_MAPPER'] = 'YEP'

0 commit comments

Comments
 (0)