Skip to content

Commit 7c94c52

Browse files
TensorFlow Datasets Team authored and copybara-github committed
Switch to using apache beam to load bigearthnet TFDS dataset.
PiperOrigin-RevId: 251242287
1 parent e12e4c8 commit 7c94c52

File tree

1 file changed

+12
-8
lines changed

1 file changed

+12
-8
lines changed

tensorflow_datasets/image/bigearthnet.py

Lines changed: 12 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -119,19 +119,19 @@ def __init__(self, selection=None, **kwargs):
119119
self.selection = selection
120120

121121

122-
class Bigearthnet(tfds.core.GeneratorBasedBuilder):
122+
class Bigearthnet(tfds.core.BeamBasedBuilder):
123123
"""Bigearthnet remote sensing dataset of Sentinel-2 image patches."""
124124

125125
BUILDER_CONFIGS = [
126126
BigearthnetConfig(
127127
selection='rgb',
128128
name='rgb',
129-
version='0.0.1',
129+
version='0.0.2',
130130
description='Sentinel-2 RGB channels'),
131131
BigearthnetConfig(
132132
selection='all',
133133
name='all',
134-
version='0.0.1',
134+
version='0.0.2',
135135
description='13 Sentinel-2 channels'),
136136
]
137137

@@ -218,11 +218,15 @@ def _split_generators(self, dl_manager):
218218
),
219219
]
220220

221-
def _generate_examples(self, path, selection):
222-
"""Yields examples."""
223-
for subdir in tf.io.gfile.listdir(path):
224-
d = _read_chip(os.path.join(path, subdir), selection)
225-
yield d
221+
def _build_pcollection(self, pipeline, path, selection):
222+
"""Generates examples as dicts."""
223+
beam = tfds.core.lazy_imports.apache_beam
224+
225+
def _process_example(subdir):
226+
return _read_chip(os.path.join(path, subdir), selection)
227+
228+
return (pipeline | beam.Create(tf.io.gfile.listdir(path))
229+
| beam.Map(_process_example))
226230

227231

228232
def _read_chip(path, selection):

0 commit comments

Comments
 (0)