Skip to content

Commit 81fb84c

Browse files
pierrot0 and copybara-github
authored and committed
Make S3 versions the defaults.
PiperOrigin-RevId: 290973467
1 parent aa1e1e5 commit 81fb84c

File tree

131 files changed

+280
-605
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

131 files changed

+280
-605
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -145,7 +145,7 @@ input pipelines with `tf.data` but use whatever you'd like for your model
145145
components.
146146

147147
```python
148-
train_ds = tfds.load("mnist", split=tfds.Split.TRAIN)
148+
train_ds = tfds.load("mnist", split="train")
149149
train_ds = train_ds.shuffle(1024).batch(128).repeat(5).prefetch(10)
150150
for example in tfds.as_numpy(train_ds):
151151
numpy_images, numpy_labels = example["image"], example["label"]

docs/_index.ipynb

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@
4747
"print(tfds.list_builders())\n",
4848
"\n",
4949
"# Construct a tf.data.Dataset\n",
50-
"dataset = tfds.load(name=\"mnist\", split=tfds.Split.TRAIN)\n",
50+
"dataset = tfds.load(name=\"mnist\", split=\"train\")\n",
5151
"\n",
5252
"# Build your input pipeline\n",
5353
"dataset = dataset.shuffle(1024).batch(32).prefetch(tf.data.experimental.AUTOTUNE)\n",

docs/_index.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@ landing_page:
3333
print(tfds.list_builders())
3434
3535
# Construct a tf.data.Dataset
36-
dataset = tfds.load(name="mnist", split=tfds.Split.TRAIN)
36+
dataset = tfds.load(name="mnist", split="train")
3737
3838
# Build your input pipeline
3939
dataset = dataset.shuffle(1024).batch(32).prefetch(tf.data.experimental.AUTOTUNE)

docs/api_docs/python/tfds/core/SplitGenerator.md

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,6 @@ source</a>
3333
``` python
3434
__init__(
3535
name,
36-
num_shards=1,
3736
gen_kwargs=None
3837
)
3938
```

oss_scripts/oss_pip_install.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ pip install -q -U numpy
3232
# data load
3333
pip install -e .
3434
python -c "import tensorflow_datasets as tfds"
35-
python -c "import tensorflow_datasets as tfds; tfds.load('mnist', split=tfds.Split.TRAIN)"
35+
python -c "import tensorflow_datasets as tfds; tfds.load('mnist:3.*.*', split='train')"
3636

3737
# Then install the test dependencies
3838
pip install -e .[tests]

tensorflow_datasets/audio/groove.py

Lines changed: 5 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -77,15 +77,12 @@ def __init__(self, split_bars=None, include_audio=True, audio_rate=16000,
7777
else:
7878
name_parts.append("midionly")
7979

80+
v1 = tfds.core.Version(
81+
"1.0.0", experiments={tfds.core.Experiment.S3: False})
82+
v2 = tfds.core.Version(
83+
"2.0.0", "New split API (https://tensorflow.org/datasets/splits)")
8084
super(GrooveConfig, self).__init__(
81-
name="-".join(name_parts),
82-
version=tfds.core.Version(
83-
"1.0.0", experiments={tfds.core.Experiment.S3: False}),
84-
supported_versions=[
85-
tfds.core.Version(
86-
"2.0.0",
87-
"New split API (https://tensorflow.org/datasets/splits)"),
88-
],
85+
name="-".join(name_parts), version=v2, supported_versions=[v1],
8986
**kwargs)
9087
self.split_bars = split_bars
9188
self.include_audio = include_audio

tensorflow_datasets/audio/librispeech.py

Lines changed: 5 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -149,21 +149,16 @@ def _make_builder_configs():
149149
vocab_size=2**15),
150150
]
151151
configs = []
152+
v001 = tfds.core.Version(
153+
"0.0.1", experiments={tfds.core.Experiment.S3: False})
154+
v1 = tfds.core.Version(
155+
"1.0.0", "New split API (https://tensorflow.org/datasets/splits)")
152156
for text_encoder_config in text_encoder_configs:
153157
for data in _DATA_OPTIONS:
154158
config = LibrispeechConfig(
155-
version=tfds.core.Version(
156-
"0.0.1", experiments={tfds.core.Experiment.S3: False}),
157-
supported_versions=[
158-
tfds.core.Version(
159-
"1.0.0",
160-
"New split API (https://tensorflow.org/datasets/splits)"),
161-
],
159+
version=v1, supported_versions=[v001],
162160
text_encoder_config=text_encoder_config,
163161
data=data)
164-
# Version history:
165-
# 1.0.0: S3 (new shuffling, sharding and slicing mechanism).
166-
# 0.0.1: Initial version.
167162
configs.append(config)
168163
return configs
169164

tensorflow_datasets/core/dataset_builder_test.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -163,12 +163,12 @@ def test_load_from_gcs(self):
163163
data_dir=tmp_dir,
164164
with_info=True)
165165
self.assertSetEqual(
166-
set(["dataset_info.json", "image.image.json",
167-
"mnist-test.counts.txt-00000-of-00001",
166+
set(["dataset_info.json",
167+
"image.image.json",
168168
"mnist-test.tfrecord-00000-of-00001",
169-
"mnist-train.counts.txt-00000-of-00001"] +
170-
["mnist-train.tfrecord-0000%d-of-00010" % i for i in range(10)]),
171-
set(tf.io.gfile.listdir(os.path.join(tmp_dir, "mnist/1.0.0"))))
169+
"mnist-train.tfrecord-00000-of-00001",
170+
]),
171+
set(tf.io.gfile.listdir(os.path.join(tmp_dir, "mnist/3.0.0"))))
172172

173173
self.assertEqual(set(info.splits.keys()), set(["train", "test"]))
174174

@@ -350,7 +350,7 @@ def setUp(self):
350350
def load_mnist_dataset_info(self):
351351
mnist_info_path = os.path.join(
352352
utils.tfds_dir(),
353-
"testing/test_data/dataset_info/mnist/1.0.0",
353+
"testing/test_data/dataset_info/mnist/3.0.0",
354354
)
355355
mnist_info_path = os.path.normpath(mnist_info_path)
356356
self.read_from_directory(mnist_info_path)

tensorflow_datasets/core/dataset_info_test.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -35,9 +35,9 @@
3535

3636
_TFDS_DIR = py_utils.tfds_dir()
3737
_INFO_DIR = os.path.join(_TFDS_DIR, "testing", "test_data", "dataset_info",
38-
"mnist", "1.0.0")
38+
"mnist", "3.0.0")
3939
_INFO_DIR_UNLABELED = os.path.join(_TFDS_DIR, "testing", "test_data",
40-
"dataset_info", "mnist_unlabeled", "1.0.0")
40+
"dataset_info", "mnist_unlabeled", "3.0.0")
4141
_NON_EXISTENT_DIR = os.path.join(_TFDS_DIR, "non_existent_dir")
4242

4343

@@ -305,7 +305,7 @@ def test_updates_on_bucket_info(self):
305305

306306
INFO_STR = """tfds.core.DatasetInfo(
307307
name='mnist',
308-
version=1.0.0,
308+
version=3.0.0,
309309
description='The MNIST database of handwritten digits.',
310310
homepage='https://storage.googleapis.com/cvdf-datasets/mnist/',
311311
features=FeaturesDict({

tensorflow_datasets/image/caltech.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -49,11 +49,11 @@
4949
class Caltech101(tfds.core.GeneratorBasedBuilder):
5050
"""Caltech-101."""
5151

52-
VERSION = tfds.core.Version("1.1.0",
53-
experiments={tfds.core.Experiment.S3: False})
52+
VERSION = tfds.core.Version(
53+
"3.0.0", "New split API (https://tensorflow.org/datasets/splits)")
5454
SUPPORTED_VERSIONS = [
5555
tfds.core.Version(
56-
"3.0.0", "New split API (https://tensorflow.org/datasets/splits)"),
56+
"1.1.0", experiments={tfds.core.Experiment.S3: False}),
5757
]
5858

5959
def _info(self):

0 commit comments

Comments (0)