Skip to content

Commit 163b494

Browse files
Merge pull request #784 from habernal:patch-2
PiperOrigin-RevId: 258584247
2 parents 830a663 + e57f236 commit 163b494

File tree

1 file changed

+40
-0
lines changed

1 file changed

+40
-0
lines changed

tensorflow_datasets/core/dataset_builder.py

Lines changed: 40 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -320,6 +320,46 @@ def as_dataset(self,
320320
321321
Callers must pass arguments as keyword arguments.
322322
323+
The output types vary depending on the parameters. Examples:
324+
325+
```python
326+
builder = tfds.builder('imdb_reviews')
327+
builder.download_and_prepare()
328+
329+
# Default parameters: returns a dict of tf.data.Dataset, keyed by split name
330+
ds_all_dict = builder.as_dataset()
331+
assert isinstance(ds_all_dict, dict)
332+
print(ds_all_dict.keys()) # ==> ['test', 'train', 'unsupervised']
333+
334+
assert isinstance(ds_all_dict['test'], tf.data.Dataset)
335+
# Each dataset (test, train, unsupervised) yields dictionaries
336+
# {'label': <tf.Tensor: .. dtype=int64, numpy=1>,
337+
# 'text': <tf.Tensor: .. dtype=string, numpy=b"I've watched the movie ..">}
338+
# {'label': <tf.Tensor: .. dtype=int64, numpy=1>,
339+
# 'text': <tf.Tensor: .. dtype=string, numpy=b'If you love Japanese ..'>}
340+
341+
# With as_supervised=True: each tf.data.Dataset contains only (feature, label) tuples
342+
ds_all_supervised = builder.as_dataset(as_supervised=True)
343+
assert isinstance(ds_all_supervised, dict)
344+
print(ds_all_supervised.keys()) # ==> ['test', 'train', 'unsupervised']
345+
346+
assert isinstance(ds_all_supervised['test'], tf.data.Dataset)
347+
# Each dataset (test, train, unsupervised) yields tuples (text, label)
348+
# (<tf.Tensor: ... dtype=string, numpy=b"I've watched the movie ..">,
349+
# <tf.Tensor: ... dtype=int64, numpy=1>)
350+
# (<tf.Tensor: ... dtype=string, numpy=b"If you love Japanese ..">,
351+
# <tf.Tensor: ... dtype=int64, numpy=1>)
352+
353+
# Same as above, but requesting a single split returns a tf.data.Dataset directly (not a dict)
354+
ds_test_supervised = builder.as_dataset(as_supervised=True, split='test')
355+
assert isinstance(ds_test_supervised, tf.data.Dataset)
356+
# The dataset consists of tuples (text, label)
357+
# (<tf.Tensor: ... dtype=string, numpy=b"I've watched the movie ..">,
358+
# <tf.Tensor: ... dtype=int64, numpy=1>)
359+
# (<tf.Tensor: ... dtype=string, numpy=b"If you love Japanese ..">,
360+
# <tf.Tensor: ... dtype=int64, numpy=1>)
361+
```
362+
323363
Args:
324364
split: `tfds.core.SplitBase`, which subset(s) of the data to read. If None
325365
(default), returns all splits in a dict

0 commit comments

Comments
 (0)