@@ -320,6 +320,46 @@ def as_dataset(self,
320
320
321
321
Callers must pass arguments as keyword arguments.
322
322
323
+ The output types vary depending on the parameters. Examples:
324
+
325
+ ```python
326
+ builder = tfds.builder('imdb_reviews')
327
+ builder.download_and_prepare()
328
+
329
+ # Default parameters: returns a dict of tf.data.Dataset, one per split
330
+ ds_all_dict = builder.as_dataset()
331
+ assert isinstance(ds_all_dict, dict)
332
+ print(ds_all_dict.keys()) # ==> ['test', 'train', 'unsupervised']
333
+
334
+ assert isinstance(ds_all_dict['test'], tf.data.Dataset)
335
+ # Each dataset (test, train, unsupervised) consists of dictionaries, e.g.:
336
+ # {'label': <tf.Tensor: .. dtype=int64, numpy=1>,
337
+ # 'text': <tf.Tensor: .. dtype=string, numpy=b"I've watched the movie ..">}
338
+ # {'label': <tf.Tensor: .. dtype=int64, numpy=1>,
339
+ # 'text': <tf.Tensor: .. dtype=string, numpy=b'If you love Japanese ..'>}
340
+
341
+ # With as_supervised=True: datasets contain only (feature, label) tuples
342
+ ds_all_supervised = builder.as_dataset(as_supervised=True)
343
+ assert isinstance(ds_all_supervised, dict)
344
+ print(ds_all_supervised.keys()) # ==> ['test', 'train', 'unsupervised']
345
+
346
+ assert isinstance(ds_all_supervised['test'], tf.data.Dataset)
347
+ # Each dataset (test, train, unsupervised) consists of (text, label) tuples:
348
+ # (<tf.Tensor: ... dtype=string, numpy=b"I've watched the movie ..">,
349
+ # <tf.Tensor: ... dtype=int64, numpy=1>)
350
+ # (<tf.Tensor: ... dtype=string, numpy=b"If you love Japanese ..">,
351
+ # <tf.Tensor: ... dtype=int64, numpy=1>)
352
+
353
+ # Same as above, but a single requested split returns a tf.data.Dataset
354
+ ds_test_supervised = builder.as_dataset(as_supervised=True, split='test')
355
+ assert isinstance(ds_test_supervised, tf.data.Dataset)
356
+ # The dataset consists of tuples (text, label)
357
+ # (<tf.Tensor: ... dtype=string, numpy=b"I've watched the movie ..">,
358
+ # <tf.Tensor: ... dtype=int64, numpy=1>)
359
+ # (<tf.Tensor: ... dtype=string, numpy=b"If you love Japanese ..">,
360
+ # <tf.Tensor: ... dtype=int64, numpy=1>)
361
+ ```
362
+
323
363
Args:
324
364
split: `tfds.core.SplitBase`, which subset(s) of the data to read. If None
325
365
(default), returns all splits in a dict
0 commit comments