@@ -319,6 +319,46 @@ def as_dataset(self,
319
319
"""Constructs a `tf.data.Dataset`.
320
320
321
321
Callers must pass arguments as keyword arguments.
322
+
323
+ The output types vary depending on the parameters. Examples:
324
+
325
+ ```python
326
+ ds_builder = tfds.text.imdb.IMDBReviews()
327
+ ds_builder.download_and_prepare()
328
+
329
+ # Default parameters
330
+ ds1 = ds_builder.as_dataset()
331
+ assert isinstance(ds1, dict)
332
+ print(ds1.keys()) # ==> ['test', 'train', 'unsupervised']
333
+
334
+ assert isinstance(ds1[tfds.Split.TEST], tf.data.Dataset)
335
+ # Each dataset (test, train, unsupervised) yields dictionaries, e.g.:
336
+ # {'label': <tf.Tensor: .. dtype=int64, numpy=1>,
337
+ # 'text': <tf.Tensor: .. dtype=string, numpy=b"I've watched the movie ..">}
338
+ # {'label': <tf.Tensor: .. dtype=int64, numpy=1>,
339
+ # 'text': <tf.Tensor: .. dtype=string, numpy=b'If you love Japanese ..'>}
340
+
341
+ # With as_supervised=True, each example is a (feature, label) tuple, as specified by this particular DatasetBuilder
342
+ ds2 = ds_builder.as_dataset(as_supervised=True)
343
+ assert isinstance(ds2, dict)
344
+ print(ds2.keys()) # ==> ['test', 'train', 'unsupervised']
345
+
346
+ assert isinstance(ds2[tfds.Split.TEST], tf.data.Dataset)
347
+ # Each dataset (test, train, unsupervised) yields (text, label) tuples, e.g.:
348
+ # (<tf.Tensor: ... dtype=string, numpy=b"I've watched the movie ..">,
349
+ # <tf.Tensor: ... dtype=int64, numpy=1>)
350
+ # (<tf.Tensor: ... dtype=string, numpy=b"If you love Japanese ..">,
351
+ # <tf.Tensor: ... dtype=int64, numpy=1>)
352
+
353
+ # Same as above, but requesting a particular split returns a single dataset instead of a dict
354
+ ds3 = ds_builder.as_dataset(as_supervised=True, split=tfds.Split.TEST)
355
+ assert isinstance(ds3, tf.data.Dataset)
356
+ # The dataset consists of tuples (text, label)
357
+ # (<tf.Tensor: ... dtype=string, numpy=b"I've watched the movie ..">,
358
+ # <tf.Tensor: ... dtype=int64, numpy=1>)
359
+ # (<tf.Tensor: ... dtype=string, numpy=b"If you love Japanese ..">,
360
+ # <tf.Tensor: ... dtype=int64, numpy=1>)
361
+ ```
322
362
323
363
Args:
324
364
split: `tfds.core.SplitBase`, which subset(s) of the data to read. If None
0 commit comments