tensorflow
diff --git a/‎README.md
Lines changed: 7 additions & 1 deletion b/‎README.md
Lines changed: 7 additions & 1 deletion
diff --git a/‎docs/add_dataset.md
Lines changed: 3 additions & 0 deletions b/‎docs/add_dataset.md
Lines changed: 3 additions & 0 deletions
diff --git a/‎docs/api_docs/python/tfds/_api_cache.json
Lines changed: 0 additions & 12 deletions b/‎docs/api_docs/python/tfds/_api_cache.json
Lines changed: 0 additions & 12 deletions
diff --git a/‎docs/api_docs/python/tfds/core/BeamBasedBuilder.md
Lines changed: 6 additions & 5 deletions b/‎docs/api_docs/python/tfds/core/BeamBasedBuilder.md
Lines changed: 6 additions & 5 deletions
diff --git a/‎docs/api_docs/python/tfds/core/DatasetBuilder.md
Lines changed: 6 additions & 5 deletions b/‎docs/api_docs/python/tfds/core/DatasetBuilder.md
Lines changed: 6 additions & 5 deletions
diff --git a/‎docs/api_docs/python/tfds/core/GeneratorBasedBuilder.md
Lines changed: 6 additions & 5 deletions b/‎docs/api_docs/python/tfds/core/GeneratorBasedBuilder.md
Lines changed: 6 additions & 5 deletions
diff --git a/‎docs/api_docs/python/tfds/disable_progress_bar.md
Lines changed: 4 additions & 0 deletions b/‎docs/api_docs/python/tfds/disable_progress_bar.md
Lines changed: 4 additions & 0 deletions
diff --git a/‎docs/api_docs/python/tfds/features/text/TokenTextEncoder.md
Lines changed: 4 additions & 2 deletions b/‎docs/api_docs/python/tfds/features/text/TokenTextEncoder.md
Lines changed: 4 additions & 2 deletions
diff --git a/‎docs/api_docs/python/tfds/load.md
Lines changed: 12 additions & 3 deletions b/‎docs/api_docs/python/tfds/load.md
Lines changed: 12 additions & 3 deletions
diff --git a/‎docs/api_docs/python/tfds/testing/DatasetBuilderTestCase.md
Lines changed: 9 additions & 14 deletions b/‎docs/api_docs/python/tfds/testing/DatasetBuilderTestCase.md
Lines changed: 9 additions & 14 deletions
@@ -8,7 +8,13 @@ TensorFlow Datasets provides many public datasets as `tf.data.Datasets`.
 * [List of datasets](https://github.com/tensorflow/datasets/tree/master/docs/datasets.md)
 * [Try it in Colab](https://colab.research.google.com/github/tensorflow/datasets/blob/master/docs/overview.ipynb)
 * [API docs](https://www.tensorflow.org/datasets/api_docs/python/tfds)
-* [Add a dataset](https://github.com/tensorflow/datasets/tree/master/docs/add_dataset.md)
+* Guides
+  * [Overview](https://www.tensorflow.org/datasets/overview)
+  * [Datasets versioning](https://www.tensorflow.org/datasets/datasets_versioning)
+  * [Using splits and slicing API](https://www.tensorflow.org/datasets/splits)
+  * [Add a dataset](https://www.tensorflow.org/datasets/add_dataset)
+  * [Add a huge dataset (>>100GiB)](https://www.tensorflow.org/datasets/beam_datasets)
+
 
 **Table of Contents**
 
 
@@ -138,6 +138,9 @@ If you'd like to follow a test-driven development workflow, which can help you
 iterate faster, jump to the [testing instructions](#testing-mydataset) below,
 add the test, and then return here.
 
+For an explanation of what the version is, please read
+[datasets versioning](datasets_versioning.md).
+
 ## Specifying `DatasetInfo`
 
 [`DatasetInfo`](api_docs/python/tfds/core/DatasetInfo.md) describes the
 
@@ -139,8 +139,6 @@
     "tfds.core": false, 
     "tfds.core.BeamBasedBuilder": false, 
     "tfds.core.BeamBasedBuilder.BUILDER_CONFIGS": true, 
-    "tfds.core.BeamBasedBuilder.GOOGLE_DISABLED": true, 
-    "tfds.core.BeamBasedBuilder.IN_DEVELOPMENT": true, 
     "tfds.core.BeamBasedBuilder.SUPPORTED_VERSIONS": true, 
     "tfds.core.BeamBasedBuilder.VERSION": true, 
     "tfds.core.BeamBasedBuilder.__init__": true, 
@@ -160,8 +158,6 @@
     "tfds.core.BuilderConfig.version": true, 
     "tfds.core.DatasetBuilder": false, 
     "tfds.core.DatasetBuilder.BUILDER_CONFIGS": true, 
-    "tfds.core.DatasetBuilder.GOOGLE_DISABLED": true, 
-    "tfds.core.DatasetBuilder.IN_DEVELOPMENT": true, 
     "tfds.core.DatasetBuilder.SUPPORTED_VERSIONS": true, 
     "tfds.core.DatasetBuilder.VERSION": true, 
     "tfds.core.DatasetBuilder.__init__": true, 
@@ -200,8 +196,6 @@
     "tfds.core.Experiment.S3": true, 
     "tfds.core.GeneratorBasedBuilder": false, 
     "tfds.core.GeneratorBasedBuilder.BUILDER_CONFIGS": true, 
-    "tfds.core.GeneratorBasedBuilder.GOOGLE_DISABLED": true, 
-    "tfds.core.GeneratorBasedBuilder.IN_DEVELOPMENT": true, 
     "tfds.core.GeneratorBasedBuilder.SUPPORTED_VERSIONS": true, 
     "tfds.core.GeneratorBasedBuilder.VERSION": true, 
     "tfds.core.GeneratorBasedBuilder.__init__": true, 
@@ -599,7 +593,6 @@
     "tfds.testing.DatasetBuilderTestCase.DATASET_CLASS": true, 
     "tfds.testing.DatasetBuilderTestCase.DL_EXTRACT_RESULT": true, 
     "tfds.testing.DatasetBuilderTestCase.EXAMPLE_DIR": true, 
-    "tfds.testing.DatasetBuilderTestCase.INTERNAL_DATASET": true, 
     "tfds.testing.DatasetBuilderTestCase.MOCK_MONARCH": true, 
     "tfds.testing.DatasetBuilderTestCase.MOCK_OUT_FORBIDDEN_OS_FUNCTIONS": true, 
     "tfds.testing.DatasetBuilderTestCase.OVERLAPPING_SPLITS": true, 
@@ -745,8 +738,6 @@
     "tfds.testing.DatasetBuilderTestCase.test_session": true, 
     "tfds.testing.DummyDatasetSharedGenerator": false, 
     "tfds.testing.DummyDatasetSharedGenerator.BUILDER_CONFIGS": true, 
-    "tfds.testing.DummyDatasetSharedGenerator.GOOGLE_DISABLED": true, 
-    "tfds.testing.DummyDatasetSharedGenerator.IN_DEVELOPMENT": true, 
     "tfds.testing.DummyDatasetSharedGenerator.SUPPORTED_VERSIONS": true, 
     "tfds.testing.DummyDatasetSharedGenerator.VERSION": true, 
     "tfds.testing.DummyDatasetSharedGenerator.__init__": true, 
@@ -760,8 +751,6 @@
     "tfds.testing.DummyDatasetSharedGenerator.version": true, 
     "tfds.testing.DummyMnist": false, 
     "tfds.testing.DummyMnist.BUILDER_CONFIGS": true, 
-    "tfds.testing.DummyMnist.GOOGLE_DISABLED": true, 
-    "tfds.testing.DummyMnist.IN_DEVELOPMENT": true, 
     "tfds.testing.DummyMnist.SUPPORTED_VERSIONS": true, 
     "tfds.testing.DummyMnist.VERSION": true, 
     "tfds.testing.DummyMnist.__init__": true, 
@@ -1199,7 +1188,6 @@
     "tfds.units.MiB": true, 
     "tfds.units.PiB": true, 
     "tfds.units.TiB": true, 
-    "tfds.units.absolute_import": true, 
     "tfds.units.division": true, 
     "tfds.units.print_function": true, 
     "tfds.units.size_str": false
 
@@ -9,8 +9,6 @@
 <meta itemprop="property" content="as_dataset"/>
 <meta itemprop="property" content="download_and_prepare"/>
 <meta itemprop="property" content="BUILDER_CONFIGS"/>
-<meta itemprop="property" content="GOOGLE_DISABLED"/>
-<meta itemprop="property" content="IN_DEVELOPMENT"/>
 <meta itemprop="property" content="SUPPORTED_VERSIONS"/>
 <meta itemprop="property" content="VERSION"/>
 <meta itemprop="property" content="builder_configs"/>
@@ -80,7 +78,8 @@ as_dataset(
     split=None,
     batch_size=None,
     shuffle_files=None,
-    as_supervised=False
+    as_supervised=False,
+    in_memory=None
 )
 ```
 
@@ -105,6 +104,10 @@ Callers must pass arguments as keyword arguments.
     will have a 2-tuple structure `(input, label)` according to
     `builder.info.supervised_keys`. If `False`, the default, the returned
     `tf.data.Dataset` will have a dictionary with all the features.
+*   <b>`in_memory`</b>: `bool`, if `True`, loads the dataset in memory which
+    increases iteration speeds. Note that if `True` and the dataset has unknown
+    dimensions, the features will be padded to the maximum size across the
+    dataset.
 
 #### Returns:
 
@@ -142,8 +145,6 @@ Downloads and prepares dataset for reading.
 ## Class Members
 
 *   `BUILDER_CONFIGS` <a id="BUILDER_CONFIGS"></a>
-*   `GOOGLE_DISABLED = False` <a id="GOOGLE_DISABLED"></a>
-*   `IN_DEVELOPMENT = False` <a id="IN_DEVELOPMENT"></a>
 *   `SUPPORTED_VERSIONS` <a id="SUPPORTED_VERSIONS"></a>
 *   `VERSION = None` <a id="VERSION"></a>
 *   `builder_configs` <a id="builder_configs"></a>
 
@@ -9,8 +9,6 @@
 <meta itemprop="property" content="as_dataset"/>
 <meta itemprop="property" content="download_and_prepare"/>
 <meta itemprop="property" content="BUILDER_CONFIGS"/>
-<meta itemprop="property" content="GOOGLE_DISABLED"/>
-<meta itemprop="property" content="IN_DEVELOPMENT"/>
 <meta itemprop="property" content="SUPPORTED_VERSIONS"/>
 <meta itemprop="property" content="VERSION"/>
 <meta itemprop="property" content="builder_configs"/>
@@ -111,7 +109,8 @@ as_dataset(
     split=None,
     batch_size=None,
     shuffle_files=None,
-    as_supervised=False
+    as_supervised=False,
+    in_memory=None
 )
 ```
 
@@ -136,6 +135,10 @@ Callers must pass arguments as keyword arguments.
     will have a 2-tuple structure `(input, label)` according to
     `builder.info.supervised_keys`. If `False`, the default, the returned
     `tf.data.Dataset` will have a dictionary with all the features.
+*   <b>`in_memory`</b>: `bool`, if `True`, loads the dataset in memory which
+    increases iteration speeds. Note that if `True` and the dataset has unknown
+    dimensions, the features will be padded to the maximum size across the
+    dataset.
 
 #### Returns:
 
@@ -173,8 +176,6 @@ Downloads and prepares dataset for reading.
 ## Class Members
 
 *   `BUILDER_CONFIGS` <a id="BUILDER_CONFIGS"></a>
-*   `GOOGLE_DISABLED = False` <a id="GOOGLE_DISABLED"></a>
-*   `IN_DEVELOPMENT = False` <a id="IN_DEVELOPMENT"></a>
 *   `SUPPORTED_VERSIONS` <a id="SUPPORTED_VERSIONS"></a>
 *   `VERSION = None` <a id="VERSION"></a>
 *   `builder_configs` <a id="builder_configs"></a>
 
@@ -9,8 +9,6 @@
 <meta itemprop="property" content="as_dataset"/>
 <meta itemprop="property" content="download_and_prepare"/>
 <meta itemprop="property" content="BUILDER_CONFIGS"/>
-<meta itemprop="property" content="GOOGLE_DISABLED"/>
-<meta itemprop="property" content="IN_DEVELOPMENT"/>
 <meta itemprop="property" content="SUPPORTED_VERSIONS"/>
 <meta itemprop="property" content="VERSION"/>
 <meta itemprop="property" content="builder_configs"/>
@@ -89,7 +87,8 @@ as_dataset(
     split=None,
     batch_size=None,
     shuffle_files=None,
-    as_supervised=False
+    as_supervised=False,
+    in_memory=None
 )
 ```
 
@@ -114,6 +113,10 @@ Callers must pass arguments as keyword arguments.
     will have a 2-tuple structure `(input, label)` according to
     `builder.info.supervised_keys`. If `False`, the default, the returned
     `tf.data.Dataset` will have a dictionary with all the features.
+*   <b>`in_memory`</b>: `bool`, if `True`, loads the dataset in memory which
+    increases iteration speeds. Note that if `True` and the dataset has unknown
+    dimensions, the features will be padded to the maximum size across the
+    dataset.
 
 #### Returns:
 
@@ -151,8 +154,6 @@ Downloads and prepares dataset for reading.
 ## Class Members
 
 *   `BUILDER_CONFIGS` <a id="BUILDER_CONFIGS"></a>
-*   `GOOGLE_DISABLED = False` <a id="GOOGLE_DISABLED"></a>
-*   `IN_DEVELOPMENT = False` <a id="IN_DEVELOPMENT"></a>
 *   `SUPPORTED_VERSIONS` <a id="SUPPORTED_VERSIONS"></a>
 *   `VERSION = None` <a id="VERSION"></a>
 *   `builder_configs` <a id="builder_configs"></a>
 
@@ -16,6 +16,10 @@ Defined in
 
 ### Used in the tutorials:
 
+*   [CycleGAN](https://www.tensorflow.org/beta/tutorials/generative/cyclegan)
+*   [Distributed training with Keras](https://www.tensorflow.org/beta/tutorials/distribute/keras)
+*   [Multi-worker Training with Estimator](https://www.tensorflow.org/beta/tutorials/distribute/multi_worker_with_estimator)
+*   [Multi-worker Training with Keras](https://www.tensorflow.org/beta/tutorials/distribute/multi_worker_with_keras)
 *   [Transfer Learning Using Pretrained ConvNets](https://www.tensorflow.org/beta/tutorials/images/transfer_learning)
 
 #### Usage:
 
@@ -46,8 +46,10 @@ __init__(
 
 Constructs a TokenTextEncoder.
 
-To load from a file saved with `TokenTextEncoder.save_to_file`, use
-`TokenTextEncoder.load_from_file`.
+To load from a file saved with
+<a href="../../../tfds/features/text/TokenTextEncoder.md#save_to_file"><code>TokenTextEncoder.save_to_file</code></a>,
+use
+<a href="../../../tfds/features/text/TokenTextEncoder.md#load_from_file"><code>TokenTextEncoder.load_from_file</code></a>.
 
 #### Args:
 
 
@@ -13,6 +13,7 @@ tfds.load(
     split=None,
     data_dir=None,
     batch_size=None,
+    in_memory=None,
     download=True,
     as_supervised=False,
     with_info=False,
@@ -31,6 +32,7 @@ Defined in [`core/registered.py`](https://github.com/tensorflow/datasets/tree/ma
 
 ### Used in the tutorials:
 
+*   [CycleGAN](https://www.tensorflow.org/beta/tutorials/generative/cyclegan)
 *   [Distributed training with Keras](https://www.tensorflow.org/beta/tutorials/distribute/keras)
 *   [Multi-worker Training with Estimator](https://www.tensorflow.org/beta/tutorials/distribute/multi_worker_with_estimator)
 *   [Multi-worker Training with Keras](https://www.tensorflow.org/beta/tutorials/distribute/multi_worker_with_keras)
@@ -42,9 +44,12 @@ Defined in [`core/registered.py`](https://github.com/tensorflow/datasets/tree/ma
 If `split=None` (the default), returns all splits for the dataset. Otherwise,
 returns the specified split.
 
-`load` is a convenience method that fetches the <a href="../tfds/core/DatasetBuilder.md"><code>tfds.core.DatasetBuilder</code></a> by
-string name, optionally calls `DatasetBuilder.download_and_prepare`
-(if `download=True`), and then calls `DatasetBuilder.as_dataset`.
+`load` is a convenience method that fetches the
+<a href="../tfds/core/DatasetBuilder.md"><code>tfds.core.DatasetBuilder</code></a>
+by string name, optionally calls
+<a href="../tfds/core/DatasetBuilder.md#download_and_prepare"><code>DatasetBuilder.download_and_prepare</code></a>
+(if `download=True`), and then calls
+<a href="../tfds/core/DatasetBuilder.md#as_dataset"><code>DatasetBuilder.as_dataset</code></a>.
 This is roughly equivalent to:
 
 ```
@@ -86,6 +91,10 @@ of hundreds of GiB to disk. Refer to the `download` argument.
 *   <b>`batch_size`</b>: `int`, if set, add a batch dimension to examples. Note
     that variable length features will be 0-padded. If `batch_size=-1`, will
     return the full dataset as `tf.Tensor`s.
+*   <b>`in_memory`</b>: `bool`, if `True`, loads the dataset in memory which
+    increases iteration speeds. Note that if `True` and the dataset has unknown
+    dimensions, the features will be padded to the maximum size across the
+    dataset.
 *   <b>`download`</b>: `bool` (optional), whether to call
     <a href="../tfds/core/DatasetBuilder.md#download_and_prepare"><code>tfds.core.DatasetBuilder.download_and_prepare</code></a>
     before calling `tfds.core.DatasetBuilder.as_dataset`. If `False`, data is expected
 
@@ -137,7 +137,6 @@
 <meta itemprop="property" content="DATASET_CLASS"/>
 <meta itemprop="property" content="DL_EXTRACT_RESULT"/>
 <meta itemprop="property" content="EXAMPLE_DIR"/>
-<meta itemprop="property" content="INTERNAL_DATASET"/>
 <meta itemprop="property" content="MOCK_MONARCH"/>
 <meta itemprop="property" content="MOCK_OUT_FORBIDDEN_OS_FUNCTIONS"/>
 <meta itemprop="property" content="OVERLAPPING_SPLITS"/>
@@ -178,18 +177,15 @@ MOCK_OUT_FORBIDDEN_OS_FUNCTIONS: `bool`, defaults to True. Set to False to
 disable checks preventing usage of `os` or builtin functions instead of
 recommended `tf.io.gfile` API.
 
-This test case will check for the following:
- - the dataset builder is correctly registered, i.e. `tfds.load(name)` works;
- - the dataset builder can read the fake examples stored in
-     testing/test_data/fake_examples/${dataset_name};
- - the dataset builder can produce serialized data;
- - the dataset builder produces a valid Dataset object from serialized data
-   - in eager mode;
-   - in graph mode.
- - the produced Dataset examples have the expected dimensions and types;
- - the produced Dataset has and the expected number of examples;
- - a example is not part of two splits, or one of these splits is whitelisted
-     in OVERLAPPING_SPLITS.
+This test case will check for the following: - the dataset builder is correctly
+registered, i.e. <a href="../../tfds/load.md"><code>tfds.load(name)</code></a>
+works; - the dataset builder can read the fake examples stored in
+testing/test_data/fake_examples/${dataset_name}; - the dataset builder can
+produce serialized data; - the dataset builder produces a valid Dataset object
+from serialized data - in eager mode; - in graph mode. - the produced Dataset
+examples have the expected dimensions and types; - the produced Dataset has
+the expected number of examples; - an example is not part of two splits, or one
+of these splits is whitelisted in OVERLAPPING_SPLITS.
 
 <h2 id="__init__"><code>__init__</code></h2>
 
@@ -2450,7 +2446,6 @@ Use `self.session()` or `self.cached_session()` instead.
 *   `DATASET_CLASS = None` <a id="DATASET_CLASS"></a>
 *   `DL_EXTRACT_RESULT = None` <a id="DL_EXTRACT_RESULT"></a>
 *   `EXAMPLE_DIR = None` <a id="EXAMPLE_DIR"></a>
-*   `INTERNAL_DATASET = False` <a id="INTERNAL_DATASET"></a>
 *   `MOCK_MONARCH = True` <a id="MOCK_MONARCH"></a>
 *   `MOCK_OUT_FORBIDDEN_OS_FUNCTIONS = True`
     <a id="MOCK_OUT_FORBIDDEN_OS_FUNCTIONS"></a>