tensorflow
diff --git a/‎.github/PULL_REQUEST_TEMPLATE/add_dataset.md
Lines changed: 20 additions & 0 deletions b/‎.github/PULL_REQUEST_TEMPLATE/add_dataset.md
Lines changed: 20 additions & 0 deletions
diff --git a/‎README.md
Lines changed: 30 additions & 27 deletions b/‎README.md
Lines changed: 30 additions & 27 deletions
diff --git a/‎docs/_index.ipynb
Lines changed: 1 addition & 1 deletion b/‎docs/_index.ipynb
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/_index.yaml
Lines changed: 3 additions & 3 deletions b/‎docs/_index.yaml
Lines changed: 3 additions & 3 deletions
diff --git a/‎docs/add_dataset.md
Lines changed: 9 additions & 13 deletions b/‎docs/add_dataset.md
Lines changed: 9 additions & 13 deletions
diff --git a/‎docs/api_docs/python/_toc.yaml
Lines changed: 2 additions & 0 deletions b/‎docs/api_docs/python/_toc.yaml
Lines changed: 2 additions & 0 deletions
diff --git a/‎docs/api_docs/python/index.md
Lines changed: 1 addition & 0 deletions b/‎docs/api_docs/python/index.md
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/api_docs/python/tfds.md
Lines changed: 3 additions & 1 deletion b/‎docs/api_docs/python/tfds.md
Lines changed: 3 additions & 1 deletion
diff --git a/‎docs/api_docs/python/tfds/ReadConfig.md
Lines changed: 113 additions & 0 deletions b/‎docs/api_docs/python/tfds/ReadConfig.md
Lines changed: 113 additions & 0 deletions
diff --git a/‎docs/api_docs/python/tfds/Split.md
Lines changed: 2 additions & 2 deletions b/‎docs/api_docs/python/tfds/Split.md
Lines changed: 2 additions & 2 deletions
@@ -0,0 +1,20 @@
+# Add Dataset
+
+* Dataset Name: <name>
+* Issue Reference: <link>
+* `dataset_info.json` Gist: <link>
+  
+## Description
+
+<description>
+  
+## Checklist
+* [ ] Address all TODOs
+* [ ] Add alphabetized import to subdirectory's `__init__.py`
+* [ ] Run `download_and_prepare` successfully
+* [ ] Add checksums file
+* [ ] Properly cite in `BibTeX` format
+* [ ] Add passing test(s)
+* [ ] Add test data
+* [ ] Add data generation script (if applicable)
+* [ ] Lint code
@@ -46,8 +46,7 @@ to receive updates on the project.
 import tensorflow_datasets as tfds
 import tensorflow as tf
 
-# tfds works in both Eager and Graph modes
-tf.compat.v1.enable_eager_execution()
+# Here we assume Eager mode is enabled (TF2), but tfds also works in Graph mode.
 
 # See available datasets
 print(tfds.list_builders())
@@ -92,32 +91,36 @@ ds = mnist_builder.as_dataset(split='train')
 # dataset and its features
 info = mnist_builder.info
 print(info)
+```
+
+This will print the dataset info content:
 
-    tfds.core.DatasetInfo(
-        name='mnist',
-        version=1.0.0,
-        description='The MNIST database of handwritten digits.',
-        homepage='http://yann.lecun.com/exdb/mnist/',
-        features=FeaturesDict({
-            'image': Image(shape=(28, 28, 1), dtype=tf.uint8),
-            'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=10)
-        },
-        total_num_examples=70000,
-        splits={
-            'test': <tfds.core.SplitInfo num_examples=10000>,
-            'train': <tfds.core.SplitInfo num_examples=60000>
-        },
-        supervised_keys=('image', 'label'),
-        citation='"""
-            @article{lecun2010mnist,
-              title={MNIST handwritten digit database},
-              author={LeCun, Yann and Cortes, Corinna and Burges, CJ},
-              journal={ATT Labs [Online]. Available: http://yann. lecun. com/exdb/mnist},
-              volume={2},
-              year={2010}
-            }
-      """',
-  )
+```
+tfds.core.DatasetInfo(
+    name='mnist',
+    version=1.0.0,
+    description='The MNIST database of handwritten digits.',
+    homepage='http://yann.lecun.com/exdb/mnist/',
+    features=FeaturesDict({
+        'image': Image(shape=(28, 28, 1), dtype=tf.uint8),
+        'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=10)
+    }),
+    total_num_examples=70000,
+    splits={
+        'test': <tfds.core.SplitInfo num_examples=10000>,
+        'train': <tfds.core.SplitInfo num_examples=60000>
+    },
+    supervised_keys=('image', 'label'),
+    citation='"""
+        @article{lecun2010mnist,
+          title={MNIST handwritten digit database},
+          author={LeCun, Yann and Cortes, Corinna and Burges, CJ},
+          journal={ATT Labs [Online]. Available: http://yann. lecun. com/exdb/mnist},
+          volume={2},
+          year={2010}
+        }
+    """',
+)
 ```
 
 You can also get details about the classes (number of classes and their names).
 
@@ -37,7 +37,7 @@
         "from __future__ import division\n",
         "from __future__ import print_function\n",
         "\n",
-        "import tensorflow as tf\n",
+        "import tensorflow.compat.v2 as tf\n",
         "import tensorflow_datasets as tfds\n",
         "\n",
         "# tfds works in both Eager and Graph modes\n",
 
@@ -23,7 +23,7 @@ landing_page:
         <a href="./datasets">list of datasets</a>.
     - code_block: |
         <pre class = "prettyprint">
-        import tensorflow as tf
+        import tensorflow.compat.v2 as tf
         import tensorflow_datasets as tfds
 
         # tfds works in both Eager and Graph modes
@@ -48,10 +48,10 @@ landing_page:
     items:
     - heading: Introducing TensorFlow Datasets
       image_path: /resources/images/tf-logo-card-16x9.png
-      path: https://github.com/tensorflow/datasets/blob/master/docs/announce_proxy.md
+      path: https://blog.tensorflow.org/2019/02/introducing-tensorflow-datasets.html
       buttons:
       - label: Read on TensorFlow Blog
-        path: https://github.com/tensorflow/datasets/blob/master/docs/announce_proxy.md
+        path: https://blog.tensorflow.org/2019/02/introducing-tensorflow-datasets.html
     - heading: TensorFlow Datasets on GitHub
       image_path: /resources/images/github-card-16x9.png
       path: https://github.com/tensorflow/datasets
 
@@ -30,8 +30,7 @@ isn't already added.
     *   [3. Double-check the citation](#3-double-check-the-citation)
     *   [4. Add a test](#4-add-a-test)
     *   [5. Check your code style](#5-check-your-code-style)
-    *   [6. Add release notes](#6-add-release-notes)
-    *   [7. Send for review!](#7-send-for-review)
+    *   [6. Send for review!](#6-send-for-review)
 *   [Define the dataset outside TFDS](#define-the-dataset-outside-tfds)
 *   [Large datasets and distributed generation](#large-datasets-and-distributed-generation)
 *   [Testing `MyDataset`](#testing-mydataset)
@@ -312,7 +311,7 @@ additional dependencies only as needed, use `tfds.core.lazy_imports`.
 To use `lazy_imports`:
 
 *   Add an entry for your dataset into `DATASET_EXTRAS` in
-    [`setup.py`](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/setup.py).
+    [`setup.py`](https://github.com/tensorflow/datasets/tree/master/setup.py).
     This makes it so that users can do, for example, `pip install
     'tensorflow-datasets[svhn]'` to install the extra dependencies.
 *   Add an entry for your import to
@@ -543,7 +542,7 @@ except TensorFlow uses 2 spaces instead of 4. Please conform to the
 [Google Python Style Guide](https://github.com/google/styleguide/blob/gh-pages/pyguide.md).
 
 Most importantly, use
-[`tensorflow_datasets/oss_scripts/lint.sh`](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/oss_scripts/lint.sh)
+[`oss_scripts/lint.sh`](https://github.com/tensorflow/datasets/tree/master/oss_scripts/lint.sh)
 to ensure your code is properly formatted. For example, to lint the `image`
 directory:
 
@@ -555,13 +554,7 @@ See
 [TensorFlow code style guide](https://www.tensorflow.org/community/contribute/code_style)
 for more information.
 
-### 6. Add release notes
-
-Add the dataset to the
-[release notes](https://github.com/tensorflow/datasets/tree/master/docs/release_notes.md).
-The release note will be published for the next release.
-
-### 7. Send for review!
+### 6. Send for review!
 
 Send the pull request for review.
 
@@ -586,7 +579,7 @@ To create this checksum file the first time, you can use the
 `tensorflow_datasets.scripts.download_and_prepare` script and pass the flags
 `--register_checksums --checksums_dir=/path/to/checksums_dir`.
 
-### 2. Adjust the fake example direcory
+### 2. Adjust the fake example directory
 
 For testing, instead of using the default
 [fake example directory](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/testing/test_data/fake_examples)
@@ -595,7 +588,7 @@ you can define your own by setting the `EXAMPLE_DIR` property of
 
 ```
 class MyDatasetTest(tfds.testing.DatasetBuilderTestCase):
-  EXAMPLE_DIR = 'path/to/fakedata'`
+  EXAMPLE_DIR = 'path/to/fakedata'
 ```
 
 ## Large datasets and distributed generation
@@ -617,6 +610,9 @@ as downloaded and extracted. It can be created manually or automatically with a
 script
 ([example script](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/testing/cifar.py)).
 
+If you're using automation to generate the test data, please include that script
+in [`testing`](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/testing).
+
 Make sure to use different data in your test data splits, as the test will
 fail if your dataset splits overlap.
 
 
@@ -17,6 +17,8 @@ toc:
     path: /datasets/api_docs/python/tfds/load
   - title: percent
     path: /datasets/api_docs/python/tfds/percent
+  - title: ReadConfig
+    path: /datasets/api_docs/python/tfds/ReadConfig
   - title: show_examples
     path: /datasets/api_docs/python/tfds/show_examples
   - title: Split
 
@@ -4,6 +4,7 @@
 
 *   <a href="./tfds.md"><code>tfds</code></a>
 *   <a href="./tfds/download/GenerateMode.md"><code>tfds.GenerateMode</code></a>
+*   <a href="./tfds/ReadConfig.md"><code>tfds.ReadConfig</code></a>
 *   <a href="./tfds/Split.md"><code>tfds.Split</code></a>
 *   <a href="./tfds/as_numpy.md"><code>tfds.as_numpy</code></a>
 *   <a href="./tfds/builder.md"><code>tfds.builder</code></a>
 
@@ -53,6 +53,8 @@ converting various units.
 
 [`class GenerateMode`](./tfds/download/GenerateMode.md): `Enum` for how to treat pre-existing downloads and data.
 
+[`class ReadConfig`](./tfds/ReadConfig.md): Configures input reading pipeline.
+
 [`class Split`](./tfds/Split.md): `Enum` for dataset splits.
 
 [`class percent`](./tfds/percent.md): Syntactic sugar for defining slice subsplits: `tfds.percent[75:-5]`.
@@ -78,4 +80,4 @@ from an image classification dataset.
 
 ## Other Members
 
-*   `__version__ = '1.3.0'` <a id="__version__"></a>
+*   `__version__ = '1.3.2'` <a id="__version__"></a>
@@ -0,0 +1,113 @@
+<div itemscope itemtype="http://developers.google.com/ReferenceObject">
+<meta itemprop="name" content="tfds.ReadConfig" />
+<meta itemprop="path" content="Stable" />
+<meta itemprop="property" content="__eq__"/>
+<meta itemprop="property" content="__ge__"/>
+<meta itemprop="property" content="__gt__"/>
+<meta itemprop="property" content="__init__"/>
+<meta itemprop="property" content="__le__"/>
+<meta itemprop="property" content="__lt__"/>
+<meta itemprop="property" content="__ne__"/>
+</div>
+
+# tfds.ReadConfig
+
+<!-- Insert buttons and diff -->
+
+<table class="tfo-notebook-buttons tfo-api" align="left">
+</table>
+
+<a target="_blank" href="https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/core/utils/read_config.py">View
+source</a>
+
+<!-- Equality marker -->
+## Class `ReadConfig`
+
+Configures input reading pipeline.
+
+<!-- Placeholder for "Used in" -->
+
+#### Attributes:
+
+*   <b>`options`</b>: `tf.data.Options()`, dataset options. Those options are
+    added to the default values defined in `tfrecord_reader.py`. Note that when
+    `shuffle_files` is True and no seed is defined, `experimental_deterministic`
+    will be set to False internally, unless it is defined here.
+*   <b>`shuffle_seed`</b>: `tf.int64`, seed forwarded to
+    `tf.data.Dataset.shuffle` when `shuffle_files=True`.
+*   <b>`shuffle_reshuffle_each_iteration`</b>: `bool`, forwarded to
+    `tf.data.Dataset.shuffle` when `shuffle_files=True`.
+*   <b>`interleave_parallel_reads`</b>: `int`, forwarded to
+    `tf.data.Dataset.interleave`. Defaults to 16.
+*   <b>`interleave_block_length`</b>: `int`, forwarded to
+    `tf.data.Dataset.interleave`. Defaults to 16.
+*   <b>`experimental_interleave_sort_fn`</b>: Function with signature
+    `List[FileDict] -> List[FileDict]`, which takes the list of `dict(file: str,
+    take: int, skip: int)` and returns the modified version to read. This can be
+    used to sort/shuffle the shards to read in a custom order, instead of
+    relying on `shuffle_files=True`.
+
+<h2 id="__init__"><code>__init__</code></h2>
+
+```python
+__init__(
+    options=NOTHING,
+    shuffle_seed=attr_dict['shuffle_seed'].default,
+    shuffle_reshuffle_each_iteration=attr_dict['shuffle_reshuffle_each_iteration'].default,
+    interleave_parallel_reads=attr_dict['interleave_parallel_reads'].default,
+    interleave_block_length=attr_dict['interleave_block_length'].default,
+    experimental_interleave_sort_fn=attr_dict['experimental_interleave_sort_fn'].default
+)
+```
+
+Initialize self. See help(type(self)) for accurate signature.
+
+## Methods
+
+<h3 id="__eq__"><code>__eq__</code></h3>
+
+```python
+__eq__(other)
+```
+
+Return self==value.
+
+<h3 id="__ge__"><code>__ge__</code></h3>
+
+```python
+__ge__(other)
+```
+
+Automatically created by attrs.
+
+<h3 id="__gt__"><code>__gt__</code></h3>
+
+```python
+__gt__(other)
+```
+
+Automatically created by attrs.
+
+<h3 id="__le__"><code>__le__</code></h3>
+
+```python
+__le__(other)
+```
+
+Automatically created by attrs.
+
+<h3 id="__lt__"><code>__lt__</code></h3>
+
+```python
+__lt__(other)
+```
+
+Automatically created by attrs.
+
+<h3 id="__ne__"><code>__ne__</code></h3>
+
+```python
+__ne__(other)
+```
+
+Check equality and either forward a NotImplemented or return the result negated.
@@ -10,17 +10,17 @@
 
 # tfds.Split
 
-<!-- Insert buttons -->
+<!-- Insert buttons and diff -->
 
 <table class="tfo-notebook-buttons tfo-api" align="left">
 </table>
 
 <a target="_blank" href="https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/core/splits.py">View
 source</a>
 
+<!-- Equality marker -->
 ## Class `Split`
 
-<!-- Start diff -->
 `Enum` for dataset splits.
 
 <!-- Placeholder for "Used in" -->