Adds 'shapes3d' data set.

obachem · copybara-github · commit 32aa0406165a · 2019-03-26T08:53:09.000-07:00
PiperOrigin-RevId: 240356128
diff --git a/tensorflow_datasets/image/__init__.py b/tensorflow_datasets/image/__init__.py
@@ -41,5 +41,6 @@
 from tensorflow_datasets.image.open_images import OpenImagesV4
 from tensorflow_datasets.image.quickdraw import QuickdrawBitmap
 from tensorflow_datasets.image.rock_paper_scissors import RockPaperScissors
+from tensorflow_datasets.image.shapes3d import Shapes3d
 from tensorflow_datasets.image.svhn import SvhnCropped
 from tensorflow_datasets.image.voc import Voc2007
diff --git a/tensorflow_datasets/image/shapes3d.py b/tensorflow_datasets/image/shapes3d.py
@@ -0,0 +1,172 @@
+# coding=utf-8
+# Copyright 2019 The TensorFlow Datasets Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Shapes3D dataset."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tempfile
+
+import h5py
+import numpy as np
+from six import moves
+import tensorflow as tf
+
+import tensorflow_datasets.public_api as tfds
+
+# BibTeX entry handed to `DatasetInfo(citation=...)`.
+_CITATION = """\
+@misc{3dshapes18,
+  title={3D Shapes Dataset},
+  author={Burgess, Chris and Kim, Hyunjik},
+  howpublished={https://github.com/deepmind/3dshapes-dataset/},
+  year={2018}
+}
+"""
+
+# Location of the hdf5 file that the builder downloads.
+_URL = ("https://storage.googleapis.com/3d-shapes/3dshapes.h5")
+
+# Human-readable description handed to `DatasetInfo(description=...)`.
+# NOTE(review): the fake-data generator (testing/shapes3d.py) draws scale
+# values from [0.75, 1.25] and hue values from [0, 0.9], which does not match
+# the ranges quoted below -- confirm against the real file.
+_DESCRIPTION = """\
+3dshapes is a dataset of 3D shapes procedurally generated from 6 ground truth
+independent latent factors. These factors are *floor colour*, *wall colour*, *object colour*,
+*scale*, *shape* and *orientation*.
+
+All possible combinations of these latents are present exactly once, generating N = 480000 total images.
+
+### Latent factor values
+
+*   floor hue: 10 values linearly spaced in [0, 1]
+*   wall hue: 10 values linearly spaced in [0, 1]
+*   object hue: 10 values linearly spaced in [0, 1]
+*   scale: 8 values linearly spaced in [0, 1]
+*   shape: 4 values in [0, 1, 2, 3]
+*   orientation: 15 values linearly spaced in [-30, 30]
+
+We varied one latent at a time (starting from orientation, then shape, etc), and sequentially stored the images in fixed order in the `images` array. The corresponding values of the factors are stored in the same order in the `labels` array.
+"""
+
+
+class Shapes3d(tfds.core.GeneratorBasedBuilder):
+  """Builder for the Shapes3d data set.
+
+  Every example holds one 64x64x3 image together with, for each of the six
+  latent factors, the float value read from the file (`value_*`) and a class
+  label computed from that value (`label_*`).
+  """
+
+  VERSION = tfds.core.Version("0.1.0")
+
+  def _info(self):
+    """Returns the `DatasetInfo` that declares the features of the data set."""
+    # Class label features together with their number of classes.
+    label_specs = (
+        ("label_floor_hue", 10),
+        ("label_wall_hue", 10),
+        ("label_object_hue", 10),
+        ("label_scale", 8),
+        ("label_shape", 4),
+        ("label_orientation", 15),
+    )
+    # Scalar float features holding the raw factor values.
+    value_keys = (
+        "value_floor_hue",
+        "value_wall_hue",
+        "value_object_hue",
+        "value_scale",
+        "value_shape",
+        "value_orientation",
+    )
+
+    feature_map = {"image": tfds.features.Image(shape=(64, 64, 3))}
+    for key, num_classes in label_specs:
+      feature_map[key] = tfds.features.ClassLabel(num_classes=num_classes)
+    for key in value_keys:
+      feature_map[key] = tfds.features.Tensor(shape=[], dtype=tf.float32)
+
+    return tfds.core.DatasetInfo(
+        builder=self,
+        description=_DESCRIPTION,
+        features=tfds.features.FeaturesDict(feature_map),
+        urls=["https://github.com/deepmind/3d-shapes"],
+        citation=_CITATION,
+    )
+
+  def _split_generators(self, dl_manager):
+    """Downloads the hdf5 file and returns a single TRAIN split generator."""
+    h5_path = dl_manager.download(_URL)
+
+    # The data set comes without a predefined train/val/test split, so all
+    # examples end up in TRAIN.
+    train_split = tfds.core.SplitGenerator(
+        name=tfds.Split.TRAIN,
+        num_shards=1,
+        gen_kwargs={"filepath": h5_path})
+    return [train_split]
+
+  def _generate_examples(self, filepath):
+    """Yields one feature dictionary per image stored in the hdf5 file.
+
+    Args:
+      filepath: path to the Shapes3d hdf5 file.
+
+    Yields:
+      Dictionaries with the image plus the label and value of every factor.
+    """
+    # Feature names in the column order of the label/value arrays.
+    label_keys = (
+        "label_floor_hue",
+        "label_wall_hue",
+        "label_object_hue",
+        "label_scale",
+        "label_shape",
+        "label_orientation",
+    )
+    value_keys = (
+        "value_floor_hue",
+        "value_wall_hue",
+        "value_object_hue",
+        "value_scale",
+        "value_shape",
+        "value_orientation",
+    )
+
+    # Walking through several data sets of one hdf5 file in lockstep would be
+    # slow, so the whole file is pulled into memory before anything is
+    # yielded.
+    image_array, values_array = _load_data(filepath)
+
+    # The file only stores float factor values; the integer class labels are
+    # derived from them one column at a time.
+    labels_array = np.zeros_like(values_array, dtype=np.int64)
+    for column in range(values_array.shape[1]):
+      labels_array[:, column] = _discretize(values_array[:, column])
+
+    rows = moves.zip(image_array, labels_array, values_array)
+    for image, labels, values in rows:
+      example = {"image": image}
+      for position, key in enumerate(label_keys):
+        example[key] = labels[position]
+      for position, key in enumerate(value_keys):
+        example[key] = values[position]
+      yield example
+
+
+def _load_data(filepath):
+  """Reads the complete content of the hdf5 file into memory.
+
+  Args:
+    filepath: path to the Shapes3d hdf5 file.
+
+  Returns:
+    Tuple `(images, values)` of Numpy arrays with the content of the "images"
+    and "labels" data sets of the file.
+  """
+  with h5py.File(filepath, "r") as h5_file:
+    # Despite its name, the "labels" data set of the file holds the float
+    # factor values and not the class labels.
+    return tuple(np.array(h5_file[name]) for name in ("images", "labels"))
+
+
+
+
+def _discretize(a):
+  """Discretizes array values to class labels.
+
+  Each element is replaced by the zero-based position of its value among the
+  sorted distinct values of `a`. Equal values therefore share one label and
+  the labels cover `0 .. num_distinct - 1`.
+
+  Args:
+    a: array-like with the values to discretize; any shape is accepted.
+
+  Returns:
+    Integer Numpy array of the same shape as `a` with the class labels.
+  """
+  arr = np.asarray(a)
+  # The inverse index returned by `np.unique` is exactly the position of each
+  # element in the sorted array of distinct values. Working on the flattened
+  # array keeps the result independent of how Numpy shapes the inverse index
+  # for multi-dimensional input.
+  _, inverse = np.unique(arr.ravel(), return_inverse=True)
+  return inverse.reshape(arr.shape)
diff --git a/tensorflow_datasets/image/shapes3d_test.py b/tensorflow_datasets/image/shapes3d_test.py
@@ -0,0 +1,27 @@
+# coding=utf-8
+# Copyright 2019 The TensorFlow Datasets Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from tensorflow_datasets.image import shapes3d
+import tensorflow_datasets.testing as tfds_test
+
+
+class Shapes3dTest(tfds_test.DatasetBuilderTestCase):
+  """Runs the generic dataset builder test case against `Shapes3d`."""
+  # Builder under test.
+  DATASET_CLASS = shapes3d.Shapes3d
+  # Expected number of examples per split; the fake file written by
+  # testing/shapes3d.py contains NUM_IMAGES = 5 samples.
+  SPLITS = {"train": 5}
+  # Name of the fake hdf5 file in fake_examples/shapes3d/ -- presumably what
+  # the mocked download step hands to the builder (TODO confirm).
+  DL_EXTRACT_RESULT = "3dshapes.h5"
+
+
+if __name__ == "__main__":
+  tfds_test.test_main()
diff --git a/tensorflow_datasets/testing/shapes3d.py b/tensorflow_datasets/testing/shapes3d.py
@@ -0,0 +1,92 @@
+# coding=utf-8
+# Copyright 2019 The TensorFlow Datasets Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+r"""Generate Shapes3d-like files, smaller and with random data.
+
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+from absl import app
+from absl import flags
+import h5py
+import numpy as np
+
+from tensorflow_datasets.core.utils import py_utils
+from tensorflow_datasets.testing import test_utils
+
+# Number of fake samples written to the generated file.
+NUM_IMAGES = 5
+# Candidate values per latent factor; one inner list per column of the
+# generated "labels" data set (10, 10, 10, 8, 4 and 15 values).
+# NOTE(review): presumably ordered as floor hue, wall hue, object hue, scale,
+# shape, orientation to match the column order the Shapes3d builder reads --
+# confirm.
+FACTOR_VALUES = [[0., 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
+                 [0., 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
+                 [0., 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
+                 [
+                     0.75, 0.82142857, 0.89285714, 0.96428571, 1.03571429,
+                     1.10714286, 1.17857143, 1.25
+                 ], [0., 1., 2., 3.],
+                 [
+                     -30., -25.71428571, -21.42857143, -17.14285714,
+                     -12.85714286, -8.57142857, -4.28571429, 0., 4.28571429,
+                     8.57142857, 12.85714286, 17.14285714, 21.42857143,
+                     25.71428571, 30.
+                 ]]
+# File name of the generated hdf5 file.
+OUTPUT_NAME = "3dshapes.h5"
+
+# Directory below which testing/test_data/fake_examples/shapes3d is created.
+flags.DEFINE_string("tfds_dir", py_utils.tfds_dir(),
+                    "Path to tensorflow_datasets directory")
+FLAGS = flags.FLAGS
+
+
+def _create_fake_samples():
+  """Draws a small, reproducible set of random images and factor values.
+
+  Returns:
+    Tuple `(images, values)`: a uint8 array of shape (NUM_IMAGES, 64, 64, 3)
+    and an array with one row per image and one column per entry of
+    FACTOR_VALUES.
+  """
+  # Fixed seed, so every run of the script produces the same file content.
+  rng = np.random.RandomState(0)
+  fake_images = rng.randint(256, size=(NUM_IMAGES, 64, 64, 3)).astype("uint8")
+  # One draw of NUM_IMAGES values per factor, in the order of FACTOR_VALUES.
+  per_factor = [
+      rng.choice(candidates, size=(NUM_IMAGES)) for candidates in FACTOR_VALUES
+  ]
+  # Flip (factor, image) into (image, factor).
+  return fake_images, np.transpose(per_factor)
+
+
+def _generate():
+  """Writes the fake hdf5 file into a freshly created fake_examples folder."""
+  target_dir = os.path.join(FLAGS.tfds_dir, "testing", "test_data",
+                            "fake_examples", "shapes3d")
+  test_utils.remake_dir(target_dir)
+  target_path = os.path.join(target_dir, OUTPUT_NAME)
+
+  fake_images, fake_values = _create_fake_samples()
+  # The values come back as the result of a transpose; make them contiguous
+  # before they are handed to `write_direct`.
+  fake_values = np.ascontiguousarray(fake_values)
+
+  with h5py.File(target_path, "w") as h5_file:
+    h5_file.create_dataset(
+        "images", fake_images.shape, "|u1").write_direct(fake_images)
+    h5_file.create_dataset(
+        "labels", fake_values.shape, "<f8").write_direct(fake_values)
+
+
+def main(argv):
+  """Entry point for `app.run`: generates the fake Shapes3d file.
+
+  Args:
+    argv: command-line arguments; only the first entry is accepted.
+
+  Raises:
+    app.UsageError: if more than one command-line argument is passed.
+  """
+  unexpected_args = argv[1:]
+  if unexpected_args:
+    raise app.UsageError("Too many command-line arguments.")
+  _generate()
+
+
+if __name__ == "__main__":
+  app.run(main)
diff --git a/tensorflow_datasets/testing/test_data/fake_examples/shapes3d/3dshapes.h5 b/tensorflow_datasets/testing/test_data/fake_examples/shapes3d/3dshapes.h5
diff --git a/tensorflow_datasets/url_checksums/shapes3d.txt b/tensorflow_datasets/url_checksums/shapes3d.txt
@@ -0,0 +1 @@
+https://storage.googleapis.com/3d-shapes/3dshapes.h5 267573662 0a0f6ed98baff276a50f3a081a7434d788da63cb135a98189b2a5b5769be1785

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+https://storage.googleapis.com/3d-shapes/3dshapes.h5 267573662 0a0f6ed98baff276a50f3a081a7434d788da63cb135a98189b2a5b5769be1785`