tensorflow
diff --git a/setup.py b/setup.py
Lines changed: 1 addition & 0 deletions
diff --git a/tensorflow_datasets/audio/librispeech_test.py b/tensorflow_datasets/audio/librispeech_test.py
Lines changed: 9 additions & 9 deletions
diff --git a/tensorflow_datasets/audio/nsynth.py b/tensorflow_datasets/audio/nsynth.py
Lines changed: 45 additions & 22 deletions
diff --git a/tensorflow_datasets/audio/nsynth_test.py b/tensorflow_datasets/audio/nsynth_test.py
Lines changed: 34 additions & 0 deletions
diff --git a/tensorflow_datasets/core/download/downloader.py b/tensorflow_datasets/core/download/downloader.py
Lines changed: 17 additions & 5 deletions
diff --git a/tensorflow_datasets/core/download/downloader_test.py b/tensorflow_datasets/core/download/downloader_test.py
Lines changed: 33 additions & 5 deletions
diff --git a/tensorflow_datasets/core/download/kaggle.py b/tensorflow_datasets/core/download/kaggle.py
Lines changed: 3 additions & 1 deletion
diff --git a/tensorflow_datasets/image/__init__.py b/tensorflow_datasets/image/__init__.py
Lines changed: 1 addition & 0 deletions
@@ -35,6 +35,7 @@
 REQUIRED_PKGS = [
     'absl-py',
     'future',
+    'numpy',
     'promise',
     'protobuf>=3.6.1',
     'requests',
 
@@ -20,40 +20,40 @@
 from __future__ import print_function
 
 from tensorflow_datasets import testing
+from tensorflow_datasets.audio import librispeech
 import tensorflow_datasets.public_api as tfds
-from tensorflow_datasets.audio import librispeech 
 
 
-class LibrispeechTest(testing.DatasetBuilderTestCase):
+class LibrispeechTest100(testing.DatasetBuilderTestCase):
   DATASET_CLASS = librispeech.Librispeech
   BUILDER_CONFIG_NAMES_TO_TEST = ["clean-100"]
   SPLITS = {
       "train": 2,
       "test": 1,
       "dev": 1,
   }
-  
+
   DL_EXTRACT_RESULT = {
       tfds.Split.TRAIN: ["train-clean-100"],
       tfds.Split.TEST: ["test-clean"],
       tfds.Split.VALIDATION: ["dev-clean"],
   }
-  
-  
-class LibrispeechTest(testing.DatasetBuilderTestCase):
+
+
+class LibrispeechTest360(testing.DatasetBuilderTestCase):
   DATASET_CLASS = librispeech.Librispeech
   BUILDER_CONFIG_NAMES_TO_TEST = ["clean-360"]
   SPLITS = {
       "train": 1,
       "test": 1,
       "dev": 1,
   }
-  
+
   DL_EXTRACT_RESULT = {
-      tfds.Split.TRAIN: ["train-clean-100", "train-clean-360"], 
+      tfds.Split.TRAIN: ["train-clean-100", "train-clean-360"],
       tfds.Split.TEST: ["test-clean"],
       tfds.Split.VALIDATION: ["dev-clean"],
-  }  
+  }
 
 
 if __name__ == "__main__":
 
@@ -26,7 +26,7 @@
 _DESCRIPTION = """\
 The NSynth Dataset is an audio dataset containing ~300k musical notes, each
 with a unique pitch, timbre, and envelope. Each note is annotated with three
-additional pieces of information based on a combination of human evaluation 
+additional pieces of information based on a combination of human evaluation
 and heuristic algorithms:
  -Source: The method of sound production for the note's instrument.
  -Family: The high-level family of which the note's instrument is a member.
@@ -63,8 +63,16 @@
     "string", "synth_lead", "vocal"]
 _INSTRUMENT_SOURCES = ["acoustic", "electronic", "synthetic"]
 _QUALITIES = [
-    "bright", "dark", "distortion", "fast_decay", "long_release", "multiphonic",
-    "nonlinear_env", "percussive", "reverb", "tempo-synced"]
+    "bright",
+    "dark",
+    "distortion",
+    "fast_decay",
+    "long_release",
+    "multiphonic",
+    "nonlinear_env",
+    "percussive",
+    "reverb",
+    "tempo-synced"]
 
 _BASE_DOWNLOAD_PATH = "http://download.magenta.tensorflow.org/datasets/nsynth/nsynth-"
 
@@ -86,11 +94,14 @@ def _info(self):
         builder=self,
         description=_DESCRIPTION,
         features=tfds.features.FeaturesDict({
-            "id": tf.string,
-            "audio": tfds.features.Tensor(
-                shape=(_SAMPLE_LENGTH,), dtype=tf.float32),
-            "pitch": tfds.features.ClassLabel(num_classes=128),
-            "velocity": tfds.features.ClassLabel(num_classes=128),
+            "id":
+                tf.string,
+            "audio":
+                tfds.features.Tensor(shape=(_SAMPLE_LENGTH,), dtype=tf.float32),
+            "pitch":
+                tfds.features.ClassLabel(num_classes=128),
+            "velocity":
+                tfds.features.ClassLabel(num_classes=128),
             "instrument": {
                 # We read the list of labels in _split_generators.
                 "label": tfds.features.ClassLabel(num_classes=1006),
@@ -105,17 +116,20 @@ def _info(self):
 
   def _split_generators(self, dl_manager):
     dl_urls = {
-        split: _BASE_DOWNLOAD_PATH + "%s.tfrecord" % split for split in _SPLITS}
-    dl_urls["instrument_labels"] = _BASE_DOWNLOAD_PATH + "instrument_labels.txt"
+        split: _BASE_DOWNLOAD_PATH + "%s.tfrecord" % split for split in _SPLITS
+    }
+    dl_urls["instrument_labels"] = (_BASE_DOWNLOAD_PATH +
+                                    "instrument_labels.txt")
     dl_paths = dl_manager.download_and_extract(dl_urls)
 
-    instrument_labels = tf.io.gfile.GFile(
-        dl_paths["instrument_labels"], "r").read().strip().split("\n")
+    instrument_labels = tf.io.gfile.GFile(dl_paths["instrument_labels"],
+                                          "r").read().strip().split("\n")
     self.info.features["instrument"]["label"].names = instrument_labels
 
     return [
-        tfds.core.SplitGenerator(
-            name=split, num_shards=_SPLIT_SHARDS[split],
+        tfds.core.SplitGenerator(  # pylint: disable=g-complex-comprehension
+            name=split,
+            num_shards=_SPLIT_SHARDS[split],
             gen_kwargs={"path": dl_paths[split]}) for split in _SPLITS
     ]
 
@@ -126,15 +140,24 @@ def _generate_examples(self, path):
       example = tf.train.Example.FromString(example_str)
       features = example.features.feature
       yield {
-          "id": features["note_str"].bytes_list.value[0],
-          "audio": np.array(
-              features["audio"].float_list.value, dtype=np.float32),
-          "pitch": features["pitch"].int64_list.value[0],
-          "velocity": features["velocity"].int64_list.value[0],
+          "id":
+              features["note_str"].bytes_list.value[0],
+          "audio":
+              np.array(features["audio"].float_list.value, dtype=np.float32),
+          "pitch":
+              features["pitch"].int64_list.value[0],
+          "velocity":
+              features["velocity"].int64_list.value[0],
           "instrument": {
-              "label": features["instrument_str"].bytes_list.value[0],
-              "family": features["instrument_family_str"].bytes_list.value[0],
-              "source": features["instrument_source_str"].bytes_list.value[0]
+              "label":
+                  tf.compat.as_text(
+                      features["instrument_str"].bytes_list.value[0]),
+              "family":
+                  tf.compat.as_text(
+                      features["instrument_family_str"].bytes_list.value[0]),
+              "source":
+                  tf.compat.as_text(
+                      features["instrument_source_str"].bytes_list.value[0])
           },
           "qualities": {
               q: features["qualities"].int64_list.value[i]
 
@@ -0,0 +1,34 @@
+# coding=utf-8
+# Copyright 2019 The TensorFlow Datasets Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Nsynth Dataset Builder test."""
+from tensorflow_datasets.audio import nsynth
+import tensorflow_datasets.testing as tfds_test
+
+
+class NsynthTest(tfds_test.DatasetBuilderTestCase):
+  """Runs the shared `DatasetBuilderTestCase` checks against `nsynth.Nsynth`."""
+  DATASET_CLASS = nsynth.Nsynth
+  # NOTE(review): presumably the number of examples expected in each split of
+  # the fake data -- confirm against `DatasetBuilderTestCase`.
+  SPLITS = {"train": 3, "test": 3, "valid": 3}
+  # Stand-in for the result of `dl_manager.download_and_extract`. The builder
+  # requests one entry per split plus "instrument_labels", so the keys here
+  # have to mirror that dict (assumes the builder's splits are exactly
+  # train/test/valid -- TODO confirm).
+  DL_EXTRACT_RESULT = {
+      "train": "nsynth-train.tfrecord",
+      "test": "nsynth-test.tfrecord",
+      "valid": "nsynth-valid.tfrecord",
+      "instrument_labels": "nsynth-instrument_labels.txt"
+  }
+
+
+if __name__ == "__main__":
+  tfds_test.test_main()
@@ -24,12 +24,13 @@
 import io
 import os
 import re
-
 import concurrent.futures
 import promise
 import requests
-import tensorflow as tf
 
+from six.moves import urllib
+
+import tensorflow as tf
 from tensorflow_datasets.core import units
 from tensorflow_datasets.core import utils
 from tensorflow_datasets.core.download import kaggle
@@ -90,7 +91,9 @@ def tqdm(self):
         yield
 
   def download(self, url_info, destination_path):
-    """Download url to given path. Returns Promise -> sha256 of downloaded file.
+    """Download url to given path.
+
+    Returns Promise -> sha256 of downloaded file.
 
     Args:
       url_info: `UrlInfo`, resource to download.
@@ -139,11 +142,21 @@ def _sync_file_copy(self, filepath, destination_path):
         out_path, checksum_cls=self._checksumer)
     return hexdigest, size
 
+  def _sync_ftp_download(self, url, destination_path):
+    """Synchronous download of an FTP url into `destination_path`.
+
+    `_sync_download` routes every url starting with 'ftp' to this method.
+
+    Args:
+      url: `str`, url handed as-is to `urllib.request.urlretrieve`.
+      destination_path: `str`, directory in which the file is written; the file
+        name is `download_util.get_file_name(url)`.
+
+    Returns:
+      (hexdigest, size) as produced by `utils.read_checksum_digest` for the
+      written file.
+    """
+    out_path = os.path.join(destination_path, download_util.get_file_name(url))
+    # NOTE(review): urlretrieve writes through the local filesystem rather than
+    # `tf.io.gfile` -- confirm `destination_path` is always a local directory.
+    urllib.request.urlretrieve(url, out_path)
+    # The digest is computed from `out_path` once urlretrieve has returned.
+    hexdigest, size = utils.read_checksum_digest(
+        out_path, checksum_cls=self._checksumer)
+    return hexdigest, size
+
   def _sync_download(self, url, destination_path):
     """Synchronous version of `download` method."""
     if kaggle.KaggleFile.is_kaggle_url(url):
       return self._sync_kaggle_download(url, destination_path)
 
+    if url.startswith('ftp'):
+      return self._sync_ftp_download(url, destination_path)
+
     try:
       # If url is on a filesystem that gfile understands, use copy. Otherwise,
       # use requests.
@@ -166,8 +179,7 @@ def _sync_download(self, url, destination_path):
     size_mb = 0
     unit_mb = units.MiB
     self._pbar_dl_size.update_total(
-        int(response.headers.get('Content-length', 0)) // unit_mb
-    )
+        int(response.headers.get('Content-length', 0)) // unit_mb)
     with tf.io.gfile.GFile(path, 'wb') as file_:
       checksum = self._checksumer()
       for block in response.iter_content(chunk_size=io.DEFAULT_BUFFER_SIZE):
 
@@ -64,14 +64,19 @@ def setUp(self):
         'get',
         lambda *a, **kw: _FakeResponse(self.url, self.response, self.cookies),
     ).start()
-    absltest.mock.patch.object(
-        downloader.requests.Session,
-        'get',
-        lambda *a, **kw: _FakeResponse(self.url, self.response, self.cookies),
-    ).start()
     self.downloader._pbar_url = absltest.mock.MagicMock()
     self.downloader._pbar_dl_size = absltest.mock.MagicMock()
 
+    def write_fake_ftp_result(_, filename):
+      # Stand-in for `urlretrieve(url, filename)`: the url is ignored and the
+      # canned `self.response` is written to `filename` in binary mode.
+      with open(filename, 'wb') as result:
+        result.write(self.response)
+
+    absltest.mock.patch.object(
+        downloader.urllib.request,
+        'urlretrieve',
+        write_fake_ftp_result,
+    ).start()
+
   def test_ok(self):
     promise = self.downloader.download(self.resource, self.tmp_dir)
     checksum, _ = promise.get()
@@ -122,6 +127,28 @@ def test_kaggle_api(self):
       with tf.io.gfile.GFile(os.path.join(self.tmp_dir, fname)) as f:
         self.assertEqual(fname, f.read())
 
+  def test_ftp(self):
+    """Downloads an ftp:// resource through the `urlretrieve` stub from setUp."""
+    resource = resource_lib.Resource(
+        url='ftp://username:password@example.com/foo.tar.gz')
+    promise = self.downloader.download(resource, self.tmp_dir)
+    checksum, _ = promise.get()
+    self.assertEqual(checksum, self.resp_checksum)
+    # The stub wrote `self.response`; it must be found unchanged at `self.path`.
+    with open(self.path, 'rb') as result:
+      self.assertEqual(result.read(), self.response)
+    # NOTE(review): presumably `incomplete_path` is the temporary location used
+    # while a download is in flight -- confirm against setUp.
+    self.assertFalse(tf.io.gfile.exists(self.incomplete_path))
+
+  def test_ftp_error(self):
+    """An error raised by `urlretrieve` surfaces from the download promise."""
+    error = downloader.urllib.error.URLError('Problem serving file.')
+    resource = resource_lib.Resource(url='ftp://example.com/foo.tar.gz')
+    # Install the failing stub as a context manager so it is always undone when
+    # this test finishes, instead of `.start()` with no matching `.stop()`.
+    # The download and the assertion both run inside the block, so the stub is
+    # still active while the transfer is attempted.
+    with absltest.mock.patch.object(
+        downloader.urllib.request,
+        'urlretrieve',
+        side_effect=error,
+    ):
+      promise = self.downloader.download(resource, self.tmp_dir)
+      with self.assertRaises(downloader.urllib.error.URLError):
+        promise.get()
+
 
 class GetFilenameTest(testing.TestCase):
 
@@ -139,5 +166,6 @@ def test_headers(self):
     res = downloader._get_filename(resp)
     self.assertEqual(res, 'hello.zip')
 
+
 if __name__ == '__main__':
   testing.test_main()
@@ -101,7 +101,9 @@ def competition_files(self):
         self._competition_name,
     ]
     output = _run_kaggle_command(command, self._competition_name)
-    return sorted([line.split(",")[0] for line in output.split("\n")[1:]])
+    return sorted([
+        line.split(",")[0] for line in output.split("\n")[1:] if line
+    ])
 
   @utils.memoized_property
   def competition_urls(self):
 
@@ -30,6 +30,7 @@
 from tensorflow_datasets.image.image_folder import ImageLabelFolder
 from tensorflow_datasets.image.imagenet import Imagenet2012
 from tensorflow_datasets.image.lsun import Lsun
+from tensorflow_datasets.image.mnist import EMNIST
 from tensorflow_datasets.image.mnist import FashionMNIST
 from tensorflow_datasets.image.mnist import KMNIST
 from tensorflow_datasets.image.mnist import MNIST