24 | 24 | import tensorflow as tf
25 | 25 | import tensorflow_datasets.public_api as tfds
26 | 26 |
27 |    | -import numpy as np
   | 27 | +import csv
28 | 28 |
29 | 29 | _URL = "https://nextcloud.qriscloud.org.au/index.php/s/a3KxPawpqkiorST/download"
30 | 30 | _URL_LBL = "https://raw.githubusercontent.com/AlexOlsen/DeepWeeds/master/labels/labels.csv"
@@ -85,54 +85,49 @@ def _info(self):
85 | 85 |         description=(_DESCRIPTION),
86 | 86 |         features=tfds.features.FeaturesDict({
87 | 87 |             "image": tfds.features.Image(shape=_IMAGE_SHAPE),
88 |    | -            "label": tfds.features.ClassLabel(names=_NAMES),
   | 88 | +            "label": tfds.features.ClassLabel(num_classes=9),
89 | 89 |         }),
90 | 90 |         supervised_keys=("image", "label"),
91 | 91 |         homepage="https://github.com/AlexOlsen/DeepWeeds",
92 |    | -        urls=[_URL, _URL_LBL],
93 | 92 |         citation=_CITATION,
94 | 93 |     )
95 | 94 |
96 | 95 |   def _split_generators(self, dl_manager):
97 | 96 |     """Define Splits."""
98 | 97 |     # The file is in ZIP format, but URL doesn't mention it.
99 |    | -    path = dl_manager.download_and_extract(
100 |   | -        tfds.download.Resource(
101 |   | -            url=_URL,
102 |   | -            extract_method=tfds.download.ExtractMethod.ZIP))
103 |   | -
104 |   | -
105 |   | -    path_lbl = dl_manager.download_and_extract(
106 |   | -        tfds.download.Resource(
107 |   | -            url=_URL_LBL,
108 |   | -            extract_method=None))
    |  98 | +    paths = dl_manager.download_and_extract({
    |  99 | +        "image": tfds.download.Resource(
    | 100 | +            url=_URL,
    | 101 | +            extract_method=tfds.download.ExtractMethod.ZIP),
    | 102 | +        "label": _URL_LBL})
109 | 103 |
110 |    | -
111 |    | -    # there are different label set for train and test
112 |    | -    # for now we return the full dataset as 'train' set.
113 | 104 |     return [
114 | 105 |         tfds.core.SplitGenerator(
115 | 106 |             name="train",
116 | 107 |             gen_kwargs={
117 |    | -                "data_dir_path": path,
118 |    | -                "label_dir_path": path_lbl,
    | 108 | +                "data_dir_path": paths["image"],
    | 109 | +                "label_path": paths["label"],
119 | 110 |             },
120 | 111 |         ),
121 | 112 |     ]
122 | 113 |
123 |    | -  def _generate_examples(self, data_dir_path, label_dir_path):
    | 114 | +  def _generate_examples(self, data_dir_path, label_path):
124 | 115 |     """Generate images and labels for splits."""
125 |    | -    # parse the csv-label data
126 |    | -    csv = np.loadtxt(label_dir_path,
127 |    | -                     dtype={'names': ('Filename', 'Label', 'Species'), 'formats': ('S21', 'i4', 'S1')},
128 |    | -                     skiprows=1,
129 |    | -                     delimiter=',')
130 |    | -
131 |    | -    label_dict = {}
132 |    | -    for entry in csv:
133 |    | -      label_dict[entry[0].decode('UTF-8')] = int(entry[1])
134 | 116 |
    | 117 | +    with tf.io.gfile.GFile(label_path) as f:
    | 118 | +      rows = list(csv.DictReader(f))
    | 119 | +
    | 120 | +      # Extract the label-id -> species-name mapping and register the names on the feature.
    | 121 | +      label_id_to_name = {
    | 122 | +          row['Label']: row['Species'] for row in rows
    | 123 | +      }
    | 124 | +      self.info.features['label'].names = [v for k, v in sorted(label_id_to_name.items())]
    | 125 | +
    | 126 | +      filename_to_label = {
    | 127 | +          row['Filename']: row['Species'] for row in rows
    | 128 | +      }
135 | 129 |     for file_name in tf.io.gfile.listdir(data_dir_path):
136 |    | -      image = os.path.join(data_dir_path, file_name)
137 |    | -      label = _NAMES[label_dict[file_name]]
138 |    | -      yield file_name, {"image": image, "label": label}
    | 130 | +      yield file_name, {
    | 131 | +          "image": os.path.join(data_dir_path, file_name),
    | 132 | +          "label": filename_to_label[file_name]
    | 133 | +      }
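
For reference, a minimal standalone sketch of the label-CSV parsing that the new _generate_examples performs, run against made-up rows in the Filename,Label,Species layout of the DeepWeeds labels.csv (the file names and species strings below are illustrative placeholders, not values from the real file):

    import csv
    import io

    # Illustrative stand-in for labels.csv: same Filename,Label,Species columns,
    # but the rows themselves are made up for this sketch.
    sample_csv = (
        "Filename,Label,Species\n"
        "20170101-000000-0.jpg,0,species_a\n"
        "20170101-000001-0.jpg,1,species_b\n"
    )

    rows = list(csv.DictReader(io.StringIO(sample_csv)))

    # Label id -> species name (used to populate the ClassLabel names) ...
    label_id_to_name = {row["Label"]: row["Species"] for row in rows}
    # ... and file name -> species name (used to label each yielded example).
    filename_to_label = {row["Filename"]: row["Species"] for row in rows}

    print([name for _, name in sorted(label_id_to_name.items())])
    # ['species_a', 'species_b']
    print(filename_to_label["20170101-000000-0.jpg"])
    # species_a

Materializing the reader into a list up front lets both mappings be built from the same parsed rows, since a csv.DictReader is a one-shot iterator.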