19
19
from __future__ import division
20
20
from __future__ import print_function
21
21
22
+ import csv
22
23
import os
23
24
24
25
import tensorflow as tf
25
26
import tensorflow_datasets .public_api as tfds
26
27
27
-
28
28
# Download link for the image archive. The file is delivered in ZIP format even
# though the URL carries no file extension, so the extraction method has to be
# stated explicitly when the resource is downloaded.
_URL = "https://nextcloud.qriscloud.org.au/index.php/s/a3KxPawpqkiorST/download"
29
# Location of the labels CSV file. The example generator reads it with
# `csv.DictReader` and uses its "Filename", "Label" and "Species" columns.
+ _URL_LABELS = "https://raw.githubusercontent.com/AlexOlsen/DeepWeeds/master/labels/labels.csv"
29
30
30
31
_DESCRIPTION = (
31
32
"""The DeepWeeds dataset consists of 17,509 images capturing eight different weed species native to Australia """
34
35
""" "Cluden", "Douglas", "Hervey Range", "Kelso", "McKinlay" and "Paluma"."""
35
36
)
36
37
37
- _NAMES = [
38
- "Chinee apple" , "Snake weed" , "Lantana" , "Prickly acacia" , "Siam weed" ,
39
- "Parthenium" , "Rubber vine" , "Parkinsonia" , "Negative"
40
- ]
41
-
42
38
# Static shape declared for the "image" feature of the dataset.
# NOTE(review): presumably (height, width, channels) -- confirm.
_IMAGE_SHAPE = (256 , 256 , 3 )
43
39
44
40
_CITATION = """\
73
69
class DeepWeeds (tfds .core .GeneratorBasedBuilder ):
74
70
"""DeepWeeds Image Dataset Class."""
75
71
76
- VERSION = tfds .core .Version ("1 .0.0" )
72
+ VERSION = tfds .core .Version ("2 .0.0" )
77
73
78
74
def _info (self ):
79
75
"""Define Dataset Info."""
@@ -83,7 +79,7 @@ def _info(self):
83
79
description = (_DESCRIPTION ),
84
80
features = tfds .features .FeaturesDict ({
85
81
"image" : tfds .features .Image (shape = _IMAGE_SHAPE ),
86
- "label" : tfds .features .ClassLabel (names = _NAMES ),
82
+ "label" : tfds .features .ClassLabel (num_classes = 9 ),
87
83
}),
88
84
supervised_keys = ("image" , "label" ),
89
85
homepage = "https://github.com/AlexOlsen/DeepWeeds" ,
@@ -93,24 +89,39 @@ def _info(self):
93
89
def _split_generators(self, dl_manager):
    """Fetch the image archive and the label file, then declare the splits.

    Args:
      dl_manager: Download manager whose `download_and_extract` method is
        called with a dict of resources and returns a dict with the same
        keys.

    Returns:
      A list with a single "train" `tfds.core.SplitGenerator`. Its
      `gen_kwargs` carry the downloaded image location as `data_dir_path`
      and the downloaded label file location as `label_path`.
    """
    # The image archive is a ZIP file, but the URL doesn't mention it, so
    # the extraction method is spelled out on the resource.
    image_archive = tfds.download.Resource(
        url=_URL,
        extract_method=tfds.download.ExtractMethod.ZIP,
    )
    resources = {
        "image": image_archive,
        "label": _URL_LABELS,
    }
    downloaded = dl_manager.download_and_extract(resources)

    train_split = tfds.core.SplitGenerator(
        name="train",
        gen_kwargs={
            "data_dir_path": downloaded["image"],
            "label_path": downloaded["label"],
        },
    )
    return [train_split]
109
107
110
def _generate_examples(self, data_dir_path, label_path):
    """Yield one `(key, example)` pair per entry listed in `data_dir_path`.

    Args:
      data_dir_path: Directory whose entries are listed with
        `tf.io.gfile.listdir`; each entry becomes one example.
      label_path: Path of the labels CSV file. Its "Filename", "Label" and
        "Species" columns are read.

    Yields:
      Tuples `(file_name, example)` where `example` maps "image" to the
      joined path of the file and "label" to the species name recorded for
      that file name in the CSV.

    Raises:
      KeyError: If a listed file name has no row in the labels CSV.
    """
    # The rows are materialised once because they are traversed twice below.
    with tf.io.gfile.GFile(label_path) as label_file:
        rows = list(csv.DictReader(label_file))

    # First traversal: integer label id -> species name. The names, ordered
    # by id, are stored on the "label" feature.
    species_by_id = {}
    for row in rows:
        label_id = int(row["Label"])
        species_by_id[label_id] = row["Species"]
    ordered_names = [species_by_id[label_id] for label_id in sorted(species_by_id)]
    self.info.features["label"].names = ordered_names

    # Second traversal: file name -> species name, used to label each image.
    species_by_file = dict((row["Filename"], row["Species"]) for row in rows)

    for file_name in tf.io.gfile.listdir(data_dir_path):
        example = {
            "image": os.path.join(data_dir_path, file_name),
            "label": species_by_file[file_name],
        }
        yield file_name, example
0 commit comments