24
24
import tensorflow as tf
25
25
import tensorflow_datasets .public_api as tfds
26
26
27
+ import numpy as np
27
28
28
29
_URL = "https://nextcloud.qriscloud.org.au/index.php/s/a3KxPawpqkiorST/download"
30
+ _URL_LBL = "https://raw.githubusercontent.com/AlexOlsen/DeepWeeds/master/labels/labels.csv"
29
31
30
32
_DESCRIPTION = (
31
33
"""The DeepWeeds dataset consists of 17,509 images capturing eight different weed species native to Australia """
@@ -86,7 +88,7 @@ def _info(self):
86
88
"label" : tfds .features .ClassLabel (names = _NAMES ),
87
89
}),
88
90
supervised_keys = ("image" , "label" ),
89
- homepage = "https://github.com/AlexOlsen/DeepWeeds" ,
91
+ urls = [ _URL , _URL_LBL ] ,
90
92
citation = _CITATION ,
91
93
)
92
94
@@ -98,19 +100,38 @@ def _split_generators(self, dl_manager):
98
100
url = _URL ,
99
101
extract_method = tfds .download .ExtractMethod .ZIP ))
100
102
103
+
104
+ path_lbl = dl_manager .download_and_extract (
105
+ tfds .download .Resource (
106
+ url = _URL_LBL ,
107
+ extract_method = None ))
108
+
109
+
110
+ # there are different label sets for train and test
111
+ # for now we return the full dataset as the 'train' set.
101
112
return [
102
113
tfds .core .SplitGenerator (
103
114
name = "train" ,
104
115
gen_kwargs = {
105
116
"data_dir_path" : path ,
117
+ "label_dir_path" : path_lbl ,
106
118
},
107
119
),
108
120
]
109
121
110
- def _generate_examples (self , data_dir_path ):
122
+ def _generate_examples (self , data_dir_path , label_dir_path ):
111
123
"""Generate images and labels for splits."""
112
-
124
+ # parse the CSV label data
125
+ csv = np .loadtxt (label_dir_path ,
126
+ dtype = {'names' : ('Filename' , 'Label' , 'Species' ), 'formats' : ('S21' , 'i4' , 'S1' )},
127
+ skiprows = 1 ,
128
+ delimiter = ',' )
129
+
130
+ label_dict = {}
131
+ for entry in csv :
132
+ label_dict [entry [0 ].decode ('UTF-8' )] = int (entry [1 ])
133
+
113
134
for file_name in tf .io .gfile .listdir (data_dir_path ):
114
135
image = os .path .join (data_dir_path , file_name )
115
- label = _NAMES [int ( file_name . split ( "-" )[ 2 ]. split ( "." )[ 0 ]) ]
136
+ label = _NAMES [label_dict [ file_name ] ]
116
137
yield file_name , {"image" : image , "label" : label }
0 commit comments