@@ -187,25 +187,36 @@ def __init__(self, data_dir=None, config=None, version=None):
187
187
logging .info ("Load pre-computed datasetinfo (eg: splits) from bucket." )
188
188
self .info .initialize_from_bucket ()
189
189
190
- def _pick_version ( self , requested_version ):
191
- """Returns utils.Version instance, or raise AssertionError."""
190
+ @ utils . memoized_property
191
+ def canonical_version ( self ):
192
192
if self ._builder_config :
193
- canonical_version = self ._builder_config .version
194
- supported_versions = self ._builder_config .supported_versions
193
+ return self ._builder_config .version
195
194
else :
196
- canonical_version = self .VERSION
197
- supported_versions = self .SUPPORTED_VERSIONS
198
- versions = [
195
+ return self .VERSION
196
+
197
+ @utils .memoized_property
198
+ def supported_versions (self ):
199
+ if self ._builder_config :
200
+ return self ._builder_config .supported_versions
201
+ else :
202
+ return self .SUPPORTED_VERSIONS
203
+
204
+ @utils .memoized_property
205
+ def versions (self ):
206
+ """Versions (canonical + available), in preference order."""
207
+ return [
199
208
utils .Version (v ) if isinstance (v , six .string_types ) else v
200
- for v in [canonical_version ] + supported_versions
209
+ for v in [self . canonical_version ] + self . supported_versions
201
210
]
211
+
212
+ def _pick_version (self , requested_version ):
213
+ """Returns utils.Version instance, or raises AssertionError."""
202
214
if requested_version == "experimental_latest" :
203
- return max (versions )
204
- for version in versions :
215
+ return max (self . versions )
216
+ for version in self . versions :
205
217
if requested_version is None or version .match (requested_version ):
206
218
return version
207
- available_versions = [str (v )
208
- for v in [canonical_version ] + supported_versions ]
219
+ available_versions = [str (v ) for v in self .versions ]
209
220
msg = "Dataset {} cannot be loaded at version {}, only: {}." .format (
210
221
self .name , requested_version , ", " .join (available_versions ))
211
222
raise AssertionError (msg )
@@ -253,9 +264,17 @@ def download_and_prepare(self, download_dir=None, download_config=None):
253
264
logging .info ("Reusing dataset %s (%s)" , self .name , self ._data_dir )
254
265
return
255
266
256
- dl_manager = self ._make_download_manager (
257
- download_dir = download_dir ,
258
- download_config = download_config )
267
+ if self .version .tfds_version_to_prepare :
268
+ available_to_prepare = ", " .join (str (v ) for v in self .versions
269
+ if not v .tfds_version_to_prepare )
270
+ raise AssertionError (
271
+ "The version of the dataset you are trying to use ({}:{}) can only "
272
+ "be generated using TFDS code synced @ {} or earlier. Either sync to "
273
+ "that version of TFDS to first prepare the data or use another "
274
+ "version of the dataset (available for `download_and_prepare`: "
275
+ "{})." .format (
276
+ self .name , self .version , self .version .tfds_version_to_prepare ,
277
+ available_to_prepare ))
259
278
260
279
# Currently it's not possible to overwrite the data because it would
261
280
# conflict with versioning: If the last version has already been generated,
@@ -266,13 +285,18 @@ def download_and_prepare(self, download_dir=None, download_config=None):
266
285
"the same version {} already exists. If the dataset has changed, "
267
286
"please update the version number." .format (self .name , self ._data_dir ,
268
287
self .version ))
288
+
269
289
logging .info ("Generating dataset %s (%s)" , self .name , self ._data_dir )
270
290
if not utils .has_sufficient_disk_space (
271
291
self .info .size_in_bytes , directory = self ._data_dir_root ):
272
292
raise IOError ("Not enough disk space. Needed: %s" %
273
293
units .size_str (self .info .size_in_bytes ))
274
294
self ._log_download_bytes ()
275
295
296
+ dl_manager = self ._make_download_manager (
297
+ download_dir = download_dir ,
298
+ download_config = download_config )
299
+
276
300
# Create a tmp dir and rename to self._data_dir on successful exit.
277
301
with file_format_adapter .incomplete_dir (self ._data_dir ) as tmp_data_dir :
278
302
# Temporarily assign _data_dir to tmp_data_dir to avoid having to forward
0 commit comments