@@ -66,6 +66,9 @@ def __init__(self, text_encoder_config=None, **kwargs):
66
66
super (IMDBReviewsConfig , self ).__init__ (
67
67
version = tfds .core .Version (
68
68
"0.1.0" , experiments = {tfds .core .Experiment .S3 : False }),
69
+ supported_versions = [
70
+ tfds .core .Version ("1.0.0" ),
71
+ ],
69
72
** kwargs )
70
73
self .text_encoder_config = (
71
74
text_encoder_config or tfds .features .text .TextEncoderConfig ())
@@ -118,8 +121,8 @@ def _info(self):
118
121
)
119
122
120
123
def _vocab_text_gen (self , archive ):
121
- for ex in self ._generate_examples (archive ,
122
- os .path .join ("aclImdb" , "train" )):
124
+ for ex in self ._generate_examples (
125
+ archive , os .path .join ("aclImdb" , "train" ), keys = False ):
123
126
yield ex ["text" ]
124
127
125
128
def _split_generators (self , dl_manager ):
@@ -149,7 +152,7 @@ def _split_generators(self, dl_manager):
149
152
"labeled" : False }),
150
153
]
151
154
152
- def _generate_examples (self , archive , directory , labeled = True ):
155
+ def _generate_examples (self , archive , directory , labeled = True , keys = True ):
153
156
"""Generate IMDB examples."""
154
157
# For labeled examples, extract the label from the path.
155
158
reg_path = "(?P<label>neg|pos)" if labeled else "unsup"
@@ -161,7 +164,11 @@ def _generate_examples(self, archive, directory, labeled=True):
161
164
continue
162
165
text = imdb_f .read ().strip ()
163
166
label = res .groupdict ()["label" ] if labeled else - 1
164
- yield {
167
+ record = {
165
168
"text" : text ,
166
169
"label" : label ,
167
170
}
171
+ if keys and self .version .implements (tfds .core .Experiment .S3 ):
172
+ yield path , record
173
+ else :
174
+ yield record
0 commit comments