Skip to content

Commit 6e3fdae

Browse files
adarob authored and copybara-github committed
Fix support for C4 1.0.0.
PiperOrigin-RevId: 278881195
1 parent efcba76 commit 6e3fdae

File tree

1 file changed

+13
-8
lines changed
  • tensorflow_datasets/text

1 file changed

+13
-8
lines changed

tensorflow_datasets/text/c4.py

Lines changed: 13 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,8 @@
4949
"""
5050
_VERSION = tfds.core.Version(
5151
"1.0.1", experiments={tfds.core.Experiment.S3: False})
52-
_SUPPORTED_VERSIONS = [tfds.core.Version("1.0.0")]
52+
_SUPPORTED_VERSIONS = [
53+
tfds.core.Version("1.0.0", experiments={tfds.core.Experiment.S3: False})]
5354

5455
_DOWNLOAD_HOST = "https://commoncrawl.s3.amazonaws.com"
5556
_WET_PATH_URL = "https://commoncrawl.s3.amazonaws.com/crawl-data/CC-MAIN-{cc_version}/wet.paths.gz"
@@ -137,16 +138,20 @@ class C4(tfds.core.BeamBasedBuilder):
137138
]
138139

139140
def _info(self):
141+
features = {
142+
"text": tfds.features.Text(),
143+
"url": tfds.features.Text(),
144+
}
145+
if self.version > "1.0.0":
146+
features.update({
147+
"content-type": tfds.features.Text(),
148+
"content-length": tfds.features.Text(),
149+
"timestamp": tfds.features.Text(),
150+
})
140151
return tfds.core.DatasetInfo(
141152
builder=self,
142153
description=_DESCRIPTION,
143-
features=tfds.features.FeaturesDict({
144-
"text": tfds.features.Text(),
145-
"url": tfds.features.Text(),
146-
"content-type": tfds.features.Text(),
147-
"content-length": tfds.features.Text(),
148-
"timestamp": tfds.features.Text(),
149-
}),
154+
features=tfds.features.FeaturesDict(features),
150155
citation=_CITATION,
151156
homepage=
152157
"https://github.com/google-research/text-to-text-transfer-transformer#datasets",

0 commit comments

Comments
 (0)