|
49 | 49 | """
|
50 | 50 | _VERSION = tfds.core.Version(
|
51 | 51 | "1.0.1", experiments={tfds.core.Experiment.S3: False})
|
52 |
| -_SUPPORTED_VERSIONS = [tfds.core.Version("1.0.0")] |
| 52 | +_SUPPORTED_VERSIONS = [ |
| 53 | + tfds.core.Version("1.0.0", experiments={tfds.core.Experiment.S3: False})] |
53 | 54 |
|
54 | 55 | _DOWNLOAD_HOST = "https://commoncrawl.s3.amazonaws.com"
|
55 | 56 | _WET_PATH_URL = "https://commoncrawl.s3.amazonaws.com/crawl-data/CC-MAIN-{cc_version}/wet.paths.gz"
|
@@ -137,16 +138,20 @@ class C4(tfds.core.BeamBasedBuilder):
|
137 | 138 | ]
|
138 | 139 |
|
139 | 140 | def _info(self):
|
| 141 | + features = { |
| 142 | + "text": tfds.features.Text(), |
| 143 | + "url": tfds.features.Text(), |
| 144 | + } |
| 145 | + if self.version > "1.0.0": |
| 146 | + features.update({ |
| 147 | + "content-type": tfds.features.Text(), |
| 148 | + "content-length": tfds.features.Text(), |
| 149 | + "timestamp": tfds.features.Text(), |
| 150 | + }) |
140 | 151 | return tfds.core.DatasetInfo(
|
141 | 152 | builder=self,
|
142 | 153 | description=_DESCRIPTION,
|
143 |
| - features=tfds.features.FeaturesDict({ |
144 |
| - "text": tfds.features.Text(), |
145 |
| - "url": tfds.features.Text(), |
146 |
| - "content-type": tfds.features.Text(), |
147 |
| - "content-length": tfds.features.Text(), |
148 |
| - "timestamp": tfds.features.Text(), |
149 |
| - }), |
| 154 | + features=tfds.features.FeaturesDict(features), |
150 | 155 | citation=_CITATION,
|
151 | 156 | homepage=
|
152 | 157 | "https://github.com/google-research/text-to-text-transfer-transformer#datasets",
|
|
0 commit comments