We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 38189d9 commit 286d7a8 Copy full SHA for 286d7a8
tensorflow_datasets/text/c4.py
@@ -120,6 +120,14 @@ def __init__(self,
120
class C4(tfds.core.BeamBasedBuilder):
121
"""C4 dataset based on Common Crawl."""
122
123
+ MANUAL_DOWNLOAD_INSTRUCTIONS = """\
124
+ For the WebText-like config, you must manually download 'OpenWebText.zip'
125
+ (from https://mega.nz/#F!EZZD0YwJ!9_PlEQzdMVLaNdKv_ICNVQ) and the Common Crawl
126
+ WET files from August 2018 to July 2019
127
+ (https://commoncrawl.org/the-data/get-started/) and place them in the
128
+ `manual_dir`.
129
+ """
130
+
131
BUILDER_CONFIGS = [
132
C4Config(language="en", description="English C4 dataset."),
133
C4Config(
0 commit comments