Skip to content

Commit 7f17a1d

Browse files
adarob authored and copybara-github committed
Fix paths to wmt15 en-fr dev and test sets.
PiperOrigin-RevId: 257455603
1 parent e6909b8 commit 7f17a1d

File tree

2 files changed

+17
-3
lines changed

2 files changed

+17
-3
lines changed

tensorflow_datasets/translate/wmt.py

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -411,6 +411,13 @@ def get_path(self, src):
411411
url="http://data.statmt.org/wmt19/translation-task/dev.tgz",
412412
path=("dev/newsdev2015-fien-src.{src}.sgm",
413413
"dev/newsdev2015-fien-ref.en.sgm")),
414+
SubDataset(
415+
name="newsdiscussdev2015",
416+
target="en",
417+
sources={"ro", "tr"},
418+
url="http://data.statmt.org/wmt19/translation-task/dev.tgz",
419+
path=("dev/newsdiscussdev2015-{src}en-src.{src}.sgm",
420+
"dev/newsdiscussdev2015-{src}en-ref.en.sgm")),
414421
SubDataset(
415422
name="newsdev2016",
416423
target="en",
@@ -506,10 +513,17 @@ def get_path(self, src):
506513
SubDataset(
507514
name="newstest2015",
508515
target="en",
509-
sources={"cs", "de", "fi", "fr", "ru"},
516+
sources={"cs", "de", "fi", "ru"},
510517
url="http://data.statmt.org/wmt19/translation-task/dev.tgz",
511518
path=("dev/newstest2015-{src}en-src.{src}.sgm",
512519
"dev/newstest2015-{src}en-ref.en.sgm")),
520+
SubDataset(
521+
name="newsdiscusstest2015",
522+
target="en",
523+
sources={"fr"},
524+
url="http://data.statmt.org/wmt19/translation-task/dev.tgz",
525+
path=("dev/newsdiscusstest2015-{src}en-src.{src}.sgm",
526+
"dev/newsdiscusstest2015-{src}en-ref.en.sgm")),
513527
SubDataset(
514528
name="newstest2016",
515529
target="en",

tensorflow_datasets/translate/wmt15.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -73,9 +73,9 @@ def _subsets(self):
7373
"newscommentary_v10", "gigafren", "czeng_10", "yandexcorpus",
7474
"wikiheadlines_fi", "wikiheadlines_ru"],
7575
tfds.Split.VALIDATION: [
76-
"newsdev2015", "newstest2014"
76+
"newsdev2015", "newsdiscussdev2015", "newstest2014"
7777
],
7878
tfds.Split.TEST: [
79-
"newstest2015"
79+
"newstest2015", "newsdiscusstest2015",
8080
]
8181
}

0 commit comments

Comments
 (0)