tensorflow
diff --git a/tensorflow_datasets/audio/librispeech_test.py b/tensorflow_datasets/audio/librispeech_test.py
Lines changed: 60 additions & 0 deletions
diff --git a/tensorflow_datasets/testing/test_data/fake_examples/librispeech/dev-clean/LibriSpeech/BOOKS.TXT b/tensorflow_datasets/testing/test_data/fake_examples/librispeech/dev-clean/LibriSpeech/BOOKS.TXT
Lines changed: 9 additions & 0 deletions
diff --git a/tensorflow_datasets/testing/test_data/fake_examples/librispeech/dev-clean/LibriSpeech/CHAPTERS.TXT b/tensorflow_datasets/testing/test_data/fake_examples/librispeech/dev-clean/LibriSpeech/CHAPTERS.TXT
Lines changed: 28 additions & 0 deletions
diff --git a/tensorflow_datasets/testing/test_data/fake_examples/librispeech/dev-clean/LibriSpeech/LICENSE.TXT b/tensorflow_datasets/testing/test_data/fake_examples/librispeech/dev-clean/LibriSpeech/LICENSE.TXT
Lines changed: 1 addition & 0 deletions
diff --git a/tensorflow_datasets/testing/test_data/fake_examples/librispeech/dev-clean/LibriSpeech/README.TXT b/tensorflow_datasets/testing/test_data/fake_examples/librispeech/dev-clean/LibriSpeech/README.TXT
Lines changed: 1 addition & 0 deletions
diff --git a/tensorflow_datasets/testing/test_data/fake_examples/librispeech/dev-clean/LibriSpeech/SPEAKERS.TXT b/tensorflow_datasets/testing/test_data/fake_examples/librispeech/dev-clean/LibriSpeech/SPEAKERS.TXT
Lines changed: 26 additions & 0 deletions
diff --git a/tensorflow_datasets/testing/test_data/fake_examples/librispeech/dev-clean/LibriSpeech/dev-clean/11/01/11-01-0000.flac b/tensorflow_datasets/testing/test_data/fake_examples/librispeech/dev-clean/LibriSpeech/dev-clean/11/01/11-01-0000.flac
32.3 KB
diff --git a/tensorflow_datasets/testing/test_data/fake_examples/librispeech/dev-clean/LibriSpeech/dev-clean/11/01/11-01.trans.txt b/tensorflow_datasets/testing/test_data/fake_examples/librispeech/dev-clean/LibriSpeech/dev-clean/11/01/11-01.trans.txt
Lines changed: 1 addition & 0 deletions
diff --git a/tensorflow_datasets/testing/test_data/fake_examples/librispeech/dev-clean/LibriSpeech/dev-clean/12/02/12-02-0000.flac b/tensorflow_datasets/testing/test_data/fake_examples/librispeech/dev-clean/LibriSpeech/dev-clean/12/02/12-02-0000.flac
62.1 KB
diff --git a/tensorflow_datasets/testing/test_data/fake_examples/librispeech/dev-clean/LibriSpeech/dev-clean/12/02/12-02.trans.txt b/tensorflow_datasets/testing/test_data/fake_examples/librispeech/dev-clean/LibriSpeech/dev-clean/12/02/12-02.trans.txt
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1,60 @@
+# coding=utf-8
+# Copyright 2019 The TensorFlow Datasets Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tests for librispeech dataset module."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow_datasets import testing
+from tensorflow_datasets.audio import librispeech
+import tensorflow_datasets.public_api as tfds
+
+
+class LibrispeechTest100(testing.DatasetBuilderTestCase):
+  DATASET_CLASS = librispeech.Librispeech
+  BUILDER_CONFIG_NAMES_TO_TEST = ["clean-100"]
+  SPLITS = {
+      "train": 2,
+      "test": 1,
+      "dev": 1,
+  }
+
+  DL_EXTRACT_RESULT = {
+      tfds.Split.TRAIN: ["train-clean-100"],
+      tfds.Split.TEST: ["test-clean"],
+      tfds.Split.VALIDATION: ["dev-clean"],
+  }
+
+
+class LibrispeechTest360(testing.DatasetBuilderTestCase):
+  DATASET_CLASS = librispeech.Librispeech
+  BUILDER_CONFIG_NAMES_TO_TEST = ["clean-360"]
+  SPLITS = {
+      "train": 1,
+      "test": 1,
+      "dev": 1,
+  }
+
+  DL_EXTRACT_RESULT = {
+      tfds.Split.TRAIN: ["train-clean-100", "train-clean-360"],
+      tfds.Split.TEST: ["test-clean"],
+      tfds.Split.VALIDATION: ["dev-clean"],
+  }
+
+
+if __name__ == "__main__":
+  testing.test_main()
@@ -0,0 +1,9 @@
+1     | Interstellar                                       | 
+2     | Godzilla                                           | 
+3     | Iron Man                                           | 
+4     | God Father                                         | 
+5     | Spiderman Homecoming                               | 
+6     | Avengers                                           |
+7     | Guardians of Galaxy                                | 
+8     | Johnny English                                     |
+9     | X-Men                                              |
@@ -0,0 +1,28 @@
+; Some pipe(|) separated metadata about the audio chapters included in the corpus.
+;
+; The meaning of the fields in left-to-right order is as follows:
+;
+; chapter_id: the ID of the chapter in the LibriVox's database
+; reader_id: the ID of the reader in the LibriVox's database
+; duration: how many minutes of this chapter are used in the corpus
+; subset: the corpus subset to which this chapter is assigned
+; project_id: the LibriVox project ID
+; book_id: the Project Gutenberg's ID for the book on which the LibriVox project is based
+; chapter_title: the title of the chapter on LibriVox
+; project_title: the title of the LibriVox project
+;
+;ID    |READER|MINUTES| SUBSET           | PROJ.|BOOK ID| CH. TITLE  | PROJECT TITLE
+01     | 11   | 19.77 | dev-clean        | 53   | 2     | In Chancer | Bleak House
+02     | 12   | 10.30 | dev-clean        | 53   | 3     | In Fashion | Bleak House
+03     | 13   | 7.67  | dev-other        | 68   | 7     | Letter XXV | Unbeaten Tracks in Japan
+04     | 14   | 8.42  | dev-other        | 219  | 9     | Chapter 01 | Northanger Abbey
+05     | 15   | 11.68 | test-clean       | 219  | 1     | Chapter 02 | Northanger Abbey
+06     | 16   | 11.25 | test-clean       | 219  | 5     | Chapter 03 | Northanger Abbey
+07     | 17   | 7.57  | test-other       | 219  | 9     | Chapter 04 | Northanger Abbey
+08     | 18   | 12.76 | test-other       | 219  | 3     | Chapter 07 | Northanger Abbey
+09     | 19   | 12.82 | train-clean-100  | 219  | 4     | Chapter 08 | Northanger Abbey
+10     | 20   | 18.33 | train-clean-100  | 219  | 6     | Chapter 10 | Northanger Abbey
+11     | 21   | 12.95 | train-clean-360  | 219  | 8     | Chapter 11 | Northanger Abbey
+12     | 22   | 8.20  | train-clean-360  | 219  | 1     | Chapter 12 | Northanger Abbey
+13     | 23   | 12.09 | train-other-500  | 219  | 4     | Chapter 15 | Northanger Abbey
+14     | 24   | 6.19  | train-other-500  | 219  | 5     | Chapter 17 | Northanger Abbey
@@ -0,0 +1 @@
+
@@ -0,0 +1 @@
+
@@ -0,0 +1,26 @@
+; Some pipe(|) separated metadata about all LibriVox readers, whose work was used
+; in the corpus.
+;
+; The meaning of the fields in left-to-right order is as follows:
+;
+; reader_id: the ID of the reader in the LibriVox's database
+; gender: 'F' for female, 'M' for male
+; subset: the corpus subset to which the reader's audio is assigned
+; duration: total number of minutes of speech by the reader, included in the corpus
+; name: the name under which the reader is registered in LibriVox
+;
+;ID  |SEX| SUBSET           |MINUTES| NAME
+11   | F | dev-clean        | 25.03 | Wolverine
+12   | M | dev-clean        | 25.11 | Hulk
+13   | F | dev-other        | 25.04 | Zimmer
+14   | M | dev-other        | 25.19 | Carla
+15   | F | test-clean       | 30.07 | Groot
+16   | M | test-clean       | 25.14 | Tony
+17   | F | test-other       | 25.23 | Anita
+18   | M | test-other       | 30.16 | John
+19   | F | train-clean-100  | 25.08 | Denny
+20   | M | train-clean-100  | 20.14 | Sean
+21   | F | train-clean-360  | 25.03 | Kristin
+22   | M | train-clean-360  | 30.10 | Linton
+23   | F | train-other-500  | 25.19 | Annie
+24   | M | train-other-500  | 23.79 | Martin
@@ -0,0 +1 @@
+11-01-0000 GO DO YOU HEAR
@@ -0,0 +1 @@
+12-02-0000 FORGOTTEN TOO THE NAME OF GILLIAN THE LOVELY CAPTIVE
| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -0,0 +1 @@` |
|  | `1` | `+12-02-0000 FORGOTTEN TOO THE NAME OF GILLIAN THE LOVELY CAPTIVE` |