Skip to content

Commit 3294cad

Browse files
Merge pull request #228 from ChanchalKumarMaji:issue-155
PiperOrigin-RevId: 238080357
2 parents 72d3988 + 40be1e6 commit 3294cad

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

64 files changed

+529
-0
lines changed
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# coding=utf-8
2+
# Copyright 2019 The TensorFlow Datasets Authors.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
"""Tests for librispeech dataset module."""
17+
18+
from __future__ import absolute_import
19+
from __future__ import division
20+
from __future__ import print_function
21+
22+
from tensorflow_datasets import testing
23+
from tensorflow_datasets.audio import librispeech
24+
import tensorflow_datasets.public_api as tfds
25+
26+
27+
# Dataset-builder test case for the Librispeech builder, limited to the
# "clean-100" builder config. All behaviour comes from the
# testing.DatasetBuilderTestCase base class; this class only supplies
# configuration attributes.
class LibrispeechTest100(testing.DatasetBuilderTestCase):
28+
DATASET_CLASS = librispeech.Librispeech
29+
BUILDER_CONFIG_NAMES_TO_TEST = ["clean-100"]
30+
# Split name -> integer count; presumably the number of fake examples the
# base test case expects per split (confirm against DatasetBuilderTestCase).
# NOTE(review): the key "dev" here differs from tfds.Split.VALIDATION used in
# DL_EXTRACT_RESULT below -- confirm which split name the builder emits.
SPLITS = {
31+
"train": 2,
32+
"test": 1,
33+
"dev": 1,
34+
}
35+
36+
# Split -> list of directory names; presumably substituted for the result of
# the builder's download-and-extract step so the fake data directories are
# used instead of real downloads (confirm against DatasetBuilderTestCase).
DL_EXTRACT_RESULT = {
37+
tfds.Split.TRAIN: ["train-clean-100"],
38+
tfds.Split.TEST: ["test-clean"],
39+
tfds.Split.VALIDATION: ["dev-clean"],
40+
}
41+
42+
43+
# Dataset-builder test case for the Librispeech builder, limited to the
# "clean-360" builder config. All behaviour comes from the
# testing.DatasetBuilderTestCase base class; this class only supplies
# configuration attributes.
class LibrispeechTest360(testing.DatasetBuilderTestCase):
44+
DATASET_CLASS = librispeech.Librispeech
45+
BUILDER_CONFIG_NAMES_TO_TEST = ["clean-360"]
46+
# Split name -> integer count; presumably the number of fake examples the
# base test case expects per split (confirm against DatasetBuilderTestCase).
# NOTE(review): "train" is 1 here although two train directories are listed
# in DL_EXTRACT_RESULT, and the key "dev" differs from tfds.Split.VALIDATION
# used below -- confirm both against the builder and the fake data.
SPLITS = {
47+
"train": 1,
48+
"test": 1,
49+
"dev": 1,
50+
}
51+
52+
# Split -> list of directory names; the train split lists both the
# "train-clean-100" and "train-clean-360" directories. Presumably substituted
# for the result of the builder's download-and-extract step (confirm against
# DatasetBuilderTestCase).
DL_EXTRACT_RESULT = {
53+
tfds.Split.TRAIN: ["train-clean-100", "train-clean-360"],
54+
tfds.Split.TEST: ["test-clean"],
55+
tfds.Split.VALIDATION: ["dev-clean"],
56+
}
57+
58+
59+
# When executed as a script, hand control to the tensorflow_datasets testing
# entry point.
if __name__ == "__main__":
60+
testing.test_main()
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
1 | Interstellar |
2+
2 | Gorzilla |
3+
3 | Iron Man |
4+
4 | God Father |
5+
5 | Spiderman Homecoming |
6+
6 | Avengers |
7+
7 | Guardians of Galaxy |
8+
8 | Johnny English |
9+
9 | X-Men |
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
; Some pipe(|) separated metadata about the audio chapters included in the corpus.
2+
;
3+
; The meaning of the fields in left-to-right order is as follows:
4+
;
5+
; chapter_id: the ID of the chapter in the LibriVox's database
6+
; reader_id: the ID of the reader in the LibriVox's database
7+
; duration: how many minutes of this chapter are used in the corpus
8+
; subset: the corpus subset to which this chapter is assigned
9+
; project_id: the LibriVox project ID
10+
; book_id: the Project Gutenberg's ID for the book on which the LibriVox project is based
11+
; chapter_title: the title of the chapter on LibriVox
12+
; project_title: the title of the LibriVox project
13+
;
14+
;ID |READER|MINUTES| SUBSET | PROJ.|BOOK ID| CH. TITLE | PROJECT TITLE
15+
01 | 11 | 19.77 | dev-clean | 53 | 2 | In Chancer | Bleak House
16+
02 | 12 | 10.30 | dev-clean | 53 | 3 | In Fashion | Bleak House
17+
03 | 13 | 7.67 | dev-other | 68 | 7 | Letter XXV | Unbeaten Tracks in Japan
18+
04 | 14 | 8.42 | dev-other | 219 | 9 | Chapter 01 | Northanger Abbey
19+
05 | 15 | 11.68 | test-clean | 219 | 1 | Chapter 02 | Northanger Abbey
20+
06 | 16 | 11.25 | test-clean | 219 | 5 | Chapter 03 | Northanger Abbey
21+
07 | 17 | 7.57 | test-other | 219 | 9 | Chapter 04 | Northanger Abbey
22+
08 | 18 | 12.76 | test-other | 219 | 3 | Chapter 07 | Northanger Abbey
23+
09 | 19 | 12.82 | train-clean-100 | 219 | 4 | Chapter 08 | Northanger Abbey
24+
10 | 20 | 18.33 | train-clean-100 | 219 | 6 | Chapter 10 | Northanger Abbey
25+
11 | 21 | 12.95 | train-clean-360 | 219 | 8 | Chapter 11 | Northanger Abbey
26+
12 | 22 | 8.20 | train-clean-360 | 219 | 1 | Chapter 12 | Northanger Abbey
27+
13 | 23 | 12.09 | train-other-500 | 219 | 4 | Chapter 15 | Northanger Abbey
28+
14 | 24 | 6.19 | train-other-500 | 219 | 5 | Chapter 17 | Northanger Abbey
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
; Some pipe(|) separated metadata about all LibriVox readers, whose work was used
2+
; in the corpus.
3+
;
4+
; The meaning of the fields in left-to-right order is as follows:
5+
;
6+
; reader_id: the ID of the reader in the LibriVox's database
7+
; gender: 'F' for female, 'M' for male
8+
; subset: the corpus subset to which the reader's audio is assigned
9+
; duration: total number of minutes of speech by the reader, included in the corpus
10+
; name: the name under which the reader is registered in LibriVox
11+
;
12+
;ID |SEX| SUBSET |MINUTES| NAME
13+
11 | F | dev-clean | 25.03 | Wolverine
14+
12 | M | dev-clean | 25.11 | Hulk
15+
13 | F | dev-other | 25.04 | Zimmer
16+
14 | M | dev-other | 25.19 | Carla
17+
15 | F | test-clean | 30.07 | Groot
18+
16 | M | test-clean | 25.14 | Tony
19+
17 | F | test-other | 25.23 | Anita
20+
18 | M | test-other | 30.16 | John
21+
19 | F | train-clean-100 | 25.08 | Denny
22+
20 | M | train-clean-100 | 20.14 | Sean
23+
21 | F | train-clean-360 | 25.03 | Kristin
24+
22 | M | train-clean-360 | 30.10 | Linton
25+
23 | F | train-other-500 | 25.19 | Annie
26+
24 | M | train-other-500 | 23.79 | Martin
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
11-01-0000 GO DO YOU HEAR
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
12-02-0000 FORGOTTEN TOO THE NAME OF GILLIAN THE LOVELY CAPTIVE

0 commit comments

Comments
 (0)