| 
 | 1 | +#!/usr/bin/env python3  | 
 | 2 | + | 
 | 3 | +# Copyright 2020 Mobvoi AI Lab, Beijing, China (author: Fangjun Kuang)  | 
 | 4 | +# Apache 2.0  | 
 | 5 | + | 
 | 6 | +import os  | 
 | 7 | +import shutil  | 
 | 8 | +import tempfile  | 
 | 9 | +import unittest  | 
 | 10 | + | 
 | 11 | +import kaldi  | 
 | 12 | + | 
 | 13 | + | 
 | 14 | +def generate_test_lexicon(d):  | 
 | 15 | +    s = 'foo f o o\n'  | 
 | 16 | +    s += 'bar b a r\n'  | 
 | 17 | + | 
 | 18 | +    filename = os.path.join(d, 'lexicon.txt')  | 
 | 19 | +    with open(filename, 'w') as f:  | 
 | 20 | +        f.write(s)  | 
 | 21 | + | 
 | 22 | + | 
 | 23 | +def generate_test_tokens(d):  | 
 | 24 | +    s = '''<eps> 0  | 
 | 25 | +<blk> 1  | 
 | 26 | +a 2  | 
 | 27 | +b 3  | 
 | 28 | +f 4  | 
 | 29 | +o 5  | 
 | 30 | +r 6  | 
 | 31 | +'''  | 
 | 32 | +    filename = os.path.join(d, 'tokens.txt')  | 
 | 33 | +    with open(filename, 'w') as f:  | 
 | 34 | +        f.write(s)  | 
 | 35 | + | 
 | 36 | + | 
 | 37 | +def generate_test_text(d):  | 
 | 38 | +    s = 'utt1 foo bar bar\n'  | 
 | 39 | +    s += 'utt2 bar\n'  | 
 | 40 | + | 
 | 41 | +    filename = os.path.join(d, 'text')  | 
 | 42 | +    with open(filename, 'w') as f:  | 
 | 43 | +        f.write(s)  | 
 | 44 | + | 
 | 45 | + | 
 | 46 | +class ConvertTextToLablesTest(unittest.TestCase):  | 
 | 47 | + | 
 | 48 | +    def test(self):  | 
 | 49 | +        d = tempfile.mkdtemp()  | 
 | 50 | + | 
 | 51 | +        generate_test_lexicon(d)  | 
 | 52 | +        generate_test_tokens(d)  | 
 | 53 | +        generate_test_text(d)  | 
 | 54 | + | 
 | 55 | +        cmd = '''  | 
 | 56 | +        python3 ./local/convert_text_to_labels.py \  | 
 | 57 | +                --lexicon-filename {lexicon} \  | 
 | 58 | +                --tokens-filename {tokens} \  | 
 | 59 | +                --dir {dir}  | 
 | 60 | +        '''.format(lexicon=os.path.join(d, 'lexicon.txt'),  | 
 | 61 | +                   tokens=os.path.join(d, 'tokens.txt'),  | 
 | 62 | +                   dir=d)  | 
 | 63 | + | 
 | 64 | +        os.system(cmd)  | 
 | 65 | + | 
 | 66 | +        rspecifier = 'scp:{}/labels.scp'.format(d)  | 
 | 67 | + | 
 | 68 | +        reader = kaldi.SequentialIntVectorReader(rspecifier)  | 
 | 69 | + | 
 | 70 | +        expected_labels = dict()  | 
 | 71 | +        expected_labels['utt1'] = [3, 4, 4, 2, 1, 5, 2, 1, 5]  | 
 | 72 | +        expected_labels['utt2'] = [2, 1, 5]  | 
 | 73 | + | 
 | 74 | +        for key, value in reader:  | 
 | 75 | +            self.assertTrue(key in expected_labels)  | 
 | 76 | +            self.assertEqual(value, expected_labels[key])  | 
 | 77 | + | 
 | 78 | +        reader.Close()  | 
 | 79 | + | 
 | 80 | +        shutil.rmtree(d)  | 
 | 81 | + | 
 | 82 | + | 
# Run the test case when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
0 commit comments