
Commit 33beaad

Tidy up tests
Use `vocabulary.from_wordlist`, since starting with the following commit the other methods will be obsolete.
1 parent c871d03

32 files changed (+2384, -207 lines)
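
The change repeated across the test configs below replaces vocabularies built from the training data at run time with fixed word lists stored under tests/data. A minimal before/after sketch of one vocabulary section, copied from the tests/alignment.ini hunk below (comments added for orientation only):

; before: vocabulary computed from the training dataset and pickled to tests/outputs
[encoder_vocabulary]
class=vocabulary.from_dataset
datasets=[<train_data>]
series_ids=["source"]
max_size=60
save_file="tests/outputs/alignment/encoder_vocabulary.pickle"
overwrite=True

; after: vocabulary loaded from a precomputed word-list file
[encoder_vocabulary]
class=vocabulary.from_wordlist
path="tests/data/encoder_vocab.tsv"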

tests/alignment.ini

Lines changed: 6 additions & 14 deletions
@@ -47,12 +47,12 @@ series=["source"]
 data=["tests/data/val.tc.en"]
 
 [encoder_vocabulary]
-class=vocabulary.from_dataset
-datasets=[<train_data>]
-series_ids=["source"]
-max_size=60
-save_file="tests/outputs/alignment/encoder_vocabulary.pickle"
-overwrite=True
+class=vocabulary.from_wordlist
+path="tests/data/encoder_vocab.tsv"
+
+[decoder_vocabulary]
+class=vocabulary.from_wordlist
+path="tests/data/decoder_vocab.tsv"
 
 [encoder]
 class=encoders.recurrent.SentenceEncoder
@@ -68,14 +68,6 @@ class=attention.Attention
 name="attention_sentence_encoder"
 encoder=<encoder>
 
-[decoder_vocabulary]
-class=vocabulary.from_dataset
-datasets=[<train_data>]
-series_ids=["target"]
-max_size=70
-save_file="tests/outputs/alignment/decoder_vocabulary.pickle"
-overwrite=True
-
 [decoder]
 class=decoders.decoder.Decoder
 name="bahdanau_decoder"

tests/bahdanau.ini

Lines changed: 4 additions & 12 deletions
@@ -40,12 +40,8 @@ data=["tests/data/val.tc.en"]
 outputs=[("encoded", "tests/outputs/bahdanau/encoded"), ("debugtensors", "tests/outputs/bahdanau/debugtensors")]
 
 [encoder_vocabulary]
-class=vocabulary.from_dataset
-datasets=[<train_data>]
-series_ids=["source"]
-max_size=60
-save_file="tests/outputs/bahdanau/encoder_vocabulary.pickle"
-overwrite=True
+class=vocabulary.from_wordlist
+path="tests/data/encoder_vocab.tsv"
 
 [encoder]
 class=encoders.recurrent.SentenceEncoder
@@ -62,12 +58,8 @@ name="attention_sentence_encoder"
 encoder=<encoder>
 
 [decoder_vocabulary]
-class=vocabulary.from_dataset
-datasets=[<train_data>]
-series_ids=["target"]
-max_size=70
-save_file="tests/outputs/bahdanau/decoder_vocabulary.pickle"
-overwrite=True
+class=vocabulary.from_wordlist
+path="tests/data/decoder_vocab.tsv"
 
 [decoder]
 class=decoders.decoder.Decoder

tests/beamsearch.ini

Lines changed: 2 additions & 2 deletions
@@ -35,7 +35,7 @@ data=["tests/data/val.tc.en", "tests/data/val.tc.de", (processors.helpers.prepro
 
 [encoder_vocabulary]
 class=vocabulary.from_wordlist
-path="tests/outputs/vocab/encoder_vocab.tsv"
+path="tests/data/encoder_vocab.tsv"
 
 [inpseq]
 class=model.sequence.EmbeddedSequence
@@ -56,7 +56,7 @@ dropout_keep_prob=0.9
 
 [decoder_vocabulary]
 class=vocabulary.from_wordlist
-path="tests/outputs/vocab/decoder_vocab.tsv"
+path="tests/data/decoder_vocab.tsv"
 
 [decoder]
 class=decoders.transformer.TransformerDecoder

tests/beamsearch_ensembles.ini

Lines changed: 2 additions & 2 deletions
@@ -35,7 +35,7 @@ data=["tests/data/val.tc.en", "tests/data/val.tc.de", (processors.helpers.prepro
 
 [encoder_vocabulary]
 class=vocabulary.from_wordlist
-path="tests/outputs/vocab/encoder_vocab.tsv"
+path="tests/data/encoder_vocab.tsv"
 
 [inpseq]
 class=model.sequence.EmbeddedSequence
@@ -56,7 +56,7 @@ dropout_keep_prob=0.9
 
 [decoder_vocabulary]
 class=vocabulary.from_wordlist
-path="tests/outputs/vocab/decoder_vocab.tsv"
+path="tests/data/decoder_vocab.tsv"
 
 [decoder]
 class=decoders.transformer.TransformerDecoder

tests/bpe.ini

Lines changed: 2 additions & 6 deletions
@@ -49,12 +49,8 @@ merge_file="tests/data/merges_100.bpe"
 class=processors.bpe.BPEPostprocessor
 
 [bpe_vocabulary]
-class=vocabulary.from_dataset
-datasets=[<train_data>]
-series_ids=["source_bpe", "target_bpe"]
-max_size=209
-save_file="tests/outputs/bpe_vocabulary.pickle"
-overwrite=True
+class=vocabulary.from_wordlist
+path="tests/data/bpe_vocab.tsv"
 
 [encoder_input]
 class=model.sequence.EmbeddedSequence

tests/captioning.ini

Lines changed: 2 additions & 6 deletions
@@ -59,12 +59,8 @@ name="attention_sentence_encoder"
 encoder=<imagenet>
 
 [decoder_vocabulary]
-class=vocabulary.from_dataset
-datasets=[<train_data>]
-series_ids=["target"]
-max_size=70
-save_file="tests/outputs/captioning/decoder_vocabulary.pickle"
-overwrite=True
+class=vocabulary.from_wordlist
+path="tests/data/decoder_vocab.tsv"
 
 [decoder]
 class=decoders.decoder.Decoder

tests/classifier.ini

Lines changed: 1 addition & 1 deletion
@@ -34,7 +34,7 @@ data=["tests/data/val.tc.en", "tests/data/val.words", (processors.helpers.prepro
 
 [encoder_vocabulary]
 class=vocabulary.from_wordlist
-path="tests/outputs/vocab/encoder_vocab.tsv"
+path="tests/data/encoder_vocab.tsv"
 
 [encoder_rnn]
 class=encoders.recurrent.SentenceEncoder

tests/ctc.ini

Lines changed: 4 additions & 5 deletions
@@ -47,11 +47,10 @@ feature_type="mfcc"
 delta_order=2
 
 [decoder_vocabulary]
-class=vocabulary.from_dataset
-datasets=[<train_data>]
-series_ids=["target"]
-max_size=5
-
+class=vocabulary.from_wordlist
+path="tests/data/yesno/yesno.vocab"
+contains_header=False
+contains_frequencies=False
 
 [audio_encoder]
 class=encoders.raw_rnn_encoder.RawRNNEncoder
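
Unlike the TSV word lists used above, the yesno vocabulary is loaded with contains_header=False and contains_frequencies=False, which suggests a plain one-token-per-line file rather than the two-column "Word / Word counts" format of tests/data/bpe_vocab.tsv below. A hedged side-by-side sketch; the section names [tsv_vocabulary] and [plain_vocabulary] are illustrative, not part of the commit:

; word-count TSV: first line is a header, second column holds counts
[tsv_vocabulary]
class=vocabulary.from_wordlist
path="tests/data/encoder_vocab.tsv"

; plain token list: presumably no header line and no count column
[plain_vocabulary]
class=vocabulary.from_wordlist
path="tests/data/yesno/yesno.vocab"
contains_header=False
contains_frequencies=False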

tests/data/bpe_vocab.tsv

Lines changed: 210 additions & 0 deletions
@@ -0,0 +1,210 @@
+Word Word counts
+<pad> 1
+<s> 1
+</s> 1
+<unk> 1
+t@@ 1632
+wo 135
+y@@ 313
+o@@ 1501
+un@@ 535
+g 378
+, 663
+W@@ 122
+hi@@ 381
+te 167
+m@@ 1099
+al@@ 407
+es 340
+ar@@ 840
+e 1429
+u@@ 1316
+si@@ 506
+d@@ 1358
+n@@ 818
+e@@ 1515
+ar 104
+an@@ 379
+y 429
+b@@ 1493
+s@@ 1449
+h@@ 910
+. 1939
+v@@ 491
+er@@ 926
+al 55
+en 1559
+in 969
+d 716
+at@@ 217
+s 1079
+p@@ 1257
+ing 1045
+a 1814
+g@@ 1326
+i@@ 1415
+t 1452
+l@@ 1441
+st@@ 618
+em 160
+li@@ 276
+tt@@ 270
+le 344
+ir@@ 122
+l 324
+c@@ 740
+in@@ 500
+to 140
+wo@@ 187
+la@@ 340
+man 407
+r@@ 883
+is 273
+and@@ 190
+on 519
+der 326
+w@@ 882
+do@@ 178
+w 92
+at 181
+the 368
+re@@ 452
+f@@ 986
+th@@ 260
+er 798
+mi@@ 78
+en@@ 577
+k@@ 799
+el@@ 463
+on@@ 266
+lo@@ 189
+n 200
+et 165
+with 212
+ge 245
+ur@@ 145
+bo@@ 220
+of 217
+j@@ 192
+q@@ 17
+ur 28
+we@@ 153
+to@@ 83
+p 142
+a@@ 1716
+ck 169
+and 389
+sp@@ 181
+or@@ 277
+m 379
+ri@@ 196
+ra@@ 314
+or 158
+ss@@ 163
+ro@@ 546
+ch 255
+o 28
+ei@@ 376
+r 50
+ge@@ 158
+f 56
+c 23
+M@@ 284
+ro 1
+ch@@ 631
+k 160
+und 439
+u 72
+i 15
+J@@ 145
+B@@ 278
+ck@@ 267
+z@@ 501
+be@@ 151
+x@@ 45
+an 245
+A@@ 110
+wei@@ 94
+au@@ 299
+G@@ 185
+C@@ 26
+D@@ 49
+&@@ 28
+;@@ 20
+do 3
+T@@ 123
+V 3
+h 64
+th 21
+wi@@ 86
+di@@ 82
+eh@@ 311
+b 30
+-@@ 117
+ir 41
+3@@ 1
+0 1
+el 69
+S@@ 416
+x 6
+ein@@ 127
+F@@ 179
+P@@ 187
+R@@ 74
+; 10
+E@@ 69
+Z@@ 48
+hr@@ 198
+5 2
+C 1
+2 10
+( 6
+) 6
+lo 1
+Q 2
+3 2
+die 225
+1@@ 2
+I@@ 11
+H@@ 348
+U@@ 22
+A 1
+K@@ 222
+ein 735
+wei 127
+ß@@ 217
+ä@@ 581
+Fr@@ 32
+N@@ 49
+ü@@ 299
+sch@@ 350
+mit 292
+tz@@ 170
+z 32
+Mann 278
+einem 481
+auf 323
+einer 280
+L@@ 97
+ten 183
+eine 360
+einen 159
+ö@@ 61
+Frau 133
+ei 79
+Frau@@ 25
+O@@ 52
+Man@@ 7
+tz 15
+ß 10
+hr 12
+V@@ 22
+au 4
+sch 27
+Ü@@ 1
+ 3
+ 3
+Ö@@ 1
+é 2
+- 2
+: 1
