Skip to content

Commit 86968f7

Browse files
authored
Merge pull request #838 from Kaggle/fix-allennlp
Fix allennlp test following major version upgrade
2 parents: c703d33 + bd13e72; commit: 86968f7

File tree

2 files changed: 6 additions (+6) and 7 deletions (-7).

Dockerfile

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -373,8 +373,7 @@ RUN pip install bcolz && \
373373
pip install fastai && \
374374
pip install torchtext && \
375375
pip install allennlp && \
376-
# b/149359379 remove once allennlp 1.0 is released which won't cause a spacy downgrade.
377-
pip install spacy==2.2.3 && python -m spacy download en && python -m spacy download en_core_web_lg && \
376+
python -m spacy download en && python -m spacy download en_core_web_lg && \
378377
apt-get install -y ffmpeg && \
379378
/tmp/clean-layer.sh
380379

tests/test_allennlp.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,15 @@
11
import unittest
22

3-
from allennlp.data.tokenizers import WordTokenizer
3+
from allennlp.data.tokenizers import SpacyTokenizer
44

55

66
class TestAllenNlp(unittest.TestCase):
77
# reference
88
# https://github.com/allenai/allennlp/blob/master/allennlp/tests/data/tokenizers/word_tokenizer_test.py
99
def test_passes_through_correctly(self):
10-
tokenizer = WordTokenizer(start_tokens=['@@', '%%'], end_tokens=['^^'])
10+
tokenizer = SpacyTokenizer()
1111
sentence = "this (sentence) has 'crazy' \"punctuation\"."
1212
tokens = [t.text for t in tokenizer.tokenize(sentence)]
13-
expected_tokens = ["@@", "%%", "this", "(", "sentence", ")", "has", "'", "crazy", "'", "\"",
14-
"punctuation", "\"", ".", "^^"]
15-
assert tokens == expected_tokens
13+
expected_tokens = ["this", "(", "sentence", ")", "has", "'", "crazy", "'", "\"",
14+
"punctuation", "\"", "."]
15+
self.assertSequenceEqual(tokens, expected_tokens)

0 commit comments

Comments
 (0)