Skip to content

Commit 04d281f

Browse files
authored
Merge pull request #38 from huggingface/extension
Code moved in Cython, removed speakers handling, integration in spaCy pipeline
2 parents 5a2520e + 4fd89ec commit 04d281f

File tree

130 files changed

+1686
-419
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

130 files changed

+1686
-419
lines changed

.gitignore

Lines changed: 62 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,63 @@
1-
*.pyc
2-
*#*
3-
*.DS_STORE
4-
*.log
5-
*Data.fs*
6-
*flymake*
7-
dist/*
8-
*egg*
9-
urllist*
10-
build/
11-
__pycache__/
1+
# NeuralCoref
2+
/neuralcoref/train/runs/*
3+
/models/
4+
5+
# Cython / C extensions
6+
cythonize.json
7+
neuralcoref/*.html
8+
*.cpp
9+
*.so
10+
11+
# Vim / VSCode / editors
12+
*.swp
13+
*.sw*
14+
Profile.prof
15+
.vscode
16+
.sass-cache
17+
18+
# Python
1219
/.Python
13-
/bin/
14-
/include/
15-
/lib/
16-
/pip-selfcheck.json
17-
/runs/*
18-
test_corefs.txt
19-
test_mentions.txt
20-
.cache
21-
/.vscode/*
22-
/.vscode
20+
.python-version
21+
__pycache__/
22+
*.py[cod]
23+
.env/
24+
.env*
25+
.~env/
26+
.venv
27+
venv/
28+
.dev
29+
.denv
30+
.pypyenv
31+
32+
# Temporary files
33+
*.~*
34+
tmp/
35+
36+
# Installer logs
37+
pip-log.txt
38+
pip-delete-this-directory.txt
39+
40+
# Distribution / packaging
41+
env/
42+
build/
43+
develop-eggs/
44+
dist/
45+
eggs/
46+
lib/
47+
lib64/
48+
parts/
49+
sdist/
50+
var/
51+
*.egg-info/
52+
.installed.cfg
53+
*.egg
54+
.eggs
55+
MANIFEST
56+
57+
# Windows
58+
*.bat
59+
Thumbs.db
60+
Desktop.ini
61+
62+
# Mac OS X
63+
*.DS_Store

LICENCE.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
MIT License
22

3-
Copyright (c) 2016 Huggingface Inc.
3+
Copyright (c) 2018 Huggingface Inc.
44

55
Permission is hereby granted, free of charge, to any person obtaining a copy
66
of this software and associated documentation files (the "Software"), to deal

MANIFEST

Lines changed: 0 additions & 7 deletions
This file was deleted.

MANIFEST.in

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
1-
include neuralcoref/*
2-
include neuralcoref/weights/*
1+
recursive-include include *.h
2+
include LICENSE
3+
include README.rst

bin/cythonize.py

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
#!/usr/bin/env python
2+
""" cythonize.py
3+
4+
Cythonize pyx files into C++ files as needed.
5+
6+
Usage: cythonize.py [root]
7+
8+
Checks pyx files to see if they have been changed relative to their
9+
corresponding C++ files. If they have, then runs cython on these files to
10+
recreate the C++ files.
11+
12+
Additionally, checks pxd files and setup.py if they have been changed. If
13+
they have, rebuilds everything.
14+
15+
Change detection based on file hashes stored in JSON format.
16+
17+
For now, this script should be run by developers when changing Cython files
18+
and the resulting C++ files checked in, so that end-users (and Python-only
19+
developers) do not get the Cython dependencies.
20+
21+
Based upon:
22+
23+
https://raw.github.com/dagss/private-scipy-refactor/cythonize/cythonize.py
24+
https://raw.githubusercontent.com/numpy/numpy/master/tools/cythonize.py
25+
26+
Note: this script does not check any of the dependent C++ libraries.
27+
"""
28+
from __future__ import print_function
29+
30+
import os
31+
import sys
32+
import json
33+
import hashlib
34+
import subprocess
35+
import argparse
36+
37+
38+
# JSON file, opened relative to the current working directory, in which the
# file hashes used for change detection are kept between runs.
HASH_FILE = 'cythonize.json'
39+
40+
41+
def process_pyx(fromfile, tofile):
    """Compile the Cython source ``fromfile`` into ``tofile``.

    C++ output (``--cplus``) is requested when ``tofile`` ends with ``.cpp``.
    The ``cython`` executable is tried first; if launching it raises
    ``OSError``, Cython is run through the current Python interpreter.

    Raises:
        Exception: if the importable Cython is older than 0.19, or if the
            Cython run exits with a non-zero status.
        OSError: if the fallback invocation cannot be launched either.
    """
    print('Processing %s' % fromfile)

    # Best-effort version check: silently skipped when the imports fail.
    try:
        from Cython.Compiler.Version import version as cython_version
        from distutils.version import LooseVersion
    except ImportError:
        pass
    else:
        if LooseVersion(cython_version) < LooseVersion('0.19'):
            raise Exception('Require Cython >= 0.19')

    cython_args = ['--fast-fail']
    if tofile.endswith('.cpp'):
        cython_args.append('--cplus')
    cython_args.extend(['-o', tofile, fromfile])

    try:
        try:
            status = subprocess.call(['cython'] + cython_args,
                                     env=os.environ)  # See Issue #791
            if status != 0:
                raise Exception('Cython failed')
        except OSError:
            # There are ways of installing Cython that don't result in a cython
            # executable on the path, see gh-2397.
            fallback = ('import sys; from Cython.Compiler.Main import '
                        'setuptools_main as main; sys.exit(main())')
            status = subprocess.call([sys.executable, '-c', fallback] + cython_args)
            if status != 0:
                raise Exception('Cython failed')
    except OSError:
        raise OSError('Cython needs to be installed')
73+
74+
75+
def preserve_cwd(path, func, *args):
    """Call ``func(*args)`` with ``path`` as the working directory.

    The working directory in effect before the call is restored afterwards,
    whether ``func`` returns normally or raises.

    Args:
        path: directory to change into before calling ``func``.
        func: callable to invoke.
        *args: positional arguments forwarded to ``func``.

    Returns:
        Whatever ``func`` returns.
    """
    orig_cwd = os.getcwd()
    try:
        os.chdir(path)
        # Hand the result back to the caller instead of discarding it.
        return func(*args)
    finally:
        os.chdir(orig_cwd)
82+
83+
84+
def load_hashes(filename):
    """Load the hash database stored as JSON in ``filename``.

    Returns:
        The decoded JSON content, or an empty dict when the file cannot be
        opened (``IOError``) or does not hold valid JSON (``ValueError``).
    """
    try:
        # Context manager so the handle is closed deterministically.
        with open(filename) as f:
            return json.load(f)
    except (ValueError, IOError):
        return {}
89+
90+
91+
def save_hashes(hash_db, filename):
    """Write ``hash_db`` to ``filename`` as JSON, replacing previous content."""
    with open(filename, 'w') as out:
        serialized = json.dumps(hash_db)
        out.write(serialized)
94+
95+
96+
def get_hash(path):
    """Return the hexadecimal MD5 digest of the content of the file at ``path``.

    The file is read in binary mode, in chunks, and is closed before
    returning.
    """
    digest = hashlib.md5()
    with open(path, 'rb') as f:
        # Feed the hash piecewise so large files are never held in memory whole.
        for chunk in iter(lambda: f.read(65536), b''):
            digest.update(chunk)
    return digest.hexdigest()
98+
99+
100+
def hash_changed(base, path, db):
    """Tell whether the file ``base``/``path`` differs from its entry in ``db``.

    ``db`` is looked up by the normalized joined path; a file without an
    entry counts as changed.
    """
    key = os.path.normpath(os.path.join(base, path))
    current = get_hash(key)
    return current != db.get(key)
103+
104+
105+
def hash_add(base, path, db):
    """Store in ``db`` the current hash of the file ``base``/``path``.

    The entry is keyed by the normalized joined path.
    """
    key = os.path.normpath(os.path.join(base, path))
    current = get_hash(key)
    db[key] = current
108+
109+
110+
def process(base, filename, db):
    """Re-run Cython for ``filename`` when needed and record the new hashes.

    Only ``.pyx`` and ``.cpp`` files are considered. Cython runs when the
    file's hash differs from the one in ``db`` or when the matching ``.cpp``
    file does not exist in ``base``; afterwards the hashes of both the
    ``.cpp`` and the ``.pyx`` file are stored in ``db``.
    """
    stem, suffix = os.path.splitext(filename)
    if suffix not in ('.pyx', '.cpp'):
        return

    pyx_name = stem + '.pyx'
    cpp_name = stem + '.cpp'
    if not hash_changed(base, filename, db) and os.path.isfile(os.path.join(base, cpp_name)):
        # Unchanged and the generated file is present: nothing to do.
        return

    preserve_cwd(base, process_pyx, pyx_name, cpp_name)
    hash_add(base, cpp_name, db)
    hash_add(base, pyx_name, db)
117+
118+
119+
def check_changes(root, db):
    """Check ``setup.py`` and every ``.pxd`` file under ``root`` for changes.

    When any of them differs from its hash in ``db``, ``db`` is emptied and
    refilled with only the fresh hashes of those files.

    Returns:
        True if a change was detected (and ``db`` was reset), else False.
    """
    fresh = {}

    hash_add('.', 'setup.py', fresh)
    changed = hash_changed('.', 'setup.py', db)

    for dirpath, _, names in os.walk(root):
        for name in names:
            if not name.endswith('.pxd'):
                continue
            hash_add(dirpath, name, fresh)
            if hash_changed(dirpath, name, db):
                changed = True

    if changed:
        db.clear()
        db.update(fresh)
    return changed
139+
140+
141+
def run(root):
    """Process every file under ``root`` and persist the hash database.

    The database is loaded from ``HASH_FILE`` first and written back in a
    ``finally`` clause, so it is saved even when processing raises.
    """
    hashes = load_hashes(HASH_FILE)

    try:
        check_changes(root, hashes)
        for dirpath, _, names in os.walk(root):
            for name in names:
                process(dirpath, name, hashes)
    finally:
        save_hashes(hashes, HASH_FILE)
151+
152+
153+
if __name__ == '__main__':
    # Command-line entry point: one required positional argument, the
    # directory tree to scan.
    cli = argparse.ArgumentParser(description='Cythonize pyx files into C++ files as needed')
    cli.add_argument('root', help='root directory')
    run(cli.parse_args().root)

examples/server.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
"""Coreference resolution server example.
4+
A simple server serving the coreference system.
5+
"""
6+
from __future__ import unicode_literals
7+
8+
import sys
9+
import json
10+
from wsgiref.simple_server import make_server
11+
import falcon
12+
import spacy
13+
14+
# True when running under Python 2. Read from the structured version tuple
# rather than by parsing the first character of the free-form version string.
IS_PYTHON2 = sys.version_info[0] == 2
# Text type of the running interpreter: ``unicode`` on Python 2, ``str`` otherwise.
unicode_ = unicode if IS_PYTHON2 else str  # noqa: F821
16+
17+
class AllResource(object):
    """Falcon resource answering GET requests with coreference results.

    The spaCy pipeline ``en_coref_sm`` is loaded once in the constructor and
    reused for every request.
    """

    def __init__(self):
        self.nlp = spacy.load('en_coref_sm')
        print("Server loaded")
        # Payload built for the most recent request; None before the first one.
        self.response = None

    def on_get(self, req, resp):
        """Handle a GET request with an optional ``text`` query parameter.

        When ``text`` is given and the parsed document has coreferences, the
        JSON body holds ``mentions``, ``clusters`` and ``resolved``;
        otherwise it is an empty object. The response is always sent as
        JSON with status 200 and a permissive CORS header.
        """
        payload = self.response = {}

        raw_text = req.get_param("text")
        if raw_text is not None:
            # A list value is collapsed into one comma-separated string.
            if isinstance(raw_text, list):
                raw_text = ",".join(raw_text)
            doc = self.nlp(unicode_(raw_text))
            if doc._.has_coref:
                mentions = []
                for span in doc._.coref_mentions:
                    mentions.append({
                        'start': span.start_char,
                        'end': span.end_char,
                        'text': span.text,
                        'resolved': span._.coref_main_mention.text,
                    })
                clusters = [[span.text for span in cluster]
                            for cluster in doc._.coref_clusters]
                resolved = doc._.coref_resolved
                payload['mentions'] = mentions
                payload['clusters'] = clusters
                payload['resolved'] = resolved

        resp.body = json.dumps(payload)
        resp.content_type = 'application/json'
        resp.append_header('Access-Control-Allow-Origin', "*")
        resp.status = falcon.HTTP_200
48+
49+
if __name__ == '__main__':
    # Build the resource first: its constructor loads the spaCy model.
    resource = AllResource()
    app = falcon.API()
    app.add_route('/', resource)
    # Listen on every interface, port 8000, until the process is stopped.
    make_server('0.0.0.0', 8000, app).serve_forever()

neuralcoref/__init__.pxd

Whitespace-only changes.

neuralcoref/__init__.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,5 @@
1-
from .algorithm import Coref
1+
# coding: utf8
2+
from __future__ import unicode_literals
3+
from .neuralcoref import NeuralCoref
4+
5+
__all__ = ['NeuralCoref']

neuralcoref/bld.bat

Lines changed: 0 additions & 2 deletions
This file was deleted.

neuralcoref/build.sh

Lines changed: 0 additions & 1 deletion
This file was deleted.

neuralcoref/meta.yaml

Lines changed: 0 additions & 24 deletions
This file was deleted.

0 commit comments

Comments
 (0)