Skip to content

Commit 2cdf685

Browse files
committed
Merge branch 'release-2.3.0'
2 parents 1b8fdaf + b4d2cd2 commit 2cdf685

File tree

140 files changed

+14409
-6640
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

140 files changed

+14409
-6640
lines changed

.travis.yml

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,16 +12,12 @@ before_install:
1212
- export PATH=/home/travis/miniconda2/bin:$PATH
1313
- conda update --yes conda
1414
install:
15-
- conda create --yes -n gensim-test python=$TRAVIS_PYTHON_VERSION pip atlas numpy scipy
15+
- conda create --yes -n gensim-test python=$TRAVIS_PYTHON_VERSION pip atlas numpy==1.11.3 scipy==0.18.1
1616
- source activate gensim-test
17-
- pip install pyemd
18-
- pip install annoy
19-
- pip install testfixtures
20-
- pip install unittest2
21-
- pip install scikit-learn
22-
- pip install Morfessor==2.0.2a4
2317
- python setup.py install
24-
script:
18+
- pip install .[test]
19+
script:
20+
- pip freeze
2521
- python setup.py test
2622
- pip install flake8
2723
- continuous_integration/travis/flake8_diff.sh

CHANGELOG.md

Lines changed: 251 additions & 140 deletions
Large diffs are not rendered by default.

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,8 @@ Adopters
135135
| Stillwater Supercomputing | <img src="http://www.stillwater-sc.com/img/stillwater-logo.png" width="100"> | [stillwater-sc.com](http://www.stillwater-sc.com/) | Document comprehension and association with word2vec |
136136
| Channel 4 | <img src="http://www.channel4.com/static/info/images/lib/c4logo_2015_info_corporate.jpg" width="100"> | [channel4.com](http://www.channel4.com/) | Recommendation engine |
137137
| Amazon | <img src="http://g-ec2.images-amazon.com/images/G/01/social/api-share/amazon_logo_500500._V323939215_.png" width="100"> | [amazon.com](http://www.amazon.com/) | Document similarity|
138+
| SiteGround Hosting | <img src="https://www.siteground.com/img/knox/logos/siteground.png" width="100"> | [siteground.com](https://www.siteground.com/) | An ensemble search engine which uses different embeddings models and similarities, including word2vec, WMD, and LDA. |
139+
| Juju | <img src="https://d5k1a84rm5hwo.cloudfront.net/img/juju_home_logo.png" width="100"> | [www.juju.com](http://www.juju.com/) | Provide non-obvious related job suggestions. |
138140

139141
-------
140142

appveyor.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,8 @@ test_script:
6666
# installed library.
6767
- "mkdir empty_folder"
6868
- "cd empty_folder"
69-
- "pip install pyemd testfixtures unittest2 sklearn Morfessor==2.0.2a4"
70-
69+
- "pip install pyemd testfixtures sklearn Morfessor==2.0.2a4"
70+
- "pip freeze"
7171
- "python -c \"import nose; nose.main()\" -s -v gensim"
7272
# Move back to the project folder
7373
- "cd .."

continuous_integration/appveyor/requirements.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77
# fix the versions of numpy to force the use of numpy and scipy to use the whl
88
# of the rackspace folder instead of trying to install from more recent
99
# source tarball published on PyPI
10-
numpy==1.9.3
11-
scipy==0.16.0
10+
numpy==1.11.3
11+
scipy==0.18.1
1212
cython
1313
six >= 1.5.0
1414
smart_open >= 1.2.1

continuous_integration/travis/flake8_diff.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,9 +115,10 @@ echo -e '\nRunning flake8 on the diff in the range' "$COMMIT_RANGE" \
115115
echo '--------------------------------------------------------------------------------'
116116

117117
# We ignore files from sklearn/externals.
118+
# We exclude .vec files: they contain non-UTF-8 content, and flake8 raises an exception on non-UTF-8 input.
118119
# We need the following command to exit with 0 hence the echo in case
119120
# there is no match
120-
MODIFIED_FILES="$(git diff --name-only $COMMIT_RANGE || echo "no_match")"
121+
MODIFIED_FILES="$(git diff --name-only $COMMIT_RANGE -- . ':(exclude)*.vec' || echo "no_match")"
121122

122123
check_files() {
123124
files="$1"
@@ -133,6 +134,6 @@ check_files() {
133134
if [[ "$MODIFIED_FILES" == "no_match" ]]; then
134135
echo "No file has been modified"
135136
else
136-
check_files "$(echo "$MODIFIED_FILES" )" "--ignore=E501,E731,E12,W503 --exclude=*.sh,*.md,*.yml,*.rst,*.ipynb"
137+
check_files "$(echo "$MODIFIED_FILES" )" "--ignore=E501,E731,E12,W503 --exclude=*.sh,*.md,*.yml,*.rst,*.ipynb,*.txt,*.csv,*.vec,Dockerfile*,*.c,*.pyx,*.inc"
137138
fi
138139
echo -e "No problem detected by flake8\n"

docker/Dockerfile

Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
FROM ubuntu:16.04
2+
3+
MAINTAINER Parul Sethi <parul1sethi@gmail.com>
4+
5+
ENV GENSIM_REPOSITORY https://github.com/RaRe-Technologies/gensim.git
6+
ENV GENSIM_BRANCH develop
7+
8+
# Install system build tools and libraries, plus Python, pip and setuptools (all with pinned versions)
9+
RUN apt-get update \
10+
&& apt-get install -y \
11+
ant=1.9.6-1ubuntu1 \
12+
cmake=3.5.1-1ubuntu3 \
13+
default-jdk=2:1.8-56ubuntu2 \
14+
g++=4:5.3.1-1ubuntu1 \
15+
git=1:2.7.4-0ubuntu1 \
16+
libboost-all-dev=1.58.0.1ubuntu1 \
17+
libgsl-dev=2.1+dfsg-2 \
18+
mercurial=3.7.3-1ubuntu1 \
19+
python3=3.5.1-3 \
20+
python3-pip=8.1.1-2ubuntu0.4 \
21+
python3-setuptools=20.7.0-1 \
22+
python=2.7.11-1 \
23+
python-pip=8.1.1-2ubuntu0.4 \
24+
python-setuptools=20.7.0-1 \
25+
unzip=6.0-20ubuntu1 \
26+
wget=1.17.1-1ubuntu1.1 \
27+
subversion=1.9.3-2ubuntu1 \
28+
locales=2.23-0ubuntu9 \
29+
libopenblas-dev=0.2.18-1ubuntu1 \
30+
libboost-program-options-dev=1.58.0.1ubuntu1 \
31+
zlib1g-dev=1:1.2.8.dfsg-2ubuntu4.1
32+
33+
# Set up the en_US.UTF-8 locale
34+
RUN locale-gen en_US.UTF-8
35+
ENV LANG en_US.UTF-8
36+
ENV LC_CTYPE en_US.UTF-8
37+
ENV LC_ALL en_US.UTF-8
38+
39+
# Upgrade pip
40+
RUN pip2 install --upgrade pip
41+
RUN pip3 install --upgrade pip
42+
43+
# Install dependencies
44+
RUN pip2 install \
45+
cython==0.25.2 \
46+
jupyter==1.0.0 \
47+
matplotlib==2.0.0 \
48+
nltk==3.2.2 \
49+
pandas==0.19.2 \
50+
spacy==1.8.1 \
51+
git+https://github.com/mila-udem/blocks.git@7beb788f1fcfc78d56c59a5edf9b4e8d98f8d7d9 \
52+
-r https://raw.githubusercontent.com/mila-udem/blocks/stable/requirements.txt
53+
54+
RUN pip3 install \
55+
cython==0.25.2 \
56+
jupyter==1.0.0 \
57+
matplotlib==2.0.0 \
58+
nltk==3.2.2 \
59+
pandas==0.19.2 \
60+
spacy==1.8.1 \
61+
git+https://github.com/mila-udem/blocks.git@7beb788f1fcfc78d56c59a5edf9b4e8d98f8d7d9 \
62+
-r https://raw.githubusercontent.com/mila-udem/blocks/stable/requirements.txt
63+
64+
# avoid using old numpy version installed by blocks requirements
65+
RUN pip2 install -U numpy
66+
RUN pip3 install -U numpy
67+
68+
# Download english model of Spacy
69+
RUN python2 -m spacy download en
70+
RUN python3 -m spacy download en
71+
72+
# Download gensim from Github
73+
RUN git clone $GENSIM_REPOSITORY \
74+
&& cd /gensim \
75+
&& git checkout $GENSIM_BRANCH \
76+
&& pip2 install .[test] \
77+
&& python2 setup.py install \
78+
&& pip3 install .[test] \
79+
&& python3 setup.py install
80+
81+
# Create gensim dependencies directory
82+
RUN mkdir /gensim/gensim_dependencies
83+
84+
# Set ENV variables for wrappers
85+
ENV WR_HOME /gensim/gensim_dependencies/wordrank
86+
ENV FT_HOME /gensim/gensim_dependencies/fastText
87+
ENV MALLET_HOME /gensim/gensim_dependencies/mallet
88+
ENV DTM_PATH /gensim/gensim_dependencies/dtm/dtm/main
89+
ENV VOWPAL_WABBIT_PATH /gensim/gensim_dependencies/vowpal_wabbit/vowpalwabbit/vw
90+
91+
# For fixed version downloads of gensim wrappers dependencies
92+
ENV WORDRANK_VERSION 44f3f7786f76c79c083dfad9d64e20bacfb4a0b0
93+
ENV FASTTEXT_VERSION f24a781021862f0e475a5fb9c55b7c1cec3b6e2e
94+
ENV MORPHOLOGICALPRIORSFORWORDEMBEDDINGS_VERSION ec2e37a3bcb8bd7b56b75b043c47076bc5decf22
95+
ENV DTM_VERSION 67139e6f526b2bc33aef56dc36176a1b8b210056
96+
ENV MALLET_VERSION 2.0.8
97+
ENV VOWPAL_WABBIT_VERSION 69ecc2847fa0c876c6e0557af409f386f0ced59a
98+
99+
# Install custom dependencies
100+
101+
# Install mpich (a wordrank dependency) and remove openmpi to avoid mpirun conflict
102+
RUN apt-get purge -y openmpi-common openmpi-bin libopenmpi1.10
103+
RUN apt-get install -y mpich
104+
105+
# Install wordrank
106+
RUN cd /gensim/gensim_dependencies \
107+
&& git clone https://bitbucket.org/shihaoji/wordrank \
108+
&& cd /gensim/gensim_dependencies/wordrank \
109+
&& git checkout $WORDRANK_VERSION \
110+
&& sed -i -e 's/#export CC=gcc CXX=g++/export CC=gcc CXX=g++/g' install.sh \
111+
&& sh ./install.sh
112+
113+
# Install fastText
114+
RUN cd /gensim/gensim_dependencies \
115+
&& git clone https://github.com/facebookresearch/fastText.git \
116+
&& cd /gensim/gensim_dependencies/fastText \
117+
&& git checkout $FASTTEXT_VERSION \
118+
&& make
119+
120+
# Install MorphologicalPriorsForWordEmbeddings
121+
RUN cd /gensim/gensim_dependencies \
122+
&& git clone https://github.com/rguthrie3/MorphologicalPriorsForWordEmbeddings.git \
123+
&& cd /gensim/gensim_dependencies/MorphologicalPriorsForWordEmbeddings \
124+
&& git checkout $MORPHOLOGICALPRIORSFORWORDEMBEDDINGS_VERSION
125+
126+
# Install DTM
127+
RUN cd /gensim/gensim_dependencies \
128+
&& git clone https://github.com/blei-lab/dtm.git \
129+
&& cd /gensim/gensim_dependencies/dtm/dtm \
130+
&& git checkout $DTM_VERSION \
131+
&& make
132+
133+
# Install Mallet
134+
RUN mkdir /gensim/gensim_dependencies/mallet \
135+
&& mkdir /gensim/gensim_dependencies/download \
136+
&& cd /gensim/gensim_dependencies/download \
137+
&& wget --quiet http://mallet.cs.umass.edu/dist/mallet-$MALLET_VERSION.zip \
138+
&& unzip mallet-$MALLET_VERSION.zip \
139+
&& mv ./mallet-$MALLET_VERSION/* /gensim/gensim_dependencies/mallet \
140+
&& rm -rf /gensim/gensim_dependencies/download \
141+
&& cd /gensim/gensim_dependencies/mallet \
142+
&& ant
143+
144+
# Install Vowpal wabbit
145+
RUN cd /gensim/gensim_dependencies \
146+
&& git clone https://github.com/JohnLangford/vowpal_wabbit.git \
147+
&& cd /gensim/gensim_dependencies/vowpal_wabbit \
148+
&& git checkout $VOWPAL_WABBIT_VERSION \
149+
&& make \
150+
&& make install
151+
152+
# Start gensim
153+
154+
# Run check script
155+
RUN python2 /gensim/docker/check_fast_version.py
156+
RUN python3 /gensim/docker/check_fast_version.py
157+
158+
# Add running permission to startup script
159+
RUN chmod +x /gensim/docker/start_jupyter_notebook.sh
160+
161+
# Define the starting command for this container and expose its running port
162+
CMD sh -c '/gensim/docker/start_jupyter_notebook.sh 9000'
163+
EXPOSE 9000

docker/README.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Build gensim image
2+
3+
In the `docker` directory, run the following command to build the image locally:
4+
5+
```
6+
docker build -t gensim .
7+
```
8+
9+
# Run a Jupyter notebook with gensim installed
10+
11+
Just execute:
12+
13+
```
14+
docker run -p 9000:9000 gensim
15+
```
16+
17+
# Run an interactive bash shell
18+
19+
```
20+
docker run -it gensim /bin/bash
21+
```

docker/check_fast_version.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
import sys
2+
3+
try:
4+
from gensim.models.word2vec_inner import FAST_VERSION
5+
6+
# Build one string before printing: this script is run under both Python 2 and Python 3, and without print_function Python 2 would print a two-argument print(...) call as a tuple.
print('FAST_VERSION ok ! Retrieved with value %s' % (FAST_VERSION,))
7+
sys.exit()
8+
except ImportError:
9+
print('Failed... fall back to plain numpy (20-80x slower training than the above)')
10+
sys.exit(-1)

docker/start_jupyter_notebook.sh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#!/bin/bash
2+
3+
PORT=$1
4+
NOTEBOOK_DIR=/gensim/docs/notebooks
5+
DEFAULT_URL=/notebooks/gensim%20Quick%20Start.ipynb
6+
7+
# Launch the notebook server. Every argument that holds a glob character or a variable expansion is quoted so the shell passes it through verbatim (no pathname expansion on '*', no word-splitting on the variables).
jupyter notebook --no-browser "--ip=*" "--port=$PORT" --allow-root "--notebook-dir=$NOTEBOOK_DIR" --NotebookApp.token=\"\" "--NotebookApp.default_url=$DEFAULT_URL"

0 commit comments

Comments
 (0)