Skip to content
This repository was archived by the owner on Mar 19, 2021. It is now read-only.

Commit 7470ce2

Browse files
authored
Merge pull request #337 from stan-dev/feature/issue-321-lookuptable
Feature/issue 321 lookuptable
2 parents 58c43e8 + 9db8a4d commit 7470ce2

File tree

8 files changed

+1127
-3
lines changed

8 files changed

+1127
-3
lines changed

continuous_integration/install.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ export PATH=$HOME/miniconda3/bin:$PATH
2424
# Update conda itself
2525
conda update --yes --quiet conda
2626
PYTHON_VERSION_MAJOR=${TRAVIS_PYTHON_VERSION:0:1}
27-
if [[ $PYTHON_VERSION_MAJOR == '2' ]]; then conda create --quiet --yes -n env_name python=$TRAVIS_PYTHON_VERSION pip Cython=0.22 numpy=1.7 scipy nose matplotlib; fi
28-
if [[ $PYTHON_VERSION_MAJOR == '3' ]]; then conda create --quiet --yes -n env_name python=$TRAVIS_PYTHON_VERSION pip Cython numpy scipy nose matplotlib; fi
27+
if [[ $PYTHON_VERSION_MAJOR == '2' ]]; then conda create --quiet --yes -n env_name python=$TRAVIS_PYTHON_VERSION pip Cython=0.22 numpy=1.7 scipy nose matplotlib pandas; fi
28+
if [[ $PYTHON_VERSION_MAJOR == '3' ]]; then conda create --quiet --yes -n env_name python=$TRAVIS_PYTHON_VERSION pip Cython numpy scipy nose matplotlib pandas; fi
2929
source activate env_name
3030
python -c "import numpy"
3131

pystan/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from pystan.api import stanc, stan
1010
from pystan.misc import read_rdump, stan_rdump
1111
from pystan.model import StanModel
12+
from pystan.lookup import lookup
1213

1314
logger = logging.getLogger('pystan')
1415
logger.addHandler(logging.NullHandler())

pystan/lookup.py

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
#-----------------------------------------------------------------------------
2+
# Copyright (c) 2017, PyStan developers
3+
#
4+
# This file is licensed under Version 3.0 of the GNU General Public
5+
# License. See LICENSE for a text of the license.
6+
#-----------------------------------------------------------------------------
7+
8+
import numpy as np
9+
import re
10+
import pkg_resources
11+
import io
12+
13+
# Module-level caches, populated by build() the first time lookup() runs.
#   lookuptable : dict mapping a foreign function name to the row indices
#                 of the matching Stan functions in stanftable.
#   stanftable  : table of Stan functions parsed from the bundled files.
lookuptable, stanftable = None, None
15+
16+
def lookup(name, min_similarity_ratio=.75):
    """
    Look up Stan functions with functionality similar to a Python
    function (or even an R function, see examples).

    If `name` is not present in the lookup table, the function prints
    every sufficiently similar entry and returns the results for the
    most similar one. Returning a table requires the package `pandas`.

    Parameters
    ----------
    name : str
        Name of the function one wants to look for.
    min_similarity_ratio : float
        In case no exact match is found in the lookup table, candidate
        names are scored with `difflib.SequenceMatcher.ratio()` and
        those scoring below `min_similarity_ratio` are discarded.

    Examples
    --------
    #Look up for a Stan function similar to scipy.stats.skewnorm
    lookup("scipy.stats.skewnorm")
    #Look up for a Stan function similar to R dnorm
    lookup("R.dnorm")
    #Look up for a Stan function similar to numpy.hstack
    lookup("numpy.hstack")
    #List Stan log probability mass functions
    lookup("lpmfs")
    #List Stan log cumulative density functions
    lookup("lcdfs")

    Returns
    -------
    pandas.DataFrame
        The matching rows of the Stan function table, when an exact or
        similar entry is found and it maps to at least one Stan function.
    str
        An explanatory message when the entry exists but no equivalent
        Stan function is listed for it.
    None
        When neither an exact nor a sufficiently similar entry exists.

    Raises
    ------
    ImportError
        If there are results to return but `pandas` is not installed.
    """
    if lookuptable is None:
        # The tables are parsed lazily and cached in module globals.
        build()

    if name not in lookuptable:
        from difflib import SequenceMatcher
        from operator import itemgetter
        print("No match for " + name + " in the lookup table.")

        scored = [(key, SequenceMatcher(a=name, b=key).ratio())
                  for key in lookuptable]
        # Ascending (stable) sort: the best candidate ends up last.
        candidates = sorted((pair for pair in scored
                             if pair[1] >= min_similarity_ratio),
                            key=itemgetter(1))

        if not candidates:
            print("And no similar entry found. You may try to decrease "
                  "the min_similarity_ratio parameter.")
            return None

        print("But the following similar entries were found: ")
        for key, ratio in candidates:
            print(key + " ===> with similarity ratio of "
                  + str(round(ratio, 3)))
        best = candidates[-1][0]
        print("Will return results for entry " + best + " "
              "(which is the most similar entry found).")
        # `best` is an existing key, so this call resolves as an exact match.
        return lookup(best)

    entries = stanftable[lookuptable[name]]
    if not len(entries):
        return "Found no equivalent Stan function available for " + name

    # pandas is imported only once there is something to tabulate, so the
    # paths above keep working without it.
    try:
        import pandas as pd
    except ImportError:
        raise ImportError('Package pandas is required to use this '
                          'function.')

    return pd.DataFrame(entries)
90+
91+
92+
93+
def build():
    """
    Parse the bundled lookup files and cache the resulting tables.

    Reads ``lookuptable/stan-functions.txt``, ``lookuptable/R.txt`` and
    ``lookuptable/python.txt`` from the package resources and stores the
    results in the module globals:

    stanftable : structured numpy array parsed from the Stan functions
        file (``;``-delimited, with named columns).
    lookuptable : dict mapping a foreign function name (R names are
        prefixed with ``"R."``) to the array of row indices in
        `stanftable` whose ``StanFunction`` matches the associated regex.

    Returns
    -------
    None
    """
    global lookuptable
    global stanftable

    def load_table_file(fname):
        # Resources come back as bytes; wrap them in a file-like object.
        fbytes = pkg_resources.resource_string(__name__,
                                               "lookuptable/" + fname)
        return io.BytesIO(fbytes)

    stanfunctions_file = load_table_file("stan-functions.txt")
    rfunctions_file = load_table_file("R.txt")
    pythontb_file = load_table_file("python.txt")

    stanftb = np.genfromtxt(stanfunctions_file, delimiter=';',
                            names=True, skip_header=True,
                            dtype=['<U200', '<U200', '<U200', "int"])
    # Turn the LaTeX bar markup found in the arguments column into "|".
    rpl_textbar = np.vectorize(lambda x: x.replace("\\textbar \\", "|"))
    stanftb['Arguments'] = rpl_textbar(stanftb['Arguments'])

    StanFunction = stanftb["StanFunction"]

    # Auto-extract R functions. Every fragment is a raw string so that the
    # regex escapes are not (mis)interpreted as Python string escapes.
    r_pattern = re.compile(
        r'('
        r'(?<=RFunction\[StanFunction == ").+?(?=")'
        r'|(?<=grepl\(").+?(?=", StanFunction\))'
        r'|(?<= \<\- ").+?(?="\)))'
        r'|NA\_character\_')
    rmatches = [r_pattern.findall(line.decode("utf-8"))
                for line in rfunctions_file]
    # Only lines yielding exactly two captures form a (Stan regex, R name)
    # pair. The "R." prefix is added here, before the fixed-width numpy
    # string array exists, so that long names cannot be truncated.
    rpairs = [(found[0], "R." + found[1])
              for found in rmatches if len(found) == 2]
    # reshape keeps a (0, 2) shape when no pair was found, so that the
    # stacking below still works.
    tomatch = np.array(rpairs, dtype=str).reshape(-1, 2)

    # Get packages lookup table for Python packages
    pymatches = np.genfromtxt(pythontb_file, delimiter='; ', dtype=str)
    tomatch = np.vstack((tomatch, pymatches))

    lookuptb = dict()
    for stan_regex, foreign_name in tomatch:
        matcher = re.compile(stan_regex)
        hits = [matcher.match(fn) is not None for fn in StanFunction]
        lookuptb[foreign_name] = np.where(hits)[0]

    stanftable = stanftb
    lookuptable = lookuptb

0 commit comments

Comments
 (0)