|
| 1 | +#----------------------------------------------------------------------------- |
| 2 | +# Copyright (c) 2017, PyStan developers |
| 3 | +# |
| 4 | +# This file is licensed under Version 3.0 of the GNU General Public |
| 5 | +# License. See LICENSE for a text of the license. |
| 6 | +#----------------------------------------------------------------------------- |
| 7 | + |
| 8 | +import numpy as np |
| 9 | +import re |
| 10 | +import pkg_resources |
| 11 | +import io |
| 12 | + |
# Module-level caches; both stay None until build() assigns them, which
# lookup() triggers on its first call.
# Dict mapping a function name (e.g. "R.dnorm") to an array of row
# indices into `stanftable`.
lookuptable = None
# Structured NumPy array read from lookuptable/stan-functions.txt.
stanftable = None
| 15 | + |
def lookup(name, min_similarity_ratio=.75):
    """
    Look up for a Stan function with similar functionality to a Python
    function (or even an R function, see examples). If the function is
    not present on the lookup table, then attempts to find similar
    entries, prints them, and returns the results for the most similar
    one. This function requires package `pandas`.

    Parameters
    ----------
    name : str
        Name of the function one wants to look for.
    min_similarity_ratio : float
        In case no exact match is found on the lookup table, the
        function will attempt to find similar names using
        `difflib.SequenceMatcher.ratio()`, and then results with
        calculated ratio below `min_similarity_ratio` will be discarded.

    Examples
    --------
    #Look up for a Stan function similar to scipy.stats.skewnorm
    lookup("scipy.stats.skewnorm")
    #Look up for a Stan function similar to R dnorm
    lookup("R.dnorm")
    #Look up for a Stan function similar to numpy.hstack
    lookup("numpy.hstack")
    #List Stan log probability mass functions
    lookup("lpmfs")
    #List Stan log cumulative density functions
    lookup("lcdfs")

    Returns
    -------
    A pandas.core.frame.DataFrame if an exact or at least one similar
    entry is found and it maps to at least one Stan function.
    A message string if the entry exists on the lookup table but has
    no equivalent Stan function.
    None if neither an exact nor a similar entry is found.

    Raises
    ------
    ImportError
        If a result has to be returned and `pandas` is not installed.
    """
    # The tables are built lazily, on the first call only.
    if lookuptable is None:
        build()

    if name not in lookuptable:
        from difflib import SequenceMatcher
        from operator import itemgetter
        print("No match for " + name + " in the lookup table.")

        # Score every known key against `name`, drop the ones below the
        # threshold and sort ascending so the best match comes last.
        scored = [(key, SequenceMatcher(a=name, b=key).ratio())
                  for key in lookuptable]
        similars = sorted((pair for pair in scored
                           if pair[1] >= min_similarity_ratio),
                          key=itemgetter(1))

        if not similars:
            print("And no similar entry found. You may try to decrease "
                  "the min_similarity_ratio parameter.")
            return None

        print("But the following similar entries were found: ")
        for key, ratio in similars:
            print(key + " ===> with similarity "
                  "ratio of " + str(round(ratio, 3)) + "")
        best_key = similars[-1][0]
        print("Will return results for entry"
              " " + best_key + " "
              "(which is the most similar entry found).")
        # `best_key` is an exact key, so this recursion ends immediately.
        return lookup(best_key)

    entries = stanftable[lookuptable[name]]
    if not len(entries):
        return "Found no equivalent Stan function available for " + name

    # pandas is imported late so the paths above work without it.
    try:
        import pandas as pd
    except ImportError:
        raise ImportError('Package pandas is required to use this '
                          'function.')

    return pd.DataFrame(entries)
| 90 | + |
| 91 | + |
| 92 | + |
def build():
    """
    Build the module-level tables used by `lookup`.

    Reads the packaged files ``lookuptable/stan-functions.txt``,
    ``lookuptable/R.txt`` and ``lookuptable/python.txt`` and assigns:

    * `stanftable`: structured array parsed from ``stan-functions.txt``;
    * `lookuptable`: dict mapping a function name (R names are prefixed
      with ``"R."``) to the array of row indices of `stanftable` whose
      ``StanFunction`` field matches the associated regular expression.

    Returns
    -------
    None. The results are stored in the module globals.
    """
    global lookuptable
    global stanftable

    def load_table_file(fname):
        # Load a packaged data file into an in-memory binary stream.
        fname = "lookuptable/" + fname
        fbytes = pkg_resources.resource_string(__name__, fname)
        return io.BytesIO(fbytes)

    stanfunctions_file = load_table_file("stan-functions.txt")
    rfunctions_file = load_table_file("R.txt")
    pythontb_file = load_table_file("python.txt")

    stanftb = np.genfromtxt(stanfunctions_file, delimiter=';',
                            names=True, skip_header=True,
                            dtype=['<U200', '<U200', '<U200', "int"])
    # Replace the LaTeX bar sequence left in the argument lists by "|".
    rpl_textbar = np.vectorize(lambda x: x.replace("\\textbar \\", "|"))
    stanftb['Arguments'] = rpl_textbar(stanftb['Arguments'])

    StanFunction = stanftb["StanFunction"]

    #Auto-extract R functions
    # Raw strings keep the pattern text unchanged while avoiding the
    # invalid-escape warnings that "\[", "\(", "\<" and "\_" trigger in
    # ordinary string literals.
    rpattern = (r'('
                r'(?<=RFunction\[StanFunction == ").+?(?=")'
                r'|(?<=grepl\(").+?(?=", StanFunction\))'
                r'|(?<= \<\- ").+?(?="\)))'
                r'|NA\_character\_')
    rmatches = [re.findall(rpattern, l.decode("utf-8"))
                for l in rfunctions_file]
    # Only lines that yield exactly two matches are kept; the first is
    # used below as a regex on Stan function names, the second as the R
    # name. The "R." prefix is added BEFORE the array is created:
    # assigning longer strings into an existing fixed-width unicode array
    # would silently truncate them.
    rpairs = [[m[0], "R." + m[1]] for m in rmatches if len(m) == 2]
    tomatch = np.array(rpairs, dtype=str).reshape(-1, 2)

    #Get packages lookup table for Python packages
    # presumably two columns (regex; python name) -- verify against
    # lookuptable/python.txt
    pymatches = np.genfromtxt(pythontb_file, delimiter='; ', dtype=str)
    tomatch = np.vstack((tomatch, pymatches))

    lookuptb = dict()
    for i in range(tomatch.shape[0]):
        matchedlines = np.vectorize(lambda x: re.match(tomatch[i, 0],
                                                       x))(StanFunction)
        # Indices of the Stan functions whose name matches the pattern.
        lookuptb[tomatch[i, 1]] = np.where(matchedlines)[0]

    #debug: list of rmatches that got wrong
    #print(list(filter(lambda x: len(x) != 2 and len(x) != 0,
    #                  rmatches)))

    #debug: list of nodes without matches on lookup table
    #for k in lookuptb:
    #    if len(lookuptb[k]) == 0:
    #        print(k)

    stanftable = stanftb
    lookuptable = lookuptb
0 commit comments