
Commit 68ee9b9

utils
1 parent 6c99444 commit 68ee9b9

14 files changed, +31585 -0 lines changed

libreco/__init__.py

Whitespace-only changes.

libreco/utils/__init__.py

Whitespace-only changes.

libreco/utils/_similarities.cpp

Lines changed: 29931 additions & 0 deletions
Some generated files are not rendered by default.

libreco/utils/_similarities.pyx

Lines changed: 514 additions & 0 deletions
Large diffs are not rendered by default.

libreco/utils/column_mapping.py

Lines changed: 54 additions & 0 deletions
@@ -0,0 +1,54 @@
from collections import defaultdict, OrderedDict
import numpy as np


def col_name2index(user_col=None, item_col=None,
                   sparse_col=None, dense_col=None):
    # format: {column_family_name: {column_name: index}}
    # a missing family simply maps to an empty OrderedDict
    name_mapping = defaultdict(OrderedDict)
    if sparse_col:
        sparse_col_dict = {col: i for i, col in enumerate(sparse_col)}
        name_mapping["sparse_col"].update(sparse_col_dict)
    if dense_col:
        dense_col_dict = {col: i for i, col in enumerate(dense_col)}
        name_mapping["dense_col"].update(dense_col_dict)

    if user_col and sparse_col:
        user_sparse_col = _extract_common_col(sparse_col, user_col)
        for col in user_sparse_col:
            name_mapping["user_sparse_col"].update(
                {col: name_mapping["sparse_col"][col]}
            )
    if user_col and dense_col:
        user_dense_col = _extract_common_col(dense_col, user_col)
        for col in user_dense_col:
            name_mapping["user_dense_col"].update(
                {col: name_mapping["dense_col"][col]}
            )

    if item_col and sparse_col:
        item_sparse_col = _extract_common_col(sparse_col, item_col)
        for col in item_sparse_col:
            name_mapping["item_sparse_col"].update(
                {col: name_mapping["sparse_col"][col]}
            )
    if item_col and dense_col:
        item_dense_col = _extract_common_col(dense_col, item_col)
        for col in item_dense_col:
            name_mapping["item_dense_col"].update(
                {col: name_mapping["dense_col"][col]}
            )

    return name_mapping


def _extract_common_col(col1, col2):
    # np.intersect1d returns the sorted common column names,
    # but we want to preserve the original order in which the
    # common columns appear in col1
    common_col, indices_in_col1, _ = np.intersect1d(col1, col2,
                                                    assume_unique=True,
                                                    return_indices=True)
    return common_col[np.lexsort((common_col, indices_in_col1))]
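
A quick usage sketch of col_name2index; the column names below are made up for illustration, only the functions themselves come from this file:

from libreco.utils.column_mapping import col_name2index

# hypothetical feature columns
sparse_col = ["sex", "occupation", "genre"]
dense_col = ["age", "item_price"]
user_col = ["sex", "occupation", "age"]
item_col = ["genre", "item_price"]

mapping = col_name2index(user_col, item_col, sparse_col, dense_col)
# expected, given the behaviour above:
# mapping["sparse_col"]      -> {"sex": 0, "occupation": 1, "genre": 2}
# mapping["user_sparse_col"] -> {"sex": 0, "occupation": 1}
# mapping["item_dense_col"]  -> {"item_price": 1}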

libreco/utils/exception.py

Lines changed: 9 additions & 0 deletions
@@ -0,0 +1,9 @@
class NotSamplingError(Exception):
    """Exception related to sampling data.

    Raised when the client uses batch_sampling and then evaluates on the
    dataset without having done whole-data sampling beforehand, since
    unsampled data can't be evaluated.
    """
    pass
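
A sketch of where this exception might be raised; the evaluate function and the has_sampled flag are hypothetical, not part of this commit:

from libreco.utils.exception import NotSamplingError

def evaluate(dataset):
    # hypothetical caller: refuse to evaluate data that was never sampled
    if not getattr(dataset, "has_sampled", False):
        raise NotSamplingError(
            "must do whole data sampling before evaluating on the dataset")
    # ... evaluation logic would go here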

libreco/utils/initializers.py

Lines changed: 48 additions & 0 deletions
@@ -0,0 +1,48 @@
import numpy as np


def truncated_normal(shape, mean=0.0, scale=0.05):
    total_num = np.multiply(*shape)
    array = np.random.normal(mean, scale, total_num).astype(np.float32)
    while True:
        # resample values that fall outside two standard deviations
        index = np.logical_or(
            (array > mean + 2 * scale),
            (array < mean - 2 * scale)
        )
        num = len(np.where(index)[0])
        if num == 0:
            break
        array[index] = np.random.normal(mean, scale, num)
    return array.reshape(*shape)


def xavier_init(fan_in, fan_out):
    std = np.sqrt(2.0 / (fan_in + fan_out))
    return truncated_normal(mean=0.0, scale=std, shape=[fan_in, fan_out])


def he_init(fan_in, fan_out):
    std = 2.0 / np.sqrt(fan_in + fan_out)
    # std = np.sqrt(2.0 / fan_in)
    return truncated_normal(mean=0.0, scale=std, shape=[fan_in, fan_out])


def variance_scaling(scala, fan_in=None, fan_out=None, mode="fan_in"):
    """
    xavier: mode = "fan_average", scale = 1.0
    he:     mode = "fan_in",      scale = 2.0
    he2:    mode = "fan_average", scale = 2.0
    """
    if mode == "fan_in":
        std = np.sqrt(scala / fan_in)
    elif mode == "fan_out":
        std = np.sqrt(scala / fan_out)
    elif mode == "fan_average":
        std = np.sqrt(2.0 * scala / (fan_in + fan_out))
    else:
        raise ValueError(
            "mode must be one of these: fan_in, fan_out, fan_average")
    return truncated_normal(mean=0.0, scale=std, shape=[fan_in, fan_out])
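
A minimal sanity check of the initializers, assuming the out-of-range resampling above; the shapes and values are illustrative only:

import numpy as np
from libreco.utils.initializers import (
    truncated_normal, xavier_init, variance_scaling
)

weights = truncated_normal(shape=[256, 64], mean=0.0, scale=0.05)
assert weights.shape == (256, 64)
# every value is resampled until it lies within two standard deviations
assert np.all(np.abs(weights) <= 0.1)

# per the docstring, xavier_init(fan_in, fan_out) corresponds to
# variance_scaling with scale 1.0 and mode "fan_average":
# std = sqrt(2 * 1.0 / (fan_in + fan_out))
w1 = xavier_init(256, 64)
w2 = variance_scaling(1.0, fan_in=256, fan_out=64, mode="fan_average")
assert w1.shape == w2.shape == (256, 64)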

libreco/utils/misc.py

Lines changed: 100 additions & 0 deletions
@@ -0,0 +1,100 @@
import functools
import time
from contextlib import contextmanager
import numpy as np
import tensorflow as tf


def shuffle_data(length, *args):
    # shuffle all passed arrays with the same random permutation
    mask = np.random.permutation(range(length))
    return tuple(map(lambda x: x[mask], [*args]))


def count_params():
    total_params = np.sum(
        [
            np.prod(v.get_shape().as_list())
            for v in tf.trainable_variables()
        ]
    )
    embedding_params = np.sum(
        [
            np.prod(v.get_shape().as_list())
            for v in tf.trainable_variables()
            if 'feat' in v.name
        ]
    )
    network_params = total_params - embedding_params
    total_params = f"{total_params:,}"
    embedding_params = f"{embedding_params:,}"
    network_params = f"{network_params:,}"
    print_params = (f"total params: "
                    f"{colorize(total_params, 'yellow')} | "
                    f"embedding params: "
                    f"{colorize(embedding_params, 'yellow')} | "
                    f"network params: "
                    f"{colorize(network_params, 'yellow')}")
    print(print_params)


def time_func(func):
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start = time.perf_counter()
        result = func(*args, **kwargs)
        end = time.perf_counter()
        print(f"{func.__name__} elapsed: {(end - start):3.3f}s")
        return result
    return wrapper


@contextmanager
def time_block(block_name="block", verbose=1):
    if verbose > 0:
        start = time.perf_counter()
        try:
            yield
        except Exception:
            raise
        else:
            end = time.perf_counter()
            print(f"{block_name} elapsed: {(end - start):3.3f}s")
    else:
        try:
            yield
        except Exception:
            raise


def colorize(string, color, bold=False, highlight=False):
    """Return string surrounded by appropriate terminal color codes to
    print colorized text. Valid colors: gray, red, green, yellow,
    blue, magenta, cyan, white, crimson

    Original source from openAI `gym`:
    https://github.com/openai/gym/blob/master/gym/utils/colorize.py
    """
    attr = []
    num = color2num[color]
    if highlight:
        num += 10
    attr.append(str(num))
    if bold:
        attr.append('1')
    attrs = ';'.join(attr)
    return '\x1b[%sm%s\x1b[0m' % (attrs, string)


color2num = dict(
    gray=30,
    red=31,
    green=32,
    yellow=33,
    blue=34,
    magenta=35,
    cyan=36,
    white=37,
    crimson=38
)
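
A short, hypothetical usage sketch of the timing and color helpers defined above (the training/evaluation functions are stand-ins):

import time
from libreco.utils.misc import time_func, time_block, colorize

@time_func
def train_one_epoch():
    time.sleep(0.1)  # stand-in for real work

train_one_epoch()  # prints something like "train_one_epoch elapsed: 0.100s"

with time_block("evaluation", verbose=1):
    time.sleep(0.05)  # prints "evaluation elapsed: ..." when the block exits

print(colorize("training done", "green", bold=True))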
