
Commit 68ee9b9

utils
1 parent 6c99444 commit 68ee9b9

14 files changed, +31585 -0 lines changed

libreco/__init__.py

Whitespace-only changes.

libreco/utils/__init__.py

Whitespace-only changes.

libreco/utils/_similarities.cpp

Lines changed: 29931 additions & 0 deletions
Some generated files are not rendered by default.

libreco/utils/_similarities.pyx

Lines changed: 514 additions & 0 deletions
Large diffs are not rendered by default.

libreco/utils/column_mapping.py

Lines changed: 54 additions & 0 deletions
@@ -0,0 +1,54 @@
from collections import defaultdict, OrderedDict
import numpy as np


def col_name2index(user_col=None, item_col=None,
                   sparse_col=None, dense_col=None):
    # format: {column_family_name: {column_name: index}}
    # a missing family simply maps to an empty OrderedDict
    name_mapping = defaultdict(OrderedDict)
    if sparse_col:
        sparse_col_dict = {col: i for i, col in enumerate(sparse_col)}
        name_mapping["sparse_col"].update(sparse_col_dict)
    if dense_col:
        dense_col_dict = {col: i for i, col in enumerate(dense_col)}
        name_mapping["dense_col"].update(dense_col_dict)

    if user_col and sparse_col:
        user_sparse_col = _extract_common_col(sparse_col, user_col)
        for col in user_sparse_col:
            name_mapping["user_sparse_col"].update(
                {col: name_mapping["sparse_col"][col]}
            )
    if user_col and dense_col:
        user_dense_col = _extract_common_col(dense_col, user_col)
        for col in user_dense_col:
            name_mapping["user_dense_col"].update(
                {col: name_mapping["dense_col"][col]}
            )

    if item_col and sparse_col:
        item_sparse_col = _extract_common_col(sparse_col, item_col)
        for col in item_sparse_col:
            name_mapping["item_sparse_col"].update(
                {col: name_mapping["sparse_col"][col]}
            )
    if item_col and dense_col:
        item_dense_col = _extract_common_col(dense_col, item_col)
        for col in item_dense_col:
            name_mapping["item_dense_col"].update(
                {col: name_mapping["dense_col"][col]}
            )

    return name_mapping


def _extract_common_col(col1, col2):
    # np.intersect1d returns the sorted common column names,
    # but we want to preserve the original order in which the
    # common columns appear in col1
    common_col, indices_in_col1, _ = np.intersect1d(col1, col2,
                                                    assume_unique=True,
                                                    return_indices=True)
    return common_col[np.lexsort((common_col, indices_in_col1))]
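
A quick usage sketch of col_name2index; the column names below are made up for illustration, only the functions themselves come from this file:

from libreco.utils.column_mapping import col_name2index

# hypothetical feature columns
sparse_col = ["sex", "occupation", "genre"]
dense_col = ["age", "item_price"]
user_col = ["sex", "occupation", "age"]
item_col = ["genre", "item_price"]

mapping = col_name2index(user_col, item_col, sparse_col, dense_col)
# expected, given the behaviour above:
# mapping["sparse_col"]      -> {"sex": 0, "occupation": 1, "genre": 2}
# mapping["user_sparse_col"] -> {"sex": 0, "occupation": 1}
# mapping["item_dense_col"]  -> {"item_price": 1}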

libreco/utils/exception.py

Lines changed: 9 additions & 0 deletions
@@ -0,0 +1,9 @@
class NotSamplingError(Exception):
    """Exception related to sampling data.

    Raised when the client uses batch_sampling and then evaluates on the
    dataset without having done whole-data sampling beforehand, since
    unsampled data can't be evaluated.
    """
    pass
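
A sketch of where this exception might be raised; the evaluate function and the has_sampled flag are hypothetical, not part of this commit:

from libreco.utils.exception import NotSamplingError

def evaluate(dataset):
    # hypothetical caller: refuse to evaluate data that was never sampled
    if not getattr(dataset, "has_sampled", False):
        raise NotSamplingError(
            "must do whole data sampling before evaluating on the dataset")
    # ... evaluation logic would go here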

libreco/utils/initializers.py

Lines changed: 48 additions & 0 deletions
@@ -0,0 +1,48 @@
import numpy as np


def truncated_normal(shape, mean=0.0, scale=0.05):
    total_num = np.multiply(*shape)
    array = np.random.normal(mean, scale, total_num).astype(np.float32)
    while True:
        # resample values that fall outside two standard deviations
        index = np.logical_or(
            (array > mean + 2 * scale),
            (array < mean - 2 * scale)
        )
        num = len(np.where(index)[0])
        if num == 0:
            break
        array[index] = np.random.normal(mean, scale, num)
    return array.reshape(*shape)


def xavier_init(fan_in, fan_out):
    std = np.sqrt(2.0 / (fan_in + fan_out))
    return truncated_normal(mean=0.0, scale=std, shape=[fan_in, fan_out])


def he_init(fan_in, fan_out):
    std = 2.0 / np.sqrt(fan_in + fan_out)
    # std = np.sqrt(2.0 / fan_in)
    return truncated_normal(mean=0.0, scale=std, shape=[fan_in, fan_out])


def variance_scaling(scala, fan_in=None, fan_out=None, mode="fan_in"):
    """
    xavier: mode = "fan_average", scale = 1.0
    he:     mode = "fan_in",      scale = 2.0
    he2:    mode = "fan_average", scale = 2.0
    """
    if mode == "fan_in":
        std = np.sqrt(scala / fan_in)
    elif mode == "fan_out":
        std = np.sqrt(scala / fan_out)
    elif mode == "fan_average":
        std = np.sqrt(2.0 * scala / (fan_in + fan_out))
    else:
        raise ValueError(
            "mode must be one of these: fan_in, fan_out, fan_average")
    return truncated_normal(mean=0.0, scale=std, shape=[fan_in, fan_out])
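
A minimal sanity check of the initializers, assuming the out-of-range resampling above; the shapes and values are illustrative only:

import numpy as np
from libreco.utils.initializers import (
    truncated_normal, xavier_init, variance_scaling
)

weights = truncated_normal(shape=[256, 64], mean=0.0, scale=0.05)
assert weights.shape == (256, 64)
# every value is resampled until it lies within two standard deviations
assert np.all(np.abs(weights) <= 0.1)

# per the docstring, xavier_init(fan_in, fan_out) corresponds to
# variance_scaling with scale 1.0 and mode "fan_average":
# std = sqrt(2 * 1.0 / (fan_in + fan_out))
w1 = xavier_init(256, 64)
w2 = variance_scaling(1.0, fan_in=256, fan_out=64, mode="fan_average")
assert w1.shape == w2.shape == (256, 64)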

libreco/utils/misc.py

Lines changed: 100 additions & 0 deletions
@@ -0,0 +1,100 @@
import functools
import time
from contextlib import contextmanager
import numpy as np
import tensorflow as tf


def shuffle_data(length, *args):
    # shuffle all passed arrays with the same random permutation
    mask = np.random.permutation(range(length))
    return tuple(map(lambda x: x[mask], [*args]))


def count_params():
    total_params = np.sum(
        [
            np.prod(v.get_shape().as_list())
            for v in tf.trainable_variables()
        ]
    )
    embedding_params = np.sum(
        [
            np.prod(v.get_shape().as_list())
            for v in tf.trainable_variables()
            if 'feat' in v.name
        ]
    )
    network_params = total_params - embedding_params
    total_params = f"{total_params:,}"
    embedding_params = f"{embedding_params:,}"
    network_params = f"{network_params:,}"
    print_params = (f"total params: "
                    f"{colorize(total_params, 'yellow')} | "
                    f"embedding params: "
                    f"{colorize(embedding_params, 'yellow')} | "
                    f"network params: "
                    f"{colorize(network_params, 'yellow')}")
    print(print_params)


def time_func(func):
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start = time.perf_counter()
        result = func(*args, **kwargs)
        end = time.perf_counter()
        print(f"{func.__name__} elapsed: {(end - start):3.3f}s")
        return result
    return wrapper


@contextmanager
def time_block(block_name="block", verbose=1):
    if verbose > 0:
        start = time.perf_counter()
        try:
            yield
        except Exception:
            raise
        else:
            end = time.perf_counter()
            print(f"{block_name} elapsed: {(end - start):3.3f}s")
    else:
        try:
            yield
        except Exception:
            raise


def colorize(string, color, bold=False, highlight=False):
    """Return string surrounded by appropriate terminal color codes to
    print colorized text. Valid colors: gray, red, green, yellow,
    blue, magenta, cyan, white, crimson

    Original source from openAI `gym`:
    https://github.com/openai/gym/blob/master/gym/utils/colorize.py
    """
    attr = []
    num = color2num[color]
    if highlight:
        num += 10
    attr.append(str(num))
    if bold:
        attr.append('1')
    attrs = ';'.join(attr)
    return '\x1b[%sm%s\x1b[0m' % (attrs, string)


color2num = dict(
    gray=30,
    red=31,
    green=32,
    yellow=33,
    blue=34,
    magenta=35,
    cyan=36,
    white=37,
    crimson=38
)
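
A short, hypothetical usage sketch of the timing and color helpers defined above (the training/evaluation functions are stand-ins):

import time
from libreco.utils.misc import time_func, time_block, colorize

@time_func
def train_one_epoch():
    time.sleep(0.1)  # stand-in for real work

train_one_epoch()  # prints something like "train_one_epoch elapsed: 0.100s"

with time_block("evaluation", verbose=1):
    time.sleep(0.05)  # prints "evaluation elapsed: ..." when the block exits

print(colorize("training done", "green", bold=True))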
