Skip to content

Commit 282261b

Browse files
authored
iALS tuning strategy of doubling dimensions (#95)
* Implement the doubling-dimension strategy, and use `Sequence[Suggestion]` instead of `List[Suggestion]` for covariance.
* Add a test for doubling-dimension tuning.
* Add an example of the doubling-dimension strategy, and increment the `n_trials_following` default value.
* Add docstrings.
1 parent 2e7fd31 commit 282261b

File tree

9 files changed

+305
-92
lines changed

9 files changed

+305
-92
lines changed

examples/movielens/movielens_20m_cold.py

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
import pandas as pd
55
from scipy import sparse as sps
66

7+
from irspack import EvaluatorWithColdUser, IALSRecommender
78
from irspack.dataset.movielens import MovieLens20MDataManager
8-
from irspack.evaluator import EvaluatorWithColdUser
99
from irspack.optimizers import (
1010
AsymmetricCosineKNNOptimizer,
1111
BaseOptimizer,
@@ -69,7 +69,6 @@
6969
(AsymmetricCosineKNNOptimizer, 40, dict()),
7070
(P3alphaOptimizer, 30, dict(alpha=1)),
7171
(RP3betaOptimizer, 40, dict(alpha=1)),
72-
(IALSOptimizer, 40, dict()),
7372
(DenseSLIMOptimizer, 20, dict()),
7473
(
7574
MultVAEOptimizer,
@@ -87,7 +86,9 @@
8786
valid_evaluator,
8887
fixed_params=config,
8988
)
90-
(best_param, validation_result_df) = optimizer.optimize(n_trials=n_trials)
89+
(best_param, validation_result_df) = optimizer.optimize(
90+
n_trials=n_trials, random_seed=0
91+
)
9192
validation_result_df["recommender_name"] = recommender_name
9293
validation_results.append(validation_result_df)
9394
pd.concat(validation_results).to_csv(f"validation_scores.csv")
@@ -100,3 +101,33 @@
100101
)
101102
with open("test_results.json", "w") as ofs:
102103
json.dump(test_results, ofs, indent=2)
104+
105+
# Tuning following the strategy of
106+
# "Revisiting the Performance of iALS on Item Recommendation Benchmarks"
107+
# https://arxiv.org/abs/2110.14037
108+
ials_optimizer = IALSOptimizer(
109+
data_train.X_all,
110+
valid_evaluator,
111+
)
112+
(
113+
best_param_ials,
114+
validation_result_df_ials,
115+
) = ials_optimizer.optimize_doubling_dimension(
116+
initial_dimension=128,
117+
maximal_dimension=1024,
118+
random_seed=0,
119+
n_trials_initial=80,
120+
n_trials_following=40,
121+
)
122+
validation_result_df_ials["recommender_name"] = "IALSRecommender"
123+
validation_results.append(validation_result_df_ials)
124+
pd.concat(validation_results).to_csv(f"validation_scores.csv")
125+
test_recommender_ials = IALSRecommender(X_train_val_all, **best_param_ials)
126+
test_recommender_ials.learn()
127+
test_scores_ials = test_evaluator.get_scores(test_recommender_ials, [20, 50, 100])
128+
129+
test_results.append(
130+
dict(name="IALSRecommender", best_param=best_param_ials, **test_scores_ials)
131+
)
132+
with open("test_results.json", "w") as ofs:
133+
json.dump(test_results, ofs, indent=2)

irspack/optimizers/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
CosineUserKNNOptimizer,
88
DenseSLIMOptimizer,
99
EDLAEOptimizer,
10-
IALSOptimizer,
1110
JaccardKNNOptimizer,
1211
P3alphaOptimizer,
1312
RP3betaOptimizer,
@@ -17,6 +16,7 @@
1716
)
1817
from irspack.optimizers.autopilot import autopilot
1918
from irspack.optimizers.base_optimizer import get_optimizer_class
19+
from irspack.optimizers.ials import IALSOptimizer
2020

2121
__all__ = [
2222
"BaseOptimizer",

irspack/optimizers/_optimizers.py

Lines changed: 13 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
1-
import logging
2-
from typing import Any, Dict, List, Optional
1+
from typing import Sequence
32

43
from irspack.definitions import InteractionMatrix
54

6-
from ..evaluator import Evaluator
75
from ..optimizers.base_optimizer import (
86
BaseOptimizer,
97
BaseOptimizerWithEarlyStopping,
@@ -23,7 +21,6 @@
2321
CosineUserKNNRecommender,
2422
DenseSLIMRecommender,
2523
EDLAERecommender,
26-
IALSRecommender,
2724
JaccardKNNRecommender,
2825
P3alphaRecommender,
2926
RP3betaRecommender,
@@ -51,64 +48,24 @@ def _get_maximal_n_components_for_budget(
5148

5249

5350
class TopPopOptimizer(BaseOptimizer):
54-
default_tune_range: List[Suggestion] = []
51+
default_tune_range: Sequence[Suggestion] = []
5552
recommender_class = TopPopRecommender
5653

5754
@classmethod
5855
def tune_range_given_memory_budget(
5956
cls, X: InteractionMatrix, memory_budget: int
60-
) -> List[Suggestion]:
57+
) -> Sequence[Suggestion]:
6158
return []
6259

6360

64-
class IALSOptimizer(BaseOptimizerWithEarlyStopping):
65-
default_tune_range = [
66-
IntegerSuggestion("n_components", 4, 300),
67-
LogUniformSuggestion("alpha0", 3e-3, 1),
68-
LogUniformSuggestion("reg", 1e-4, 1e-1),
69-
]
70-
recommender_class = IALSRecommender
71-
72-
def __init__(
73-
self,
74-
data: InteractionMatrix,
75-
val_evaluator: Evaluator,
76-
logger: Optional[logging.Logger] = None,
77-
suggest_overwrite: List[Suggestion] = [],
78-
fixed_params: Dict[str, Any] = {},
79-
max_epoch: int = 16,
80-
validate_epoch: int = 1,
81-
score_degradation_max: int = 5,
82-
):
83-
super().__init__(
84-
data,
85-
val_evaluator,
86-
logger=logger,
87-
suggest_overwrite=suggest_overwrite,
88-
fixed_params=fixed_params,
89-
max_epoch=max_epoch,
90-
validate_epoch=validate_epoch,
91-
score_degradation_max=score_degradation_max,
92-
)
93-
94-
@classmethod
95-
def tune_range_given_memory_budget(
96-
cls, X: InteractionMatrix, memory_budget: int
97-
) -> List[Suggestion]:
98-
n_components = _get_maximal_n_components_for_budget(X, memory_budget, 300)
99-
return [
100-
IntegerSuggestion("n_components", 4, n_components),
101-
]
102-
103-
10461
class DenseSLIMOptimizer(BaseOptimizer):
105-
default_tune_range: List[Suggestion] = [LogUniformSuggestion("reg", 1, 1e4)]
62+
default_tune_range: Sequence[Suggestion] = [LogUniformSuggestion("reg", 1, 1e4)]
10663
recommender_class = DenseSLIMRecommender
10764

10865
@classmethod
10966
def tune_range_given_memory_budget(
11067
cls, X: InteractionMatrix, memory_budget: int
111-
) -> List[Suggestion]:
68+
) -> Sequence[Suggestion]:
11269
n_items: int = X.shape[1]
11370
if (1e6 * memory_budget) < (4 * 2 * n_items**2):
11471
raise LowMemoryError(
@@ -118,7 +75,7 @@ def tune_range_given_memory_budget(
11875

11976

12077
class EDLAEOptimizer(BaseOptimizer):
121-
default_tune_range: List[Suggestion] = [
78+
default_tune_range: Sequence[Suggestion] = [
12279
LogUniformSuggestion("reg", 1, 1e4),
12380
UniformSuggestion("dropout_p", 0.0, 0.99),
12481
]
@@ -127,7 +84,7 @@ class EDLAEOptimizer(BaseOptimizer):
12784
@classmethod
12885
def tune_range_given_memory_budget(
12986
cls, X: InteractionMatrix, memory_budget: int
130-
) -> List[Suggestion]:
87+
) -> Sequence[Suggestion]:
13188
n_items: int = X.shape[1]
13289
if (1e6 * memory_budget) < (4 * 2 * n_items**2):
13390
raise LowMemoryError(
@@ -146,7 +103,7 @@ class TruncatedSVDOptimizer(BaseOptimizer):
146103
@classmethod
147104
def tune_range_given_memory_budget(
148105
cls, X: InteractionMatrix, memory_budget: int
149-
) -> List[Suggestion]:
106+
) -> Sequence[Suggestion]:
150107
n_components = _get_maximal_n_components_for_budget(X, memory_budget, 512)
151108
return [
152109
IntegerSuggestion("n_components", 4, n_components),
@@ -163,7 +120,7 @@ class NMFOptimizer(BaseOptimizer):
163120
@classmethod
164121
def tune_range_given_memory_budget(
165122
cls, X: InteractionMatrix, memory_budget: int
166-
) -> List[Suggestion]:
123+
) -> Sequence[Suggestion]:
167124
n_components = _get_maximal_n_components_for_budget(X, memory_budget, 512)
168125
return [
169126
IntegerSuggestion("n_components", 4, n_components),
@@ -177,7 +134,7 @@ class SimilarityBasedOptimizerBase(BaseOptimizer):
177134
@classmethod
178135
def tune_range_given_memory_budget(
179136
cls, X: InteractionMatrix, memory_budget: int
180-
) -> List[Suggestion]:
137+
) -> Sequence[Suggestion]:
181138
top_k_max = min(int(1e6 * memory_budget / 4 // (X.shape[1] + 1)), 1024)
182139
if top_k_max <= 4:
183140
raise LowMemoryError(
@@ -249,7 +206,7 @@ class UserSimilarityBasedOptimizerBase(BaseOptimizer):
249206
@classmethod
250207
def tune_range_given_memory_budget(
251208
cls, X: InteractionMatrix, memory_budget: int
252-
) -> List[Suggestion]:
209+
) -> Sequence[Suggestion]:
253210
top_k_max = min(int(1e6 * memory_budget / 4 // (X.shape[0] + 1)), 1024)
254211
return [
255212
IntegerSuggestion("top_k", 4, top_k_max),
@@ -287,7 +244,7 @@ class BPRFMOptimizer(BaseOptimizerWithEarlyStopping):
287244
@classmethod
288245
def tune_range_given_memory_budget(
289246
cls, X: InteractionMatrix, memory_budget: int
290-
) -> List[Suggestion]:
247+
) -> Sequence[Suggestion]:
291248
# memory usage will be roughly 4 (float) * (n_users + n_items) * k
292249
n_components = _get_maximal_n_components_for_budget(X, memory_budget, 300)
293250
return [
@@ -312,7 +269,7 @@ class MultVAEOptimizer(BaseOptimizerWithEarlyStopping):
312269
@classmethod
313270
def tune_range_given_memory_budget(
314271
cls, X: InteractionMatrix, memory_budget: int
315-
) -> List[Suggestion]:
272+
) -> Sequence[Suggestion]:
316273
if memory_budget * 1e6 > (X.shape[1] * 2048 * 8):
317274
raise LowMemoryError(
318275
f"Memory budget {memory_budget} too small for MultVAE to work."

irspack/optimizers/autopilot.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from multiprocessing import Pipe as mp_pipe
77
from multiprocessing import Process
88
from pathlib import Path
9-
from typing import Any, Callable, Dict, List, Optional, Tuple, Type
9+
from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Type
1010
from uuid import uuid1
1111

1212
import numpy as np
@@ -37,7 +37,7 @@ def search_one(
3737
X: InteractionMatrix,
3838
evaluator: Evaluator,
3939
optimizer_names: List[str],
40-
suggest_overwrites: Dict[str, List[Suggestion]],
40+
suggest_overwrites: Dict[str, Sequence[Suggestion]],
4141
db_url: str,
4242
study_name: str,
4343
random_seed: int,
@@ -72,7 +72,7 @@ def __init__(
7272
X: InteractionMatrix,
7373
evaluator: Evaluator,
7474
optimizer_names: List[str],
75-
suggest_overwrites: Dict[str, List[Suggestion]],
75+
suggest_overwrites: Dict[str, Sequence[Suggestion]],
7676
db_url: str,
7777
study_name: str,
7878
random_seed: int,
@@ -111,7 +111,7 @@ def __init__(
111111
X: InteractionMatrix,
112112
evaluator: Evaluator,
113113
optimizer_names: List[str],
114-
suggest_overwrites: Dict[str, List[Suggestion]],
114+
suggest_overwrites: Dict[str, Sequence[Suggestion]],
115115
db_url: str,
116116
study_name: str,
117117
random_seed: int,
@@ -156,7 +156,7 @@ def __init__(
156156
X: InteractionMatrix,
157157
evaluator: Evaluator,
158158
optimizer_names: List[str],
159-
suggest_overwrites: Dict[str, List[Suggestion]],
159+
suggest_overwrites: Dict[str, Sequence[Suggestion]],
160160
db_url: str,
161161
study_name: str,
162162
random_seed: int,
@@ -275,7 +275,7 @@ def autopilot(
275275
if storage is not None and study_name is None:
276276
raise ValueError('"study_name" must be specified if "storage" is given.')
277277
RNS = np.random.RandomState(random_seed)
278-
suggest_overwrites: Dict[str, List[Suggestion]] = {}
278+
suggest_overwrites: Dict[str, Sequence[Suggestion]] = {}
279279
optimizer_names: List[str] = []
280280
for rec_name in algorithms:
281281
optimizer_class_name = rec_name + "Optimizer"

0 commit comments

Comments
 (0)