Skip to content

Commit 33a9877

Browse files
tohtskytohtsky
and
tohtsky
authored
Improve id mapping (#87)
* Use TypeVar * allowed_item_ids and per_user_item_ids * Allowed_item_ids can be set uniformly * pre-commit should run on every push * improve test * Run isort Co-authored-by: tohtsky <you@example.com>
1 parent 2b3328e commit 33a9877

File tree

4 files changed

+125
-47
lines changed

4 files changed

+125
-47
lines changed

.github/workflows/pre-commit.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ name: pre-commit
22
on:
33
pull_request:
44
push:
5-
branches: [master]
65
jobs:
76
pre-commit:
87
runs-on: ubuntu-latest

cpp_source/util.hpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -433,7 +433,7 @@ retrieve_recommend_from_score(
433433
check_arg(n_threads > 0, "n_threads must not be 0.");
434434
check_arg(
435435
(score.rows() == static_cast<int64_t>(allowed_indices.size())) ||
436-
allowed_indices.empty(),
436+
(allowed_indices.size() == 1u) || allowed_indices.empty(),
437437
"allowed_indices, if not empty, must have a size equal to X.rows()");
438438
std::vector<std::vector<score_and_index>> result(score.rows());
439439
std::vector<std::future<void>> workers;
@@ -458,7 +458,16 @@ retrieve_recommend_from_score(
458458

459459
index_holder.clear();
460460
if (!allowed_indices.empty()) {
461-
for (auto item_index : allowed_indices.at(current)) {
461+
std::vector<int64_t>::const_iterator begin, end;
462+
if (allowed_indices.size() == 1u) {
463+
begin = allowed_indices[0].cbegin();
464+
end = allowed_indices[0].cend();
465+
} else {
466+
begin = allowed_indices.at(current).cbegin();
467+
end = allowed_indices.at(current).cend();
468+
}
469+
for (; begin != end; begin++) {
470+
auto item_index = *begin;
462471
if ((item_index < n_items) && (item_index >= 0)) {
463472
index_holder.emplace_back(item_index, score_ptr[item_index]);
464473
}

irspack/utils/id_mapping.py

Lines changed: 97 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
from typing import (
22
TYPE_CHECKING,
3-
Any,
43
Dict,
4+
Generic,
55
Iterable,
66
List,
77
Optional,
88
Sequence,
99
Tuple,
10+
TypeVar,
1011
Union,
1112
)
1213

@@ -43,7 +44,11 @@ def retrieve_recommend_from_score(
4344
raise ValueError("Only float32 or float64 are allowed.")
4445

4546

46-
class IDMappedRecommender:
47+
UserIdType = TypeVar("UserIdType")
48+
ItemIdType = TypeVar("ItemIdType")
49+
50+
51+
class IDMappedRecommender(Generic[UserIdType, ItemIdType]):
4752
"""A utility class that helps mapping user/item ids to index, retrieving recommendation score,
4853
and making a recommendation.
4954
@@ -63,7 +68,10 @@ class IDMappedRecommender:
6368
"""
6469

6570
def __init__(
66-
self, recommender: "BaseRecommender", user_ids: List[Any], item_ids: List[Any]
71+
self,
72+
recommender: "BaseRecommender",
73+
user_ids: List[UserIdType],
74+
item_ids: List[ItemIdType],
6775
):
6876

6977
if (recommender.n_users != len(user_ids)) or (
@@ -79,11 +87,11 @@ def __init__(
7987
self.user_id_to_index = {user_id: i for i, user_id in enumerate(user_ids)}
8088
self.item_id_to_index = {item_id: i for i, item_id in enumerate(item_ids)}
8189

82-
def _item_id_list_to_index_list(self, ids: Iterable[Any]) -> List[int]:
90+
def _item_id_list_to_index_list(self, ids: Iterable[ItemIdType]) -> List[int]:
8391
return [self.item_id_to_index[id] for id in ids if id in self.item_id_to_index]
8492

8593
def _user_profile_to_data_col(
86-
self, profile: Union[List[Any], Dict[Any, float]]
94+
self, profile: Union[List[ItemIdType], Dict[ItemIdType, float]]
8795
) -> Tuple[List[float], List[int]]:
8896
data: List[float]
8997
cols: List[int]
@@ -101,7 +109,7 @@ def _user_profile_to_data_col(
101109
return data, cols
102110

103111
def _list_of_user_profile_to_matrix(
104-
self, users_info: Sequence[Union[List[Any], Dict[Any, float]]]
112+
self, users_info: Sequence[Union[List[ItemIdType], Dict[ItemIdType, float]]]
105113
) -> sps.csr_matrix:
106114
data: List[float] = []
107115
indptr: List[int] = [0]
@@ -120,11 +128,11 @@ def _list_of_user_profile_to_matrix(
120128

121129
def get_recommendation_for_known_user_id(
122130
self,
123-
user_id: Any,
131+
user_id: UserIdType,
124132
cutoff: int = 20,
125-
allowed_item_ids: Optional[List[Any]] = None,
126-
forbidden_item_ids: Optional[List[Any]] = None,
127-
) -> List[Tuple[Any, float]]:
133+
allowed_item_ids: Optional[List[ItemIdType]] = None,
134+
forbidden_item_ids: Optional[List[ItemIdType]] = None,
135+
) -> List[Tuple[ItemIdType, float]]:
128136
"""Retrieve recommendation result for a known user.
129137
Args:
130138
user_id:
@@ -151,7 +159,7 @@ def get_recommendation_for_known_user_id(
151159
)
152160

153161
score = self.recommender.get_score_remove_seen(user_index)[0, :]
154-
return self._score_to_recommended_items(
162+
return self.score_to_recommended_items(
155163
score,
156164
cutoff=cutoff,
157165
allowed_item_ids=allowed_item_ids,
@@ -160,12 +168,13 @@ def get_recommendation_for_known_user_id(
160168

161169
def get_recommendation_for_known_user_batch(
162170
self,
163-
user_ids: List[Any],
171+
user_ids: List[UserIdType],
164172
cutoff: int = 20,
165-
allowed_item_ids: Optional[List[List[Any]]] = None,
166-
forbidden_item_ids: Optional[List[List[Any]]] = None,
173+
allowed_item_ids: Optional[List[ItemIdType]] = None,
174+
per_user_allowed_item_ids: Optional[List[List[ItemIdType]]] = None,
175+
forbidden_item_ids: Optional[List[List[ItemIdType]]] = None,
167176
n_threads: Optional[int] = None,
168-
) -> List[List[Tuple[Any, float]]]:
177+
) -> List[List[Tuple[ItemIdType, float]]]:
169178
"""Retrieve recommendation result for a list of known users.
170179
171180
Args:
@@ -174,13 +183,21 @@ def get_recommendation_for_known_user_batch(
174183
cutoff:
175184
Maximal number of recommendations allowed.
176185
allowed_item_ids:
186+
If not ``None``, defines "a list of recommendable item IDs".
187+
Ignored if `per_user_allowed_item_ids` is set.
188+
per_user_allowed_item_ids:
177189
If not ``None``, defines "a list of list of recommendable item IDs"
178-
and ``len(allowed_item_ids)`` must be equal to ``len(item_ids)``.
190+
and ``len(per_user_allowed_item_ids)`` must be equal to ``score.shape[0]``.
179191
Defaults to ``None``.
192+
180193
forbidden_item_ids:
181194
If not ``None``, defines "a list of list of forbidden item IDs"
182195
and ``len(forbidden_item_ids)`` must be equal to ``len(user_ids)``.
183196
Defaults to ``None``.
197+
n_threads:
198+
Specifies the number of threads to use for the computation.
199+
If ``None``, the environment variable ``"IRSPACK_NUM_THREADS_DEFAULT"`` will be looked up,
200+
and if the variable is not set, it will be set to ``os.cpu_count()``. Defaults to None.
184201
185202
Returns:
186203
A list of list of tuples consisting of ``(item_id, score)``.
@@ -191,21 +208,22 @@ def get_recommendation_for_known_user_batch(
191208
)
192209

193210
score = self.recommender.get_score_remove_seen(user_indexes)
194-
return self._score_to_recommended_items_batch(
211+
return self.score_to_recommended_items_batch(
195212
score,
196213
cutoff=cutoff,
197214
allowed_item_ids=allowed_item_ids,
215+
per_user_allowed_item_ids=per_user_allowed_item_ids,
198216
forbidden_item_ids=forbidden_item_ids,
199217
n_threads=get_n_threads(n_threads=n_threads),
200218
)
201219

202220
def get_recommendation_for_new_user(
203221
self,
204-
user_profile: Union[List[Any], Dict[Any, float]],
222+
user_profile: Union[List[ItemIdType], Dict[ItemIdType, float]],
205223
cutoff: int = 20,
206-
allowed_item_ids: Optional[List[Any]] = None,
207-
forbidden_item_ids: Optional[List[Any]] = None,
208-
) -> List[Tuple[Any, float]]:
224+
allowed_item_ids: Optional[List[ItemIdType]] = None,
225+
forbidden_item_ids: Optional[List[ItemIdType]] = None,
226+
) -> List[Tuple[ItemIdType, float]]:
209227
"""Retrieve recommendation result for a previously unseen user using item ids with which he or she interacted.
210228
211229
Args:
@@ -229,7 +247,7 @@ def get_recommendation_for_new_user(
229247
(data, cols, [0, len(cols)]), shape=(1, len(self.item_ids))
230248
)
231249
score = self.recommender.get_score_cold_user_remove_seen(X_input)[0]
232-
return self._score_to_recommended_items(
250+
return self.score_to_recommended_items(
233251
score,
234252
cutoff,
235253
allowed_item_ids=allowed_item_ids,
@@ -238,12 +256,13 @@ def get_recommendation_for_new_user(
238256

239257
def get_recommendation_for_new_user_batch(
240258
self,
241-
user_profiles: Sequence[Union[List[Any], Dict[Any, float]]],
259+
user_profiles: Sequence[Union[List[ItemIdType], Dict[ItemIdType, float]]],
242260
cutoff: int = 20,
243-
allowed_item_ids: Optional[List[List[Any]]] = None,
244-
forbidden_item_ids: Optional[List[List[Any]]] = None,
261+
allowed_item_ids: Optional[List[ItemIdType]] = None,
262+
per_user_allowed_item_ids: Optional[List[List[ItemIdType]]] = None,
263+
forbidden_item_ids: Optional[List[List[ItemIdType]]] = None,
245264
n_threads: Optional[int] = None,
246-
) -> List[List[Tuple[Any, float]]]:
265+
) -> List[List[Tuple[ItemIdType, float]]]:
247266
"""Retrieve recommendation result for a previously unseen users using item ids with which they have interacted.
248267
249268
Args:
@@ -254,35 +273,43 @@ def get_recommendation_for_new_user_batch(
254273
cutoff:
255274
Maximal number of recommendations allowed.
256275
allowed_item_ids:
276+
If not ``None``, defines "a list of recommendable item IDs".
277+
Ignored if `per_user_allowed_item_ids` is set.
278+
per_user_allowed_item_ids:
257279
If not ``None``, defines "a list of list of recommendable item IDs"
258-
and ``len(allowed_item_ids)`` must be equal to ``len(item_ids)``.
280+
and ``len(per_user_allowed_item_ids)`` must be equal to ``score.shape[0]``.
259281
Defaults to ``None``.
260282
forbidden_item_ids:
261283
If not ``None``, defines "a list of list of forbidden item IDs"
262284
and ``len(forbidden_item_ids)`` must be equal to ``len(user_profiles)``.
263285
Defaults to ``None``.
286+
n_threads:
287+
Specifies the number of threads to use for the computation.
288+
If ``None``, the environment variable ``"IRSPACK_NUM_THREADS_DEFAULT"`` will be looked up,
289+
and if the variable is not set, it will be set to ``os.cpu_count()``. Defaults to None.
264290
265291
Returns:
266292
A list of list of tuples consisting of ``(item_id, score)``.
267293
Each internal list corresponds to the recommender's recommendation output.
268294
"""
269295
X_input = self._list_of_user_profile_to_matrix(user_profiles)
270296
score = self.recommender.get_score_cold_user_remove_seen(X_input)
271-
return self._score_to_recommended_items_batch(
297+
return self.score_to_recommended_items_batch(
272298
score,
273299
cutoff,
274300
allowed_item_ids=allowed_item_ids,
301+
per_user_allowed_item_ids=per_user_allowed_item_ids,
275302
forbidden_item_ids=forbidden_item_ids,
276303
n_threads=get_n_threads(n_threads=n_threads),
277304
)
278305

279-
def _score_to_recommended_items(
306+
def score_to_recommended_items(
280307
self,
281308
score: DenseScoreArray,
282309
cutoff: int,
283-
allowed_item_ids: Optional[List[Any]] = None,
284-
forbidden_item_ids: Optional[List[Any]] = None,
285-
) -> List[Tuple[Any, float]]:
310+
allowed_item_ids: Optional[List[ItemIdType]] = None,
311+
forbidden_item_ids: Optional[List[ItemIdType]] = None,
312+
) -> List[Tuple[ItemIdType, float]]:
286313
if allowed_item_ids is not None:
287314
allowed_item_indices = np.asarray(
288315
self._item_id_list_to_index_list(allowed_item_ids), dtype=np.int64
@@ -292,7 +319,7 @@ def _score_to_recommended_items(
292319
]
293320
else:
294321
high_score_inds = score.argsort()[::-1]
295-
recommendations: List[Tuple[Any, float]] = []
322+
recommendations: List[Tuple[ItemIdType, float]] = []
296323
for i in high_score_inds:
297324
i_int = int(i)
298325
score_this = score[i_int]
@@ -307,24 +334,51 @@ def _score_to_recommended_items(
307334
break
308335
return recommendations
309336

310-
def _score_to_recommended_items_batch(
337+
def score_to_recommended_items_batch(
311338
self,
312339
score: DenseScoreArray,
313340
cutoff: int,
314-
allowed_item_ids: Optional[List[List[Any]]] = None,
315-
forbidden_item_ids: Optional[List[List[Any]]] = None,
316-
n_threads: int = 1,
317-
) -> List[List[Tuple[Any, float]]]:
341+
allowed_item_ids: Optional[List[ItemIdType]] = None,
342+
per_user_allowed_item_ids: Optional[List[List[ItemIdType]]] = None,
343+
forbidden_item_ids: Optional[List[List[ItemIdType]]] = None,
344+
n_threads: Optional[int] = None,
345+
) -> List[List[Tuple[ItemIdType, float]]]:
346+
r"""Retrieve recommendation from score array.
347+
Args:
348+
score:
349+
2d numpy ndarray of scores, with one row per user.
350+
cutoff:
351+
Maximal number of recommendations allowed.
352+
allowed_item_ids:
353+
If not ``None``, defines "a list of recommendable item IDs".
354+
Ignored if `per_user_allowed_item_ids` is set.
355+
per_user_allowed_item_ids:
356+
If not ``None``, defines "a list of list of recommendable item IDs"
357+
and ``len(per_user_allowed_item_ids)`` must be equal to ``score.shape[0]``.
358+
Defaults to ``None``.
359+
allowed_item_ids:
360+
If not ``None``, defines "a list of list of recommendable item IDs"
361+
and ``len(allowed_item_ids)`` must be equal to ``len(item_ids)``.
362+
Defaults to ``None``.
363+
forbidden_item_ids:
364+
If not ``None``, defines "a list of list of forbidden item IDs"
365+
and ``len(forbidden_item_ids)`` must be equal to ``score.shape[0]``.
366+
Defaults to ``None``.
367+
368+
"""
369+
318370
if forbidden_item_ids is not None:
319371
assert len(forbidden_item_ids) == score.shape[0]
320-
if allowed_item_ids is not None:
321-
assert len(allowed_item_ids) == score.shape[0]
372+
if per_user_allowed_item_ids is not None:
373+
assert len(per_user_allowed_item_ids) == score.shape[0]
322374

323375
allowed_item_indices: List[List[int]] = []
324-
if allowed_item_ids is not None:
376+
if per_user_allowed_item_ids is not None:
325377
allowed_item_indices = [
326-
self._item_id_list_to_index_list(_) for _ in allowed_item_ids
378+
self._item_id_list_to_index_list(_) for _ in per_user_allowed_item_ids
327379
]
380+
elif allowed_item_ids is not None:
381+
allowed_item_indices = [self._item_id_list_to_index_list(allowed_item_ids)]
328382
if forbidden_item_ids is not None:
329383
for u, forbidden_ids_per_user in enumerate(forbidden_item_ids):
330384
score[
@@ -335,7 +389,7 @@ def _score_to_recommended_items_batch(
335389
score,
336390
allowed_item_indices,
337391
cutoff,
338-
n_threads=n_threads,
392+
n_threads=get_n_threads(n_threads),
339393
)
340394
return [
341395
[

tests/utils/test_id_mapper.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ def test_basic_usecase(dtype: str) -> None:
220220
nonzero_batch,
221221
cutoff=n_items,
222222
forbidden_item_ids=forbidden_items_batch,
223-
allowed_item_ids=allowed_items_batch,
223+
per_user_allowed_item_ids=allowed_items_batch,
224224
n_threads=1,
225225
)
226226
assert len(batch_result_masked_restricted) == n_users
@@ -256,3 +256,19 @@ def test_basic_usecase(dtype: str) -> None:
256256
softmax_denom = X.shape[1] - nnz + np.exp(2) * nnz
257257
for _, score in result:
258258
assert score == pytest.approx(1 / softmax_denom)
259+
260+
allowed_items_uniform = [str(x) for x in RNS.choice(item_ids, size=2)]
261+
batch_result_masked_uniform_allowed_ids = (
262+
mapped_rec.get_recommendation_for_new_user_batch(
263+
nonzero_batch,
264+
cutoff=n_items,
265+
allowed_item_ids=allowed_items_uniform,
266+
n_threads=1,
267+
)
268+
)
269+
cnt = 0
270+
for x in batch_result_masked_uniform_allowed_ids:
271+
for rec_id, score in x:
272+
assert rec_id in allowed_items_uniform
273+
cnt += 1
274+
assert cnt > 0

0 commit comments

Comments
 (0)