Skip to content

Commit 03e3d80

Browse files
committed
Merge pull request #90 from ajjahn/optimize
Scaling/Performance with a large number of raters
2 parents ae4b00a + 4df3be8 commit 03e3d80

File tree

1 file changed

+54
-25
lines changed

1 file changed

+54
-25
lines changed

lib/recommendable/helpers/calculations.rb

Lines changed: 54 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -22,16 +22,29 @@ def similarity_between(user_id, other_user_id)
2222
disliked_set = Recommendable::Helpers::RedisKeyMapper.disliked_set_for(klass, user_id)
2323
other_disliked_set = Recommendable::Helpers::RedisKeyMapper.disliked_set_for(klass, other_user_id)
2424

25+
results = Recommendable.redis.pipelined do
26+
# Agreements
27+
Recommendable.redis.sinter(liked_set, other_liked_set)
28+
Recommendable.redis.sinter(disliked_set, other_disliked_set)
29+
30+
# Disagreements
31+
Recommendable.redis.sinter(liked_set, other_disliked_set)
32+
Recommendable.redis.sinter(disliked_set, other_liked_set)
33+
34+
Recommendable.redis.scard(liked_set)
35+
Recommendable.redis.scard(disliked_set)
36+
end
37+
2538
# Agreements
26-
similarity += Recommendable.redis.sinter(liked_set, other_liked_set).size
27-
similarity += Recommendable.redis.sinter(disliked_set, other_disliked_set).size
39+
similarity += results[0].size
40+
similarity += results[1].size
2841

2942
# Disagreements
30-
similarity -= Recommendable.redis.sinter(liked_set, other_disliked_set).size
31-
similarity -= Recommendable.redis.sinter(disliked_set, other_liked_set).size
43+
similarity -= results[2].size
44+
similarity -= results[3].size
3245

33-
liked_count += Recommendable.redis.scard(liked_set)
34-
disliked_count += Recommendable.redis.scard(disliked_set)
46+
liked_count += results[4]
47+
disliked_count += results[5]
3548
end
3649

3750
similarity / (liked_count + disliked_count).to_f
@@ -65,9 +78,12 @@ def update_similarities_for(user_id)
6578
end
6679
end
6780

68-
relevant_user_ids.each do |id|
69-
next if id == user_id # Skip comparing with self.
70-
Recommendable.redis.zadd(similarity_set, similarity_between(user_id, id), id)
81+
similarity_values = relevant_user_ids.map { |id| similarity_between(user_id, id) }
82+
Recommendable.redis.pipelined do
83+
relevant_user_ids.zip(similarity_values).each do |id, similarity_value|
84+
next if id == user_id # Skip comparing with self.
85+
Recommendable.redis.zadd(similarity_set, similarity_value, id)
86+
end
7187
end
7288

7389
if knn = Recommendable.config.nearest_neighbors
@@ -98,8 +114,10 @@ def update_recommendations_for(user_id)
98114
temp_set = Recommendable::Helpers::RedisKeyMapper.temp_set_for(Recommendable.config.user_class, user_id)
99115
similarity_set = Recommendable::Helpers::RedisKeyMapper.similarity_set_for(user_id)
100116
recommended_set = Recommendable::Helpers::RedisKeyMapper.recommended_set_for(klass, user_id)
101-
most_similar_user_ids = Recommendable.redis.zrevrange(similarity_set, 0, nearest_neighbors - 1)
102-
least_similar_user_ids = Recommendable.redis.zrange(similarity_set, 0, nearest_neighbors - 1)
117+
most_similar_user_ids, least_similar_user_ids = Recommendable.redis.pipelined do
118+
Recommendable.redis.zrevrange(similarity_set, 0, nearest_neighbors - 1)
119+
Recommendable.redis.zrange(similarity_set, 0, nearest_neighbors - 1)
120+
end
103121

104122
# Get likes from the most similar users
105123
sets_to_union = most_similar_user_ids.inject([]) do |sets, id|
@@ -117,8 +135,10 @@ def update_recommendations_for(user_id)
117135
Recommendable.redis.sunionstore(temp_set, *sets_to_union)
118136
item_ids = Recommendable.redis.sdiff(temp_set, *rated_sets)
119137
scores = item_ids.map { |id| [predict_for(user_id, klass, id), id] }
120-
scores.each do |s|
121-
Recommendable.redis.zadd(recommended_set, s[0], s[1])
138+
Recommendable.redis.pipelined do
139+
scores.each do |s|
140+
Recommendable.redis.zadd(recommended_set, s[0], s[1])
141+
end
122142
end
123143

124144
Recommendable.redis.del(temp_set)
@@ -146,31 +166,40 @@ def predict_for(user_id, klass, item_id)
146166
user_id = user_id.to_s
147167
item_id = item_id.to_s
148168

149-
similarity_set = Recommendable::Helpers::RedisKeyMapper.similarity_set_for(user_id)
150169
liked_by_set = Recommendable::Helpers::RedisKeyMapper.liked_by_set_for(klass, item_id)
151170
disliked_by_set = Recommendable::Helpers::RedisKeyMapper.disliked_by_set_for(klass, item_id)
152171
similarity_sum = 0.0
172+
173+
similarity_sum += similarity_total_for(user_id, liked_by_set)
174+
similarity_sum -= similarity_total_for(user_id, disliked_by_set)
153175

154-
similarity_sum += Recommendable.redis.smembers(liked_by_set).inject(0) do |memo, id|
155-
memo += Recommendable.redis.zscore(similarity_set, id).to_f
176+
liked_by_count, disliked_by_count = Recommendable.redis.pipelined do
177+
Recommendable.redis.scard(liked_by_set)
178+
Recommendable.redis.scard(disliked_by_set)
156179
end
157-
158-
similarity_sum += Recommendable.redis.smembers(disliked_by_set).inject(0) do |memo, id|
159-
memo -= Recommendable.redis.zscore(similarity_set, id).to_f
160-
end
161-
162-
liked_by_count = Recommendable.redis.scard(liked_by_set)
163-
disliked_by_count = Recommendable.redis.scard(disliked_by_set)
164180
prediction = similarity_sum / (liked_by_count + disliked_by_count).to_f
165181
prediction.finite? ? prediction : 0.0
166182
end
183+
184+
# Sums the similarity scores between +user_id+ and every member of +set+.
#
# Reads the members of +set+ with SMEMBERS, then looks each member up in the
# similarity sorted set for +user_id+ using a single pipelined batch of
# ZSCORE calls. Each reply is coerced with +to_f+, so a nil reply adds 0.0.
#
# @param user_id [Object] id handed to RedisKeyMapper.similarity_set_for
# @param set [String] key of the Redis set whose members are looked up
#   (presumably a liked-by / disliked-by set of user ids; verify against caller)
# @return [Float] the summed scores; 0.0 when +set+ has no members
def similarity_total_for(user_id, set)
  similarity_set = Recommendable::Helpers::RedisKeyMapper.similarity_set_for(user_id)
  ids = Recommendable.redis.smembers(set)

  # Nothing to look up: skip the pipeline entirely and keep the Float contract.
  return 0.0 if ids.empty?

  scores = Recommendable.redis.pipelined do |pipeline|
    # Queue commands on the handle yielded by #pipelined. Issuing them on the
    # outer client inside the block is deprecated in redis-rb 4.6 and is no
    # longer pipelined in 5.x. Fall back to the client itself when the block
    # receives no handle.
    queue = pipeline || Recommendable.redis
    ids.each { |id| queue.zscore(similarity_set, id) }
  end

  # Explicit 0.0 seed instead of relying on nil.to_f for an empty reply list.
  scores.inject(0.0) { |total, score| total + score.to_f }
end
167194

168195
def update_score_for(klass, id)
169196
score_set = Recommendable::Helpers::RedisKeyMapper.score_set_for(klass)
170197
liked_by_set = Recommendable::Helpers::RedisKeyMapper.liked_by_set_for(klass, id)
171198
disliked_by_set = Recommendable::Helpers::RedisKeyMapper.disliked_by_set_for(klass, id)
172-
liked_by_count = Recommendable.redis.scard(liked_by_set)
173-
disliked_by_count = Recommendable.redis.scard(disliked_by_set)
199+
liked_by_count, disliked_by_count = Recommendable.redis.pipelined do
200+
Recommendable.redis.scard(liked_by_set)
201+
Recommendable.redis.scard(disliked_by_set)
202+
end
174203

175204
return 0.0 unless liked_by_count + disliked_by_count > 0
176205

0 commit comments

Comments
 (0)