Skip to content

Commit 4650790

Browse files
alda-optimizely authored and rashidsp committed
IP Anonymization & Bucketing IDs (#67, #68)
1 parent 9f5a688 commit 4650790

File tree

9 files changed

+282
-67
lines changed

9 files changed

+282
-67
lines changed

lib/optimizely/bucketer.rb

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ module Optimizely
2020
class Bucketer
2121
# Optimizely bucketing algorithm that evenly distributes visitors.
2222

23-
BUCKETING_ID_TEMPLATE = '%{user_id}%{entity_id}'
23+
BUCKETING_ID_TEMPLATE = '%{bucketing_id}%{entity_id}'
2424
HASH_SEED = 1
2525
MAX_HASH_VALUE = 2**32
2626
MAX_TRAFFIC_VALUE = 10_000
@@ -35,13 +35,15 @@ def initialize(config)
3535
@config = config
3636
end
3737

38-
def bucket(experiment, user_id)
38+
def bucket(experiment, bucketing_id, user_id)
3939
# Determines ID of variation to be shown for a given experiment key and user ID.
4040
#
4141
# experiment - Experiment for which visitor is to be bucketed.
42+
# bucketing_id - String A customer-assigned value used to generate the bucketing key
4243
# user_id - String ID for user.
4344
#
4445
# Returns variation in which visitor with ID user_id has been placed. Nil if no variation.
46+
return nil if experiment.nil?
4547

4648
# check if experiment is in a group; if so, check if user is bucketed into specified experiment
4749
experiment_id = experiment['id']
@@ -51,7 +53,7 @@ def bucket(experiment, user_id)
5153
group = @config.group_key_map.fetch(group_id)
5254
if Helpers::Group.random_policy?(group)
5355
traffic_allocations = group.fetch('trafficAllocation')
54-
bucketed_experiment_id = find_bucket(user_id, group_id, traffic_allocations)
56+
bucketed_experiment_id = find_bucket(bucketing_id, user_id, group_id, traffic_allocations)
5557
# return if the user is not bucketed into any experiment
5658
unless bucketed_experiment_id
5759
@config.logger.log(Logger::INFO, "User '#{user_id}' is in no experiment.")
@@ -76,7 +78,7 @@ def bucket(experiment, user_id)
7678
end
7779

7880
traffic_allocations = experiment['trafficAllocation']
79-
variation_id = find_bucket(user_id, experiment_id, traffic_allocations)
81+
variation_id = find_bucket(bucketing_id, user_id, experiment_id, traffic_allocations)
8082
if variation_id && variation_id != ''
8183
variation = @config.get_variation_from_id(experiment_key, variation_id)
8284
variation_key = variation ? variation['key'] : nil
@@ -96,18 +98,18 @@ def bucket(experiment, user_id)
9698
nil
9799
end
98100

99-
def find_bucket(user_id, parent_id, traffic_allocations)
101+
def find_bucket(bucketing_id, user_id, parent_id, traffic_allocations)
100102
# Helper function to find the matching entity ID for a given bucketing value in a list of traffic allocations.
101103
#
104+
# bucketing_id - String A customer-assigned value used to generate the bucketing key
102105
# user_id - String ID for user
103106
# parent_id - String entity ID to use for bucketing ID
104107
# traffic_allocations - Array of traffic allocations
105108
#
106109
# Returns entity ID corresponding to the provided bucket value or nil if no match is found.
107-
108-
bucketing_id = sprintf(BUCKETING_ID_TEMPLATE, user_id: user_id, entity_id: parent_id)
109-
bucket_value = generate_bucket_value(bucketing_id)
110-
@config.logger.log(Logger::DEBUG, "Assigned bucket #{bucket_value} to user '#{user_id}'.")
110+
bucketing_key = sprintf(BUCKETING_ID_TEMPLATE, bucketing_id: bucketing_id, entity_id: parent_id)
111+
bucket_value = generate_bucket_value(bucketing_key)
112+
@config.logger.log(Logger::DEBUG, "Assigned bucket #{bucket_value} to user '#{user_id}' with bucketing ID: '#{bucketing_id}'.")
111113

112114
traffic_allocations.each do |traffic_allocation|
113115
current_end_of_range = traffic_allocation['endOfRange']
@@ -122,25 +124,25 @@ def find_bucket(user_id, parent_id, traffic_allocations)
122124

123125
private
124126

125-
def generate_bucket_value(bucketing_id)
127+
def generate_bucket_value(bucketing_key)
126128
# Helper function to generate bucket value in half-closed interval [0, MAX_TRAFFIC_VALUE).
127129
#
128-
# bucketing_id - String ID for bucketing.
130+
# bucketing_key - String - Value used to generate bucket value
129131
#
130-
# Returns bucket value corresponding to the provided bucketing ID.
132+
# Returns bucket value corresponding to the provided bucketing key.
131133

132-
ratio = (generate_unsigned_hash_code_32_bit(bucketing_id)).to_f / MAX_HASH_VALUE
134+
ratio = (generate_unsigned_hash_code_32_bit(bucketing_key)).to_f / MAX_HASH_VALUE
133135
(ratio * MAX_TRAFFIC_VALUE).to_i
134136
end
135137

136-
def generate_unsigned_hash_code_32_bit(bucketing_id)
138+
def generate_unsigned_hash_code_32_bit(bucketing_key)
137139
# Helper function to retrieve hash code
138140
#
139-
# bucketing_id - String ID for bucketing.
141+
# bucketing_key - String - Value used for the key of the murmur hash
140142
#
141143
# Returns hash code which is a 32 bit unsigned integer.
142144

143-
MurmurHash3::V32.str_hash(bucketing_id, @bucket_seed) & UNSIGNED_MAX_32_BIT_VALUE
145+
MurmurHash3::V32.str_hash(bucketing_key, @bucket_seed) & UNSIGNED_MAX_32_BIT_VALUE
144146
end
145147
end
146148
end

lib/optimizely/decision_service.rb

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
require_relative './bucketer'
1717

1818
module Optimizely
19+
20+
RESERVED_ATTRIBUTE_KEY_BUCKETING_ID = "\$opt_bucketing_id".freeze
21+
1922
class DecisionService
2023
# Optimizely's decision service that determines into which variation of an experiment a user will be allocated.
2124
#
@@ -47,6 +50,17 @@ def get_variation(experiment_key, user_id, attributes = nil)
4750
#
4851
# Returns variation ID where visitor will be bucketed (nil if experiment is inactive or user does not meet audience conditions)
4952

53+
# By default, the bucketing ID should be the user ID
54+
bucketing_id = user_id;
55+
56+
# If the bucketing ID key is defined in attributes, then use that in place of the userID
57+
if attributes and attributes[RESERVED_ATTRIBUTE_KEY_BUCKETING_ID].is_a? String
58+
unless attributes[RESERVED_ATTRIBUTE_KEY_BUCKETING_ID].empty?
59+
bucketing_id = attributes[RESERVED_ATTRIBUTE_KEY_BUCKETING_ID]
60+
@config.logger.log(Logger::DEBUG, "Setting the bucketing ID '#{bucketing_id}'")
61+
end
62+
end
63+
5064
# Check to make sure experiment is active
5165
experiment = @config.get_experiment_from_key(experiment_key)
5266
if experiment.nil?
@@ -88,7 +102,7 @@ def get_variation(experiment_key, user_id, attributes = nil)
88102
end
89103

90104
# Bucket normally
91-
variation = @bucketer.bucket(experiment, user_id)
105+
variation = @bucketer.bucket(experiment, bucketing_id, user_id)
92106
variation_id = variation ? variation['id'] : nil
93107

94108
# Persist bucketing decision

lib/optimizely/event_builder.rb

Lines changed: 33 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@
2020
require 'securerandom'
2121

2222
module Optimizely
23+
24+
RESERVED_ATTRIBUTE_KEY_BUCKETING_ID_EVENT_PARAM_KEY = "optimizely_bucketing_id".freeze
25+
2326
class Event
2427
# Representation of an event which can be sent to the Optimizely logging endpoint.
2528

@@ -69,22 +72,33 @@ def get_common_params(user_id, attributes)
6972
attribute_value = attributes[attribute_key]
7073
next if attribute_value.nil?
7174

72-
# Skip attributes not in the datafile
73-
attribute_id = @config.get_attribute_id(attribute_key)
74-
next unless attribute_id
75-
76-
feature = {
77-
entity_id: attribute_id,
78-
key: attribute_key,
79-
type: CUSTOM_ATTRIBUTE_FEATURE_TYPE,
80-
value: attribute_value
81-
}
75+
if attribute_key.eql? RESERVED_ATTRIBUTE_KEY_BUCKETING_ID
76+
# TODO (Copied from PHP-SDK) (Alda): the type for bucketing ID attribute may change so
77+
# that custom attributes are not overloaded
78+
feature = {
79+
entity_id: RESERVED_ATTRIBUTE_KEY_BUCKETING_ID,
80+
key: RESERVED_ATTRIBUTE_KEY_BUCKETING_ID_EVENT_PARAM_KEY,
81+
type: CUSTOM_ATTRIBUTE_FEATURE_TYPE,
82+
value: attribute_value
83+
}
84+
else
85+
# Skip attributes not in the datafile
86+
attribute_id = @config.get_attribute_id(attribute_key)
87+
next unless attribute_id
88+
89+
feature = {
90+
entity_id: attribute_id,
91+
key: attribute_key,
92+
type: CUSTOM_ATTRIBUTE_FEATURE_TYPE,
93+
value: attribute_value
94+
}
8295

83-
visitor_attributes.push(feature)
8496
end
85-
end
97+
visitor_attributes.push(feature)
98+
end
99+
end
86100

87-
common_params = {
101+
common_params = {
88102
account_id: @config.account_id,
89103
project_id: @config.project_id,
90104
visitors: [
@@ -93,11 +107,12 @@ def get_common_params(user_id, attributes)
93107
snapshots: [],
94108
visitor_id: user_id
95109
}
96-
],
97-
revision: @config.revision,
98-
client_name: CLIENT_ENGINE,
99-
client_version: VERSION
100-
}
110+
],
111+
anonymize_ip: @config.anonymize_ip,
112+
revision: @config.revision,
113+
client_name: CLIENT_ENGINE,
114+
client_version: VERSION
115+
}
101116

102117
common_params
103118
end

lib/optimizely/project_config.rb

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ class ProjectConfig
3838
attr_reader :groups
3939
attr_reader :parsing_succeeded
4040
attr_reader :project_id
41+
# Boolean - denotes if Optimizely should remove the last block of visitors' IP address before storing event data
42+
attr_reader :anonymize_ip
4143
attr_reader :revision
4244
attr_reader :rollouts
4345
attr_reader :version
@@ -86,6 +88,7 @@ def initialize(datafile, logger, error_handler)
8688
@feature_flags = config.fetch('featureFlags', [])
8789
@groups = config.fetch('groups', [])
8890
@project_id = config['projectId']
91+
@anonymize_ip = (config.has_key? 'anonymizeIP')? config['anonymizeIP'] :false
8992
@revision = config['revision']
9093
@rollouts = config.fetch('rollouts', [])
9194

0 commit comments

Comments
 (0)