15
15
class ClickstreamProvider (BaseProvider ):
16
16
"""
17
17
A Provider for clickstream related test data.
18
+
18
19
>>> from faker import Faker
19
20
>>> from faker_clickstream import ClickstreamProvider
20
21
>>> fake = Faker()
@@ -23,15 +24,39 @@ class ClickstreamProvider(BaseProvider):
23
24
"""
24
25
25
26
def user_agent(self):
    """
    Pick one user agent at random.

    :return: Randomly chosen element of ``user_agents``
    """
    picked_agent = choice(user_agents)
    return picked_agent
27
33
28
34
def event(self):
    """
    Pick one event type at random, every entry being equally likely.

    :return: Randomly chosen element of ``events``
    """
    picked_event = choice(events)
    return picked_event
30
41
31
42
def weighted_event(self):
    """
    Draw one event object at random, biased by popularity.

    Each entry of ``weighted_events`` is weighted by its ``'popularity'`` value, so entries with a
    larger value are drawn more often.

    :return: The selected entry of ``weighted_events``
    """
    popularity_weights = [candidate['popularity'] for candidate in weighted_events]
    # k=1 yields a one-element list; unpack it to hand back the entry itself
    (selected,) = random.choices(weighted_events, weights=popularity_weights, k=1)
    return selected
33
50
34
51
def session_clickstream (self , rand_session_max_size : int = 25 ):
52
+ """
53
+ Generate session clickstream events.
54
+
55
+ :param rand_session_max_size: Max number of possible events in session. Defaults to 25.
56
+ :return: List of session events
57
+ """
58
+
59
+ # Initialize static session values
35
60
session_events = []
36
61
user_id = _get_user_id ()
37
62
user_agent = self .user_agent ()
@@ -40,11 +65,17 @@ def session_clickstream(self, rand_session_max_size: int = 25):
40
65
channel_type = _get_channel ()
41
66
random_session_size = randint (1 , rand_session_max_size )
42
67
incremental_delta_delay = randint (1 , 60 )
68
+
69
+ # Keep track of unique values in a session
43
70
unique_session_events = set ()
44
71
product_codes = set ()
72
+
45
73
for s in range (random_session_size ):
74
+ # Mock time delay between events
46
75
incremental_delta_delay = incremental_delta_delay + (s * randint (1 , 60 ))
47
76
event_time = _format_time (_get_event_time (delta = incremental_delta_delay ))
77
+
78
+ # Fetch weighted event
48
79
event = self .weighted_event ()
49
80
50
81
if (event ['name' ] == 'Login' and event ['name' ] in unique_session_events ) \
@@ -85,6 +116,7 @@ def session_clickstream(self, rand_session_max_size: int = 25):
85
116
if 'DecreaseQuantity' in unique_session_events :
86
117
unique_session_events .remove ('DecreaseQuantity' )
87
118
119
+ # Fill metadata object conditionally
88
120
metadata = {}
89
121
if event ['name' ] == 'Search' :
90
122
sample_product = _get_weighted_mobile_phone ()
@@ -106,6 +138,7 @@ def session_clickstream(self, rand_session_max_size: int = 25):
106
138
if event ['name' ] == 'CheckOrderStatus' :
107
139
metadata ['order_id' ] = _get_order_id ()
108
140
141
+ # Construct final event object
109
142
r = {
110
143
"ip" : ip ,
111
144
"user_id" : user_id ,
@@ -121,6 +154,11 @@ def session_clickstream(self, rand_session_max_size: int = 25):
121
154
122
155
123
156
def _get_session_id ():
157
+ """
158
+ Generate session ID
159
+
160
+ :return: Session ID string
161
+ """
124
162
return hashlib .sha256 (
125
163
('%s%s%s' % (
126
164
datetime .now ().strftime ("%d/%m/%Y %H:%M:%S.%f" ),
@@ -131,40 +169,86 @@ def _get_session_id():
131
169
132
170
133
171
def _get_product_code ():
172
+ """
173
+ Generate random product code from range 1 to 999999.
174
+
175
+ :return: Random integer number
176
+ """
134
177
return randint (1 , 999999 )
135
178
136
179
137
180
def _get_order_id ():
181
+ """
182
+ Generate random order id from range 1 to 999999.
183
+
184
+ :return: Random integer number
185
+ """
138
186
return randint (1 , 999999 )
139
187
140
188
141
189
def _get_user_id (start : int = 0 , end : int = 999999 ):
190
+ """
191
+ Generate random user id from range 0 to 999999. Zero value may identify null user.
192
+
193
+ :param start: Index start (Default: 0)
194
+ :param end: Index end (Default: 999999)
195
+ :return:
196
+ """
142
197
return randint (start , end )
143
198
144
199
145
200
def _get_event_time (delta ):
201
+ """
202
+ Generate current event time, added by some delta value.
203
+
204
+ :param delta: Delta time value in seconds
205
+ :return: Event time
206
+ """
146
207
return datetime .now () + timedelta (seconds = delta )
147
208
148
209
149
210
def _format_time (t ):
150
- return t . strftime ( "%d/%m/%Y %H:%M:%S.%f" )
151
-
211
+ """
212
+ Format time to string.
152
213
153
- def _get_event_name ():
154
- return choice (events )
214
+ :param t: Time object
215
+ :return: Time string in format like 28/03/2022 23:22:15.360252
216
+ """
217
+ return t .strftime ("%d/%m/%Y %H:%M:%S.%f" )
155
218
156
219
157
220
def _get_quantity ():
221
+ """
222
+ Get random product order quantity from 1 to 5. Values are given a weight, decreasing as the quantity number
223
+ increases.
224
+
225
+ :return: Product quantity number
226
+ """
158
227
return random .choices ([1 , 2 , 3 , 4 , 5 ], weights = [50 , 20 , 20 , 5 , 5 ], k = 1 )[0 ]
159
228
160
229
161
230
def _get_weighted_mobile_phone():
    """
    Draw one mobile phone object at random, biased by popularity.

    Each entry of ``mobile_phones`` is weighted by its ``'popularity'`` value, so entries with a
    larger value are drawn more often.

    :return: The selected entry of ``mobile_phones``
    """
    popularity_weights = [phone['popularity'] for phone in mobile_phones]
    # k=1 yields a one-element list; unpack it to hand back the entry itself
    (selected_phone,) = random.choices(mobile_phones, weights=popularity_weights, k=1)
    return selected_phone
163
237
164
238
165
239
def _get_ip():
    """
    Pick one IP address at random from the address list.

    :return: Randomly chosen element of ``ip_list``
    """
    picked_ip = choice(ip_list)
    return picked_ip
167
246
168
247
169
248
def _get_channel():
    """
    Pick one user origin channel at random.

    :return: Randomly chosen element of ``channel``
    """
    picked_channel = choice(channel)
    return picked_channel
0 commit comments