@@ -23,7 +23,7 @@ class DBS(Player):
     violation_threshold and rejection_threshold
 
     Parameters
-    ----------
+
     discount_factor : float, optional
         used when computing discounted frequencies to learn opponent's
         strategy. Must be between 0 and 1. The default is 0.75
@@ -100,6 +100,9 @@ def reset(self):
         self.history_by_cond[(D, D)] = ([0], [1])
 
     def should_promote(self, r_plus, promotion_threshold=3):
+        """
+
+        """
         if r_plus[1] == C:
             opposite_action = 0
         elif r_plus[1] == D:
@@ -119,7 +122,7 @@ def should_promote(self, r_plus, promotion_threshold=3):
             if (self.history_by_cond[r_plus[0]][1][1:][-k] == 1):
                 count += 1
             k += 1
-        if (count>=promotion_threshold):
+        if (count >= promotion_threshold):
             return True
         return False
 
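
The test above promotes an opponent reaction to a deterministic rule once it has been observed promotion_threshold times in a row. A standalone sketch of that logic on a hypothetical list of observed reactions (not the class method itself):

```python
# Hypothetical rendering of the promotion test: 1 marks the candidate
# reaction, 0 anything else; we count the trailing streak of matches.
def should_promote_sketch(reactions, promotion_threshold=3):
    count = 0
    for r in reversed(reactions):  # most recent observation first
        if r != 1:
            break
        count += 1
    return count >= promotion_threshold

print(should_promote_sketch([0, 1, 1, 1]))  # True: three in a row
print(should_promote_sketch([1, 1, 0, 1]))  # False: streak broken
```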
@@ -147,14 +150,13 @@ def compute_prob_rule(self, outcome, alpha):
         discounted_f = 0
         alpha_k = 1
         for g, f in zip(G[::-1], F[::-1]):
-            discounted_g += alpha_k*g
-            discounted_f += alpha_k*f
-            alpha_k = alpha*alpha_k
+            discounted_g += alpha_k * g
+            discounted_f += alpha_k * f
+            alpha_k = alpha * alpha_k
         p_cond = discounted_g / discounted_f
         return p_cond
 
     def strategy(self, opponent: Player) -> Action:
-        """This is the actual strategy"""
 
         # First move
         if not self.history:
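
The loop above computes a discounted cooperation frequency: G holds per-occurrence cooperation indicators, F occurrence indicators, and alpha (the discount_factor, default 0.75) shrinks the weight of older entries. A runnable sketch of the same computation, with hypothetical data:

```python
# Discounted frequency sketch: the most recent entry gets weight 1, the one
# before it alpha, then alpha**2, and so on (data here is hypothetical).
G = [1, 0, 1]  # 1 if the opponent cooperated after the outcome
F = [1, 1, 1]  # 1 for each time the outcome occurred
alpha = 0.75

discounted_g, discounted_f, alpha_k = 0, 0, 1
for g, f in zip(G[::-1], F[::-1]):
    discounted_g += alpha_k * g
    discounted_f += alpha_k * f
    alpha_k = alpha * alpha_k

print(discounted_g / discounted_f)  # estimated P(cooperation | outcome)
```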
@@ -195,14 +197,14 @@ def strategy(self, opponent: Player) -> Action:
 
         # r+ in Rc
         r_plus_in_Rc = (
-                r_plus[0] in self.Rc.keys()
-                and self.Rc[r_plus[0]] == action_to_int(r_plus[1])
-            )
+            r_plus[0] in self.Rc.keys()
+            and self.Rc[r_plus[0]] == action_to_int(r_plus[1])
+        )
         # r- in Rd
         r_minus_in_Rd = (
-                r_minus[0] in self.Rd.keys()
-                and self.Rd[r_minus[0]] == action_to_int(r_minus[1])
-            )
+            r_minus[0] in self.Rd.keys()
+            and self.Rd[r_minus[0]] == action_to_int(r_minus[1])
+        )
 
         if r_minus_in_Rd:
             self.v += 1
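
Both checks above ask whether an outcome is covered by a rule table and whether the recorded action matches. The same test can be written with dict.get, though the diff keeps the explicit membership check; a hypothetical standalone sketch:

```python
# Hypothetical sketch: Rc maps outcomes to 0/1 action codes, as in the file,
# but strings stand in for the module's C and D action objects.
Rc = {("C", "C"): 1}
r_plus = (("C", "C"), "C")  # (previous outcome, opponent's reaction)

def action_to_int_sketch(action):  # stand-in for the module's action_to_int
    return 1 if action == "C" else 0

r_plus_in_Rc = Rc.get(r_plus[0]) == action_to_int_sketch(r_plus[1])
print(r_plus_in_Rc)  # True
```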
@@ -217,7 +219,7 @@ def strategy(self, opponent: Player) -> Action:
         all_cond = [(C, C), (C, D), (D, C), (D, D)]
         for outcome in all_cond:
             if ((outcome not in self.Rc.keys())
-                and (outcome not in self.Rd.keys())):
+                    and (outcome not in self.Rd.keys())):
                 # then we need to compute opponent's C answer probability
                 Rp[outcome] = self.compute_prob_rule(outcome, self.alpha)
 
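
This loop estimates probabilistic rules only for outcomes that no deterministic rule (in Rc or Rd) already covers. A standalone sketch with hypothetical tables:

```python
# Hypothetical rule tables: Rc/Rd hold promoted deterministic rules, and
# estimate stands in for compute_prob_rule's discounted-frequency output.
Rc = {("C", "C"): 1}
Rd = {("D", "D"): 0}
estimate = {("C", "D"): 0.4, ("D", "C"): 0.6}

all_cond = [("C", "C"), ("C", "D"), ("D", "C"), ("D", "D")]
Rp = {}
for outcome in all_cond:
    if outcome not in Rc and outcome not in Rd:
        Rp[outcome] = estimate[outcome]

print(Rp)  # {('C', 'D'): 0.4, ('D', 'C'): 0.6}
```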
@@ -229,13 +231,13 @@ def strategy(self, opponent: Player) -> Action:
 
         # React to the opponent's last move
         return MoveGen((self.history[-1], opponent.history[-1]), self.Pi,
-                       depth_search_tree = self.tree_depth)
+                       depth_search_tree=self.tree_depth)
 
 
 class Node(object):
     """
     Nodes used to build a tree for the tree-search procedure
-    The tree has Determinist ans Stochastic nodes, as the opponent's
+    The tree has Deterministic and Stochastic nodes, as the opponent's
     strategy is learned as a probability distribution
     """
 
@@ -262,17 +264,17 @@ def __init__(self, own_action, pC, depth):
 
     def get_siblings(self):
         # siblings of a stochastic node get depth += 1
-        opponent_c_choice = DeterministNode(self.own_action, C, self.depth + 1)
-        opponent_d_choice = DeterministNode(self.own_action, D, self.depth + 1)
+        opponent_c_choice = DeterministicNode(self.own_action, C, self.depth + 1)
+        opponent_d_choice = DeterministicNode(self.own_action, D, self.depth + 1)
         return (opponent_c_choice, opponent_d_choice)
 
     def is_stochastic(self):
         return True
 
 
-class DeterministNode(Node):
+class DeterministicNode(Node):
     """
-    Nodes (C, C), (C, D), (D, C), or (D, D) with determinist choice
+    Nodes (C, C), (C, D), (D, C), or (D, D) with deterministic choice
     for siblings
     """
 
@@ -288,11 +290,11 @@ def get_siblings(self, policy):
         same depth
         """
         c_choice = StochasticNode(
-                C, policy[(self.action1, self.action2)], self.depth
-            )
+            C, policy[(self.action1, self.action2)], self.depth
+        )
         d_choice = StochasticNode(
-                D, policy[(self.action1, self.action2)], self.depth
-            )
+            D, policy[(self.action1, self.action2)], self.depth
+        )
         return (c_choice, d_choice)
 
     def is_stochastic(self):
@@ -315,12 +317,7 @@ def create_policy(pCC, pCD, pDC, pDD):
     where p is the probability to cooperate after prev_move,
     where prev_move can be (C, C), (C, D), (D, C) or (D, D)
     """
-    pol = {}
-    pol[(C, C)] = pCC
-    pol[(C, D)] = pCD
-    pol[(D, C)] = pDC
-    pol[(D, D)] = pDD
-    return pol
+    return {(C, C): pCC, (C, D): pCD, (D, C): pDC, (D, D): pDD}
 
 
 def action_to_int(action):
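
The rewrite returns the dictionary literal directly instead of building it key by key; behavior is unchanged. Example use, with string stand-ins for the module's C and D actions so the snippet runs on its own:

```python
C, D = "C", "D"  # stand-ins; the module uses axelrod Action objects

def create_policy(pCC, pCD, pDC, pDD):
    return {(C, C): pCC, (C, D): pCD, (D, C): pDC, (D, D): pDD}

# model a Tit For Tat opponent: it cooperates iff we cooperated last round
policy = create_policy(1, 1, 0, 0)
print(policy[(C, D)])  # 1: our last move was C, so it cooperates next
```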
@@ -343,16 +340,12 @@ def minimax_tree_search(begin_node, policy, max_depth):
         # The stochastic node value is the expected values of siblings
         node_value = (
             begin_node.pC * minimax_tree_search(
-                siblings[0],
-                policy,
-                max_depth)
+                siblings[0], policy, max_depth)
             + (1 - begin_node.pC) * minimax_tree_search(
-                siblings[1],
-                policy,
-                max_depth)
+                siblings[1], policy, max_depth)
         )
         return node_value
-    else:  # determinist node
+    else:  # deterministic node
         if begin_node.depth == max_depth:
             # this is an end node, we just return its outcome value
             return begin_node.get_value()
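
The collapsed call keeps the same computation: a stochastic node's value is the expectation of its two deterministic siblings under the learned cooperation probability pC. A numeric check with hypothetical subtree values:

```python
pC = 0.75       # learned probability that the opponent cooperates
value_c = 3.0   # hypothetical value of the cooperation subtree
value_d = 1.0   # hypothetical value of the defection subtree

node_value = pC * value_c + (1 - pC) * value_d
print(node_value)  # 2.5
```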
@@ -368,7 +361,7 @@ def minimax_tree_search(begin_node, policy, max_depth):
             )
         elif begin_node.depth < max_depth:
             siblings = begin_node.get_siblings(policy)
-            # the determinist node value is the max of both siblings values
+            # the deterministic node value is the max of both siblings' values
             # + the score of the outcome of the node
             a = minimax_tree_search(siblings[0], policy, max_depth)
             b = minimax_tree_search(siblings[1], policy, max_depth)
@@ -381,7 +374,7 @@ def MoveGen(outcome, policy, depth_search_tree=5):
     returns the best move considering opponent's policy and last move,
     using tree-search procedure
     """
-    current_node = DeterministNode(outcome[0], outcome[1], depth=0)
+    current_node = DeterministicNode(outcome[0], outcome[1], depth=0)
     values_of_choices = minimax_tree_search(
         current_node, policy, depth_search_tree)
     # returns the Action which correspond to the best choice in terms of
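
With the renamed class, MoveGen can be exercised directly. A usage sketch, assuming this file lives at axelrod.strategies.dbs and exposes MoveGen and create_policy (import paths may differ by version):

```python
from axelrod.action import Action  # assumed import path
from axelrod.strategies.dbs import MoveGen, create_policy

C, D = Action.C, Action.D

# best reply to a (C, C) round against an opponent modelled as Tit For Tat
policy = create_policy(1, 1, 0, 0)
print(MoveGen((C, C), policy, depth_search_tree=5))
```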