Commit 2a60d7d

adds docstrings for functions; adds comments describing variables; formats comments and parameters

1 parent d796578 · commit 2a60d7d

1 file changed

axelrod/strategies/dbs.py

Lines changed: 116 additions & 28 deletions
@@ -28,8 +28,8 @@ class DBS(Player):
         used when computing discounted frequencies to learn opponent's
         strategy. Must be between 0 and 1. The default is 0.75
     promotion_threshold : int, optional
-        number of observations needed to promote a change in opponent's
-        strategy. The default is 3.
+        number of successive observations needed to promote an
+        opponent behavior as a deterministic rule. The default is 3.
     violation_threshold : int, optional
         number of observations needed to consider that the opponent's
         strategy has changed. You can lower it when noise increases.
@@ -58,35 +58,65 @@ class DBS(Player):
     def __init__(self, discount_factor=.75, promotion_threshold=3,
                  violation_threshold=4, reject_threshold=3, tree_depth=5):
         super().__init__()
-
-        # default opponent's policy is TitForTat
+
+        # The opponent's behavior is represented by 3 dicts:
+        # Rd, Rc, and Rp.
+        # Its behavior is modeled by a set of rules. A rule is the move that
+        # the opponent will play (C or D, or a probability to play C) after
+        # a given outcome (for instance after (C, D)).
+        # A rule can be deterministic or probabilistic:
+        # - Rc is the set of deterministic rules
+        # - Rp is the set of probabilistic rules
+        # - Rd is the default rule set, used for initialization; it also
+        #   keeps track of previous policies when a change in the opponent's
+        #   behavior happens, in order to have a smooth transition
+        # - Pi is a rule set that aggregates all of the above sets in
+        #   order to fully model the opponent's behavior
+
+        # Default rule set Rd
+        # Default opponent's policy is TitForTat
         self.Rd = create_policy(1, 1, 0, 0)
+        # Set of current deterministic rules Rc
         self.Rc = {}
-        self.Pi = self.Rd  # policy used by MoveGen
+        # Aggregated rule set Pi
+        self.Pi = self.Rd
+        # For each rule in Rd we need to count the number of successive
+        # violations. Those counts are saved in violation_counts.
         self.violation_counts = {}
         self.reject_threshold = reject_threshold
         self.violation_threshold = violation_threshold
         self.promotion_threshold = promotion_threshold
         self.tree_depth = tree_depth
+        # v is a violation count used to know when to clean the default
+        # rule set Rd
         self.v = 0
+        # A discount factor for computing the probabilistic rules
         self.alpha = discount_factor
-        self.history_by_cond = {}
-        # to compute the discount frequencies, we need to keep
-        # up to date an history of what has been played for each
-        # condition:
+
+        # The probabilistic rule set Rp is not saved as an attribute;
+        # each rule is computed only when needed.
+        # The rules are computed as discounted frequencies of the opponent's
+        # past moves. To compute the discounted frequencies, we need to keep
+        # an up-to-date history of what has been played following each
+        # outcome (or condition):
         # We save it as a dict history_by_cond; keys are conditions
-        # (ex (C,C)) and values are a tuple of 2 lists (G,F)
-        # for a condition j:
+        # (e.g. (C, C)) and values are a tuple of 2 lists (G, F).
+        # For a condition j and an iteration i in the match:
         # G[i] = 1 if cond j was True at turn i-1 and C has been played
-        # by the opponent; else G[i]=0
-        # F[i] = 1 if cond j was True at turn i-1; else G[i]=0
+        # by the opponent; else G[i] = 0
+        # F[i] = 1 if cond j was True at turn i-1; else F[i] = 0
+        # This representation makes computing the discounted frequencies
+        # easy and efficient.
         # initial hypothesized policy is TitForTat
-        self.history_by_cond[(C, C)] = ([1], [1])
-        self.history_by_cond[(C, D)] = ([1], [1])
-        self.history_by_cond[(D, C)] = ([0], [1])
-        self.history_by_cond[(D, D)] = ([0], [1])
+        self.history_by_cond = {
+            (C, C): ([1], [1]),
+            (C, D): ([1], [1]),
+            (D, C): ([0], [1]),
+            (D, D): ([0], [1]),
+        }
 
     def reset(self):
+        """Reset instance properties."""
         super().reset()
         self.Rd = create_policy(1, 1, 0, 0)
         self.Rc = {}
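For orientation, here is a minimal standalone sketch of the data layout these comments describe. The string stand-ins for C and D, and the assumption about what create_policy(1, 1, 0, 0) returns, are illustrative rather than the library's API:

# Standalone sketch; "C"/"D" strings stand in for axelrod's action objects.
C, D = "C", "D"

# Rd maps each outcome to the probability that the opponent plays C next.
# create_policy(1, 1, 0, 0) is assumed to produce the TitForTat hypothesis:
Rd = {(C, C): 1, (C, D): 1, (D, C): 0, (D, D): 0}

# history_by_cond maps each outcome to (G, F):
# F[i] = 1 if the outcome occurred at turn i-1; G[i] = 1 if it occurred
# and the opponent then played C. The initial lists below also encode
# TitForTat: after (C, C) the opponent cooperated once out of one chance.
history_by_cond = {
    (C, C): ([1], [1]),
    (C, D): ([1], [1]),
    (D, C): ([0], [1]),
    (D, D): ([0], [1]),
}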
@@ -101,7 +131,26 @@ def reset(self):
 
     def should_promote(self, r_plus, promotion_threshold=3):
         """
-
+        Determines if the move r_plus is a deterministic behavior of the
+        opponent, in which case it returns True, or if r_plus is due to
+        random behavior (or noise) that would require a probabilistic
+        rule, in which case it returns False.
+
+        To do so it looks into the game history: if the last K times the
+        opponent was in the same situation as in r_plus it played the same
+        move, then r_plus is considered a deterministic rule (where K is
+        the user-defined promotion_threshold).
+
+        Parameters
+
+        r_plus : tuple of (tuple of actions.Actions, actions.Actions)
+            example: ((C, C), D)
+            r_plus represents one outcome of the history, and the
+            following move played by the opponent
+        promotion_threshold : int, optional
+            number of successive observations needed to promote an
+            opponent behavior as a deterministic rule. Default is 3.
         """
         if r_plus[1] == C:
             opposite_action = 0
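A hedged standalone sketch of the promotion test this docstring describes, scanning the last K reactions to the same condition. The function name, the list-based scan, and the string actions are illustrative assumptions; the actual method works on history_by_cond:

C, D = "C", "D"

def should_promote_sketch(r_plus, my_history, opp_history, promotion_threshold=3):
    # Hypothetical helper: True when the opponent's last
    # `promotion_threshold` reactions to the condition r_plus[0]
    # all equal the move r_plus[1].
    cond, move = r_plus
    matches = 0
    # Walk from the most recent turn backwards; the outcome at turn i - 1
    # is the condition, and turn i is the opponent's reaction to it.
    for i in range(len(opp_history) - 1, 0, -1):
        if (my_history[i - 1], opp_history[i - 1]) == cond:
            if opp_history[i] != move:
                return False  # a contrary observation blocks promotion
            matches += 1
            if matches >= promotion_threshold:
                return True
    return False

# The opponent defected after each of the last three (C, C) outcomes:
mine = [C, C, C, C, C, C]
opps = [C, D, C, D, C, D]
print(should_promote_sketch(((C, C), D), mine, opps))  # True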
@@ -127,9 +176,17 @@ def should_promote(self, r_plus, promotion_threshold=3):
         return False
 
     def should_demote(self, r_minus, violation_threshold=4):
+        """
+        Checks if the number of successive violations of a deterministic
+        rule (in the opponent's behavior) exceeds the user-defined
+        violation_threshold.
+        """
         return (self.violation_counts[r_minus[0]] >= violation_threshold)
 
     def update_history_by_cond(self, opponent_history):
+        """
+        Updates self.history_by_cond between turns of the game.
+        """
         two_moves_ago = (self.history[-2], opponent_history[-2])
         for outcome, GF in self.history_by_cond.items():
             G, F = GF
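The update described here can be pictured with a short standalone sketch that mirrors the loop shown in this hunk; the string actions and the function name are illustrative:

C, D = "C", "D"

def update_history_by_cond_sketch(history_by_cond, my_history, opp_history):
    # Append one (G, F) entry per condition for the newest observed turn:
    # F gets a 1 only for the condition that actually occurred two moves
    # ago, and G gets a 1 only if the opponent then cooperated.
    two_moves_ago = (my_history[-2], opp_history[-2])
    for outcome, (G, F) in history_by_cond.items():
        if outcome == two_moves_ago:
            F.append(1)
            G.append(1 if opp_history[-1] == C else 0)
        else:
            F.append(0)
            G.append(0)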
@@ -143,7 +200,25 @@ def update_history_by_cond(self, opponent_history):
                 G.append(0)
                 F.append(0)
 
-    def compute_prob_rule(self, outcome, alpha):
+    def compute_prob_rule(self, outcome, alpha=1):
+        """
+        Uses the game history to compute the probability of the opponent
+        playing C in the given outcome situation
+        (example: outcome = (C, C)).
+        When alpha = 1, the result is approximately equal to the frequency
+        of the occurrence of outcome -> C.
+        alpha is a discount factor that gives more weight to recent
+        events than to earlier ones.
+
+        Parameters
+
+        outcome : tuple of two actions.Actions
+            in {(C, C), (C, D), (D, C), (D, D)}
+            We want to compute the probability that the opponent plays C
+            following this outcome in the game
+        alpha : float, optional
+            Discount factor. Default is 1.
+        """
         G = self.history_by_cond[outcome][0]
         F = self.history_by_cond[outcome][1]
         discounted_g = 0
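A worked standalone version of the discounted frequency this docstring describes; the loop body appears in the next hunk, and the most-recent-first weighting (weights 1, alpha, alpha**2, ...) is an assumption consistent with it:

def compute_prob_rule_sketch(G, F, alpha=0.75):
    # Discounted frequency of "opponent played C" after one condition,
    # assuming the lists are scanned most-recent-first.
    discounted_g = discounted_f = 0
    alpha_k = 1
    for g, f in zip(G[::-1], F[::-1]):
        discounted_g += alpha_k * g
        discounted_f += alpha_k * f
        alpha_k *= alpha
    return discounted_g / discounted_f

# The condition occurred at three turns; the opponent played C, then D,
# then C (newest entry last):
G = [1, 0, 1]
F = [1, 1, 1]
# (1 + 0 + 0.5625) / (1 + 0.75 + 0.5625) ~= 0.676
print(compute_prob_rule_sketch(G, F, alpha=0.75))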
@@ -153,25 +228,30 @@ def compute_prob_rule(self, outcome, alpha=1):
             discounted_g += alpha_k * g
             discounted_f += alpha_k * f
             alpha_k = alpha * alpha_k
-        p_cond = discounted_g/discounted_f
+        p_cond = discounted_g / discounted_f
         return p_cond
 
     def strategy(self, opponent: Player) -> Action:
-
         # First move
         if not self.history:
             return C
 
         if len(opponent.history) >= 2:
 
-            # update history_by_cond
+            # We begin by updating history_by_cond
             # (i.e. update Rp)
             self.update_history_by_cond(opponent.history)
 
             two_moves_ago = (self.history[-2], opponent.history[-2])
+            # r_plus is what the opponent just played,
+            # following the previous outcome two_moves_ago
             r_plus = (two_moves_ago, opponent.history[-1])
+            # r_minus is the opposite move, following the same outcome
             r_minus = (two_moves_ago, ({C, D} - {opponent.history[-1]}).pop())
 
+            # If r_plus and r_minus are not in the current set of
+            # deterministic rules, we check if r_plus should be added to it
+            # (following the rule defined in the should_promote function)
             if r_plus[0] not in self.Rc.keys():
                 if self.should_promote(r_plus, self.promotion_threshold):
                     self.Rc[r_plus[0]] = action_to_int(r_plus[1])
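A quick standalone illustration of the r_plus / r_minus construction commented above, with string actions standing in for the real ones:

C, D = "C", "D"

my_history = [C, D, C]
opp_history = [C, C, D]

two_moves_ago = (my_history[-2], opp_history[-2])  # (D, C)
# What the opponent actually played after that outcome:
r_plus = (two_moves_ago, opp_history[-1])  # ((D, C), D)
# The opposite reaction to the same outcome:
r_minus = (two_moves_ago, ({C, D} - {opp_history[-1]}).pop())  # ((D, C), C)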
@@ -187,9 +267,14 @@ def strategy(self, opponent: Player) -> Action:
                     self.violation_counts[r_plus[0]] = 0
                 # (if r- in Rc)
                 elif r_minus[1] == to_check:
-                    # increment violation count of r-
+                    # Increment violation count of r-
                     self.violation_counts[r_plus[0]] += 1
-                    if self.should_demote(r_minus,self.violation_threshold):
+                    # As we observe that the opponent's behavior contradicts
+                    # a rule modeled in Rc, we check whether the number of
+                    # consecutive violations of this rule exceeds a
+                    # threshold. If it does, we clean Rc, but we keep the
+                    # rules of Rc in Rd for a smooth transition
+                    if self.should_demote(r_minus, self.violation_threshold):
                         self.Rd.update(self.Rc)
                         self.Rc.clear()
                         self.violation_counts.clear()
@@ -206,25 +291,28 @@ def strategy(self, opponent: Player) -> Action:
                 and self.Rd[r_minus[0]] == action_to_int(r_minus[1])
             )
 
+            # Increment the number of violations of Rd rules
             if r_minus_in_Rd:
                 self.v += 1
-
+            # If the number of violations exceeds a threshold, clean Rd
             if (self.v > self.reject_threshold
                     or (r_plus_in_Rc and r_minus_in_Rd)):
                 self.Rd.clear()
                 self.v = 0
 
-            # compute Rp for conditions that are neither in Rc or Rd
+            # Compute Rp for conditions that are neither in Rc nor in Rd
             Rp = {}
             all_cond = [(C, C), (C, D), (D, C), (D, D)]
             for outcome in all_cond:
                 if ((outcome not in self.Rc.keys())
                         and (outcome not in self.Rd.keys())):
-                    # then we need to compute opponent's C answer probability
+                    # Compute the opponent's C answer probability
                     Rp[outcome] = self.compute_prob_rule(outcome, self.alpha)
 
+            # We aggregate the rules of Rc, Rd, and Rp into a rule set Pi
             self.Pi = {}
-            # algorithm ensure no duplicate keys -> no key overwriting
+            # The algorithm makes sure that a rule cannot be in two
+            # different rule sets, so we do not need to check for duplicates.
             self.Pi.update(self.Rc)
             self.Pi.update(self.Rd)
             self.Pi.update(Rp)
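Because the three rule sets are disjoint by construction, the aggregation above is a plain union of dicts; the rule values below are hypothetical, for illustration only:

C, D = "C", "D"

Rc = {(C, C): 1}                 # deterministic: always C after (C, C)
Rd = {(D, D): 0}                 # default/legacy rule kept for transition
Rp = {(C, D): 0.8, (D, C): 0.3}  # discounted frequencies for the rest

Pi = {}
Pi.update(Rc)
Pi.update(Rd)
Pi.update(Rp)
# Pi now covers all four conditions with no key collisions:
# {(C, C): 1, (D, D): 0, (C, D): 0.8, (D, C): 0.3}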
