Skip to content

Commit e8a198f

Browse files
committed
Update Battle Geese rewards to be based on steps and length.
1 parent 22939ff commit e8a198f

File tree

3 files changed

+21
-17
lines changed

3 files changed

+21
-17
lines changed

kaggle_environments/core.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -155,16 +155,16 @@ def step(self, actions):
155155
action_state[index] = {**self.state[index], "action": None}
156156

157157
if isinstance(action, DeadlineExceeded):
158-
self.__debug_print(f"Timeout: {str(action)}")
158+
self.debug_print(f"Timeout: {str(action)}")
159159
action_state[index]["status"] = "TIMEOUT"
160160
elif isinstance(action, BaseException):
161-
self.__debug_print(f"Error: {str(action)}")
161+
self.debug_print(f"Error: {str(action)}")
162162
action_state[index]["status"] = "ERROR"
163163
else:
164164
err, data = process_schema(
165165
self.__state_schema.properties.action, action)
166166
if err:
167-
self.__debug_print(f"Invalid Action: {str(err)}")
167+
self.debug_print(f"Invalid Action: {str(err)}")
168168
action_state[index]["status"] = "INVALID"
169169
else:
170170
action_state[index]["action"] = data
@@ -498,7 +498,7 @@ def __run_interpreter(self, state):
498498
*args[:self.interpreter.__code__.co_argcount]))
499499
for agent in new_state:
500500
if agent.status not in self.__state_schema.properties.status.enum:
501-
self.__debug_print(f"Invalid Action: {agent.status}")
501+
self.debug_print(f"Invalid Action: {agent.status}")
502502
agent.status = "INVALID"
503503
if agent.status in ["ERROR", "INVALID", "TIMEOUT"]:
504504
agent.reward = None
@@ -595,6 +595,6 @@ def update_props(shared_state, state, schema_props):
595595

596596
return update_props(self.state[0], state, self.__state_schema.properties)
597597

598-
def __debug_print(self, message):
598+
def debug_print(self, message):
599599
if self.debug:
600600
print(message)

kaggle_environments/envs/battlegeese/battlegeese.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@
3333
}
3434
},
3535
"reward": {
36-
"description": "-1 = Lost, 0 = Draw/Ongoing, 1 = Won",
37-
"enum": [-1, 0, 1],
36+
"description": "The number of steps the goose has moved plus it's length.",
37+
"type": "integer",
3838
"default": 0
3939
},
4040
"observation": {

kaggle_environments/envs/battlegeese/battlegeese.py

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,11 @@ def interpreter(state, env):
110110
food.append(starting_positions[index + num_agents])
111111
return state
112112

113+
# Update active agents rewards.
114+
for index, agent in enumerate(state):
115+
if agent.status == "ACTIVE":
116+
agent.reward = len(env.steps) + len(geese[index])
117+
113118
# Apply the actions from active agents.
114119
for index, agent in enumerate(state):
115120
if agent.status != "ACTIVE":
@@ -121,15 +126,15 @@ def interpreter(state, env):
121126

122127
# Wall Hit.
123128
if new_head == -1:
124-
agent.status = f"Wall Hit: {action}"
125-
agent.reward = 0
129+
env.debug_print(f"Wall Hit: {action}")
130+
agent.status = "INACTIVE"
126131
geese[index] = []
127132
continue
128133

129134
# Last Body Hit.
130135
if len(goose) > 1 and goose[1] == new_head:
131-
agent.status = f"Body Hit: {action}"
132-
agent.reward = 0
136+
env.debug_print(f"Body Hit: {action}")
137+
agent.status = "INACTIVE"
133138
geese[index] = []
134139
continue
135140

@@ -146,8 +151,8 @@ def interpreter(state, env):
146151
if len(env.steps) % hunger_rate == 0:
147152
goose.pop()
148153
if len(goose) == 0:
149-
agent.status = f"Goose Starved: {action}"
150-
agent.reward = 0
154+
env.debug_print(f"Goose Starved: {action}")
155+
agent.status = "INACTIVE"
151156
geese[index] = []
152157
continue
153158

@@ -159,8 +164,8 @@ def interpreter(state, env):
159164
for index, agent in enumerate(state):
160165
for pos in geese[index]:
161166
if collisions[pos] > 1:
162-
agent.status = f"Goose Collision: {agent.action}"
163-
agent.reward = 0
167+
env.debug_print(f"Goose Collision: {agent.action}")
168+
agent.status = "INACTIVE"
164169
geese[index] = []
165170
continue
166171

@@ -172,11 +177,10 @@ def interpreter(state, env):
172177
available_positions.remove(pos)
173178
food.extend(sample(available_positions, min_food - len(food)))
174179

175-
# If only one ACTIVE agent left, set it's reward to 1 and make INACTIVE.
180+
# If only one ACTIVE agent left, set it to INACTIVE.
176181
active_agents = [a for a in state if a.status == "ACTIVE"]
177182
if len(active_agents) == 1:
178183
active_agents[0].status = "INACTIVE"
179-
active_agents[0].reward = 1
180184

181185
return state
182186

0 commit comments

Comments
 (0)