1
1
import gymnasium as gym
2
2
import mesa
3
3
import numpy as np
4
+ from agent import CitizenRL , CopRL
4
5
from mesa .examples .advanced .epstein_civil_violence .model import EpsteinCivilViolence
5
6
from ray .rllib .env import MultiAgentEnv
6
-
7
- from .agent import CitizenRL , CopRL
8
- from .utility import create_intial_agents , grid_to_observation
7
+ from utility import create_intial_agents , grid_to_observation
9
8
10
9
11
10
class EpsteinCivilViolenceRL (EpsteinCivilViolence , MultiAgentEnv ):
@@ -88,7 +87,7 @@ def step(self, action_dict):
88
87
self .action_dict = action_dict
89
88
90
89
# Step the model
91
- self .schedule . step ( )
90
+ self .agents . shuffle_do ( "step" )
92
91
self .datacollector .collect (self )
93
92
94
93
# Calculate rewards
@@ -104,10 +103,10 @@ def step(self, action_dict):
104
103
] # Get the values from the observation grid for the neighborhood cells
105
104
106
105
# RL specific outputs for the environment
107
- done = {a .unique_id : False for a in self .schedule . agents }
108
- truncated = {a .unique_id : False for a in self .schedule . agents }
106
+ done = {a .unique_id : False for a in self .agents }
107
+ truncated = {a .unique_id : False for a in self .agents }
109
108
truncated ["__all__" ] = np .all (list (truncated .values ()))
110
- if self .schedule . time > self .max_iters :
109
+ if self .time > self .max_iters :
111
110
done ["__all__" ] = True
112
111
else :
113
112
done ["__all__" ] = False
@@ -116,7 +115,7 @@ def step(self, action_dict):
116
115
117
116
def cal_reward (self ):
118
117
rewards = {}
119
- for agent in self .schedule . agents :
118
+ for agent in self .agents :
120
119
if isinstance (agent , CopRL ):
121
120
if agent .arrest_made :
122
121
# Cop is rewarded for making an arrest
@@ -149,19 +148,17 @@ def reset(self, *, seed=None, options=None):
149
148
"""
150
149
151
150
super ().reset ()
152
- # Using base scheduler to maintain the order of agents
153
- self .schedule = mesa .time .BaseScheduler (self )
154
151
self .grid = mesa .space .SingleGrid (self .width , self .height , torus = True )
155
152
create_intial_agents (self , CitizenRL , CopRL )
156
153
grid_to_observation (self , CitizenRL )
157
154
# Initialize action dictionary with no action
158
- self .action_dict = {a .unique_id : (0 , 0 ) for a in self .schedule . agents }
155
+ self .action_dict = {a .unique_id : (0 , 0 ) for a in self .agents }
159
156
# Update neighbors for observation space
160
- for agent in self .schedule . agents :
157
+ for agent in self .agents :
161
158
agent .update_neighbors ()
162
- self .schedule . step ( )
159
+ self .agents . shuffle_do ( "step" )
163
160
observation = {}
164
- for agent in self .schedule . agents :
161
+ for agent in self .agents :
165
162
observation [agent .unique_id ] = [
166
163
self .obs_grid [neighbor [0 ]][neighbor [1 ]]
167
164
for neighbor in agent .neighborhood
0 commit comments