
Commit 877d9ee

update epstein 3 for rl
- update epstein rl for mesa 3.0
1 parent 30a3475 commit 877d9ee

File tree

rl/epstein_civil_violence/agent.py
rl/epstein_civil_violence/model.py
rl/epstein_civil_violence/train_config.py
rl/epstein_civil_violence/utility.py

4 files changed: +15 -20 lines changed


rl/epstein_civil_violence/agent.py

Lines changed: 1 addition & 2 deletions
@@ -1,6 +1,5 @@
 from mesa.examples.advanced.epstein_civil_violence.agents import Citizen, Cop
-
-from .utility import move
+from utility import move
 
 
 class CitizenRL(Citizen):

rl/epstein_civil_violence/model.py

Lines changed: 11 additions & 14 deletions
@@ -1,11 +1,10 @@
 import gymnasium as gym
 import mesa
 import numpy as np
+from agent import CitizenRL, CopRL
 from mesa.examples.advanced.epstein_civil_violence.model import EpsteinCivilViolence
 from ray.rllib.env import MultiAgentEnv
-
-from .agent import CitizenRL, CopRL
-from .utility import create_intial_agents, grid_to_observation
+from utility import create_intial_agents, grid_to_observation
 
 
 class EpsteinCivilViolenceRL(EpsteinCivilViolence, MultiAgentEnv):
@@ -88,7 +87,7 @@ def step(self, action_dict):
         self.action_dict = action_dict
 
         # Step the model
-        self.schedule.step()
+        self.agents.shuffle_do("step")
         self.datacollector.collect(self)
 
         # Calculate rewards
@@ -104,10 +103,10 @@ def step(self, action_dict):
             ] # Get the values from the observation grid for the neighborhood cells
 
         # RL specific outputs for the environment
-        done = {a.unique_id: False for a in self.schedule.agents}
-        truncated = {a.unique_id: False for a in self.schedule.agents}
+        done = {a.unique_id: False for a in self.agents}
+        truncated = {a.unique_id: False for a in self.agents}
         truncated["__all__"] = np.all(list(truncated.values()))
-        if self.schedule.time > self.max_iters:
+        if self.time > self.max_iters:
             done["__all__"] = True
         else:
             done["__all__"] = False
@@ -116,7 +115,7 @@ def step(self, action_dict):
 
     def cal_reward(self):
         rewards = {}
-        for agent in self.schedule.agents:
+        for agent in self.agents:
             if isinstance(agent, CopRL):
                 if agent.arrest_made:
                     # Cop is rewarded for making an arrest
@@ -149,19 +148,17 @@ def reset(self, *, seed=None, options=None):
         """
 
         super().reset()
-        # Using base scheduler to maintain the order of agents
-        self.schedule = mesa.time.BaseScheduler(self)
         self.grid = mesa.space.SingleGrid(self.width, self.height, torus=True)
         create_intial_agents(self, CitizenRL, CopRL)
         grid_to_observation(self, CitizenRL)
         # Intialize action dictionary with no action
-        self.action_dict = {a.unique_id: (0, 0) for a in self.schedule.agents}
+        self.action_dict = {a.unique_id: (0, 0) for a in self.agents}
         # Update neighbors for observation space
-        for agent in self.schedule.agents:
+        for agent in self.agents:
             agent.update_neighbors()
-        self.schedule.step()
+        self.agents.shuffle_do("step")
         observation = {}
-        for agent in self.schedule.agents:
+        for agent in self.agents:
             observation[agent.unique_id] = [
                 self.obs_grid[neighbor[0]][neighbor[1]]
                 for neighbor in agent.neighborhood
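
The model.py changes above are the core of the Mesa 3.0 migration: the explicit BaseScheduler is gone, and the model's built-in agents AgentSet now handles both activation (agents.shuffle_do("step")) and iteration (for agent in self.agents). The sketch below is a minimal, self-contained illustration of that pattern, not code from this commit; the WalkerAgent and WalkerModel names are invented for the example.

import mesa


class WalkerAgent(mesa.Agent):
    """Hypothetical agent used only to illustrate the Mesa 3.0 activation pattern."""

    def step(self):
        # Agent behaviour goes here; unique_id is assigned automatically in Mesa 3.
        pass


class WalkerModel(mesa.Model):
    def __init__(self, n=10, seed=None):
        super().__init__(seed=seed)
        for _ in range(n):
            WalkerAgent(self)  # agents register themselves with the model on creation

    def step(self):
        # Replaces self.schedule.step(): activate every agent once, in random order.
        self.agents.shuffle_do("step")
        # self.agents is an AgentSet, so it can also be iterated or comprehended
        # directly, e.g. {a.unique_id: False for a in self.agents} as in the diff.

Calling WalkerModel(5).step() once is enough to see shuffle_do invoking each agent's step method; no scheduler object is created anywhere.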

rl/epstein_civil_violence/train_config.py

Lines changed: 1 addition & 2 deletions
@@ -1,10 +1,9 @@
 import os
 
+from model import EpsteinCivilViolenceRL
 from ray.rllib.algorithms.ppo import PPOConfig
 from ray.rllib.policy.policy import PolicySpec
 
-from .model import EpsteinCivilViolenceRL
-
 
 # Configuration for the PPO algorithm
 # You can change the configuration as per your requirements
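
Beyond the import fix, the rest of train_config.py is not shown here, so as orientation only: PPOConfig and PolicySpec are typically combined roughly as below to give cops and citizens separate policies in a multi-agent setup. This is a hedged sketch, not the repository's actual configuration; the policy names and the id-based mapping heuristic are assumptions.

from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.policy.policy import PolicySpec

from model import EpsteinCivilViolenceRL


def policy_mapping_fn(agent_id, *args, **kwargs):
    # Placeholder heuristic: route each agent to the policy for its type.
    # How agent ids encode the type depends on the environment, so substitute
    # the real mapping used by the example here.
    return "cop" if str(agent_id).startswith("cop") else "citizen"


config = (
    PPOConfig()
    .environment(env=EpsteinCivilViolenceRL)
    .multi_agent(
        policies={"cop": PolicySpec(), "citizen": PolicySpec()},
        policy_mapping_fn=policy_mapping_fn,
    )
)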

rl/epstein_civil_violence/utility.py

Lines changed: 2 additions & 2 deletions
@@ -30,9 +30,9 @@ def create_intial_agents(self, CitizenRL, CopRL):
     # Initializing cops then citizens
     # This ensures cops act out their step before citizens
     for cop in cops:
-        self.schedule.add(cop)
+        self.add(cop)
     for citizen in citizens:
-        self.schedule.add(citizen)
+        self.add(citizen)
 
 
 def grid_to_observation(self, CitizenRL):
