| 1 | +""" |
| 2 | +This code implements a multi-agent model called MoneyModel using the Mesa library. |
| 3 | +The model simulates the distribution of wealth among agents in a grid environment. |
| 4 | +Each agent has a randomly assigned wealth and can move to neighboring cells. |
| 5 | +Agents can also give money to other agents in the same cell if they have greater wealth. |
| 6 | +The model is trained by a scientist who believes in an equal society and wants to minimize the Gini coefficient, which measures wealth inequality. |
| 7 | +The model is trained using the Proximal Policy Optimization (PPO) algorithm from the stable-baselines3 library. |
| 8 | +The trained model is saved as "ppo_money_model". |
| 9 | +""" |

import gymnasium
import matplotlib.pyplot as plt
import mesa
import numpy as np
import seaborn as sns
from mesa_models.boltzmann_wealth_model.model import (
    BoltzmannWealthModel,
    MoneyAgent,
    compute_gini,
)
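
# For reference, the Gini coefficient over sorted wealths w_1 <= ... <= w_N is
# G = 2 * sum(i * w_i) / (N * sum(w_i)) - (N + 1) / N, which is 0 for perfect
# equality and approaches 1 for maximal inequality; compute_gini (imported
# above) is assumed to implement this standard measure.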

NUM_AGENTS = 10


# Define the agent class
class MoneyAgentRL(MoneyAgent):
    def __init__(self, unique_id, model):
        super().__init__(unique_id, model)
        # Random initial wealth in [1, NUM_AGENTS) (numpy's upper bound is exclusive)
        self.wealth = np.random.randint(1, NUM_AGENTS)

    def move(self, action):
        # All neighboring cells (Moore neighborhood), not only empty ones
        neighbor_cells = self.model.grid.get_neighborhood(
            self.pos, moore=True, include_center=False
        )

        # Define the movement deltas (Mesa grids put (0, 0) at the bottom-left)
        moves = {
            0: (1, 0),  # Move right
            1: (-1, 0),  # Move left
            2: (0, -1),  # Move down
            3: (0, 1),  # Move up
            4: (0, 0),  # Stay in place
        }

        # Get the delta for the action, defaulting to (0, 0) if the action is invalid
        dx, dy = moves.get(int(action), (0, 0))

        # Calculate the new position, wrapping around the toroidal grid
        new_position = (
            (self.pos[0] + dx) % self.model.grid.width,
            (self.pos[1] + dy) % self.model.grid.height,
        )

        # Move the agent if the new position is a valid neighboring cell
        if new_position in neighbor_cells:
            self.model.grid.move_agent(self, new_position)

    def take_money(self):
        # Get all agents in the same cell
        cellmates = self.model.grid.get_cell_list_contents([self.pos])
        if len(cellmates) > 1:
            # Choose a random cellmate using the model's seeded RNG; choosing
            # self is a harmless no-op because the wealth check below fails
            other_agent = self.random.choice(cellmates)
            if other_agent.wealth > self.wealth:
                # Transfer one unit of money from the richer agent to self
                other_agent.wealth -= 1
                self.wealth += 1

    def step(self):
        # Get this agent's action from the model's current action array
        action = self.model.action_dict[self.unique_id]
        # Move the agent based on the action
        self.move(action)
        # Take money from a richer agent in the same cell, if any
        self.take_money()


# Define the model class
class BoltzmannWealthModelRL(BoltzmannWealthModel, gymnasium.Env):
    def __init__(self, N, width, height):
        super().__init__(N, width, height)
        # Define the observation and action space for the RL model
        # The observation space is each agent's [wealth, x, y] row
        self.observation_space = gymnasium.spaces.Box(
            low=0, high=10 * N, shape=(N, 3), dtype=np.float32
        )
        # The action space is a MultiDiscrete space with 5 possible actions per agent
        self.action_space = gymnasium.spaces.MultiDiscrete([5] * N)
        self.is_visualize = False
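        # Example shapes (illustrative): with N = 3, a valid action is
        # [0, 4, 2] (agent 0 moves right, agent 1 stays put, agent 2 moves
        # down), and an observation is a (3, 3) array of [wealth, x, y] rows.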

    def step(self, action):
        # Store the action array so each agent can look up its own entry
        self.action_dict = action
        # Advance the model by one step
        self.schedule.step()
        # Collect data for visualization
        self.datacollector.collect(self)
        # Compute the new Gini coefficient
        new_gini = compute_gini(self)
        # Compute the reward from the change in the Gini coefficient
        reward = self.calculate_reward(new_gini)
        self.prev_gini = new_gini
        # Get the observation for the RL model
        obs = self._get_obs()
        if self.schedule.time > 5 * self.num_agents:
            # End the episode after a fixed number of timesteps
            terminated = True
            reward = -1
        elif new_gini < 0.1:
            # End the episode once the Gini coefficient drops below the
            # threshold, rewarding faster convergence more strongly
            terminated = True
            reward = 50 / self.schedule.time
        else:
            terminated = False
        info = {}
        truncated = False
        return obs, reward, terminated, truncated, info

    def calculate_reward(self, new_gini):
        if new_gini < self.prev_gini:
            # Reward proportional to the decrease in the Gini coefficient
            reward = (self.prev_gini - new_gini) * 20
        else:
            # Small flat penalty when the Gini coefficient does not improve
            reward = -0.05
        # prev_gini is updated by the caller (step), so no side effect here
        return reward
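
    # A worked example of the shaping above: if the Gini coefficient falls
    # from 0.45 to 0.40 in one step, the reward is (0.45 - 0.40) * 20 = 1.0;
    # if it rises or stays flat, the reward is a flat -0.05.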

    def visualize(self):
        # Visualize the Gini coefficient over time
        gini = self.datacollector.get_model_vars_dataframe()
        g = sns.lineplot(data=gini)
        g.set(title="Gini Coefficient over Time", ylabel="Gini Coefficient")
        plt.show()

    def reset(self, *, seed=None, options=None):
        if self.is_visualize:
            # Visualize the Gini coefficient before resetting the model
            self.visualize()
        # Seed the gymnasium side of the environment
        super().reset(seed=seed)
        # Rebuild the grid and scheduler so the episode starts fresh
        self.grid = mesa.space.MultiGrid(self.grid.width, self.grid.height, True)
        self.schedule = mesa.time.RandomActivation(self)
        for i in range(self.num_agents):
            # Create MoneyAgentRL instances and add them to the schedule
            a = MoneyAgentRL(i, self)
            self.schedule.add(a)
            # Place each agent at a random cell
            x = self.random.randrange(self.grid.width)
            y = self.random.randrange(self.grid.height)
            self.grid.place_agent(a, (x, y))
        self.prev_gini = compute_gini(self)
        return self._get_obs(), {}

    def _get_obs(self):
        # The observation is each agent's [wealth, x, y] row
        obs = []
        for a in self.schedule.agents:
            obs.append([a.wealth, *a.pos])
        return np.array(obs, dtype=np.float32)
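

# A minimal training sketch following the module docstring: train this
# environment with PPO from stable-baselines3 (assumed installed) and save the
# result as "ppo_money_model". The grid size and total_timesteps below are
# illustrative placeholders, not values taken from the original code.
if __name__ == "__main__":
    from stable_baselines3 import PPO

    env = BoltzmannWealthModelRL(NUM_AGENTS, 10, 10)
    model = PPO("MlpPolicy", env, verbose=1)
    model.learn(total_timesteps=100_000)
    model.save("ppo_money_model")

    # Roll out one episode with the trained policy
    obs, _ = env.reset()
    terminated = truncated = False
    while not (terminated or truncated):
        action, _ = model.predict(obs)
        obs, reward, terminated, truncated, _ = env.step(action)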