Skip to content

The shapes of the spec BoundedContinuous and the Composite mismatch: the first 2 dimensions should match but got spec.shape=torch.Size([5, 13]) and Composite.shape=torch.Size([2, 5]). #221

@rsarpongstreetor

Description

@rsarpongstreetor

Giving the ClassAnFuelprice, I need help to resolve the shape mismatch for the Bounded Countineous and the Composit(agent composite) in the Marl _make_spec Method.

class AnFuelpriceEnv(EnvBase):
def init(self, num_envs, device, seed, categorical_actions, num_agents, episode_length=1000, **kwargs):
super().init(device=device, batch_size=torch.Size([num_envs]))
if seed is None:
seed = int(torch.empty((), dtype=torch.int64).random_().item())
self.set_seed(seed)

    self.num_envs = num_envs # Store num_envs as an attribute
    self.categorical_actions = categorical_actions
    self.num_agents = num_agents
    self.episode_length = episode_length

    # Assuming these are defined elsewhere or will be defined in _make_spec
    self.obs_dim = 13  # Example observation dimension per individual
    self.date_dim = 1 # Example date dimension per individual
    self.num_individuals_per_agent = 13 # Example number of individuals per agent
    self.num_individual_actions = 3 # Example number of actions per individual

    # Assuming agent names are 'agent_0', 'agent_1', etc.
    self.agents = [f"agent_{i}" for i in range(self.num_agents)]

    self._make_spec()
    # self._reset() # Call reset after making specs


@staticmethod
def gen_params(batch_size: torch.Size = torch.Size([]), device: Union[str, torch.device] = "cpu") -> TensorDictBase:
    # Ensure batch_size is a tuple of integers
    if not isinstance(batch_size, torch.Size):
         # Attempt to convert to torch.Size
         try:
             batch_size = torch.Size(batch_size)
         except TypeError:
              raise TypeError(f"Expected batch_size to be torch.Size or convertible to torch.Size, but got {type(batch_size)}")

    data_path = '/content/drive/MyDrive/deep learning codes/EIAAPI_DOWNLOAD/solutions/mergedata/DataDic.pt'
    data_columns = ['Forex', 'WTI', 'Brent', 'OPEC', 'Fuelprice5', 'Fuelprice6', 'Fuelprice7', 'Fuelprice8',
                    'Fuelprice9', 'Fuelprice10', 'Fuelprice11', 'Fuelprice12', 'Fuelprice13',
                    'reward0', 'reward1', 'reward2', 'reward3', 'reward4', 'reward5', 'reward6', 'reward7', 'reward8',
                    'reward9', 'reward10', 'reward11', 'reward12',
                    'action0', 'action1', 'action2', 'action3', 'action4', 'action5', 'action6', 'action7', 'action8',
                    'action9', 'action10', 'action11', 'action12', 'Date']
    # Create num_envs instances of DDataenv if batch_size is provided, otherwise just one
    num_instances = batch_size[0] if batch_size else 1
    env_data = []
    for _ in range(num_instances):
         envv = DDataenv(data_path, data_columns)
         env_data.append(envv.get_observation())

    # Stack the observations to create batched tensors
    batched_ac = {}
    for key in env_data[0].keys():
         batched_ac[key] = torch.stack([torch.tensor(data[key], device="cpu") for data in env_data])

    # Explicitly ensure date parameters have shape (batch_size, 1,) if they are scalar
    for k in ['Date', 'Date_min', 'Date_max']:
         if k in batched_ac and batched_ac[k].ndim == 1: # Check if it's (batch_size,) and key exists
             batched_ac[k] = batched_ac[k].unsqueeze(1) # Reshape to (batch_size, 1)


    # Create a TensorDict from the batched data
    td = TensorDict({"params": {k: v.to(device) for k, v in batched_ac.items()}}, batch_size=batch_size, device=device)
    #print(f"gen_params: Created TensorDict with batch_size {batch_size}, shape: {td.shape})


    return td

def _make_spec(self):

    dummy_td_params = self.gen_params(torch.Size([1]), device=self.device)

    # Store min/max data from the dummy TensorDict
    self.obs_min_data = dummy_td_params['params','obsState&Fuel_min'].squeeze(0).clone().detach()
    self.obs_max_data = dummy_td_params['params','obsState&Fuel_max'].squeeze(0).clone().detach()
    self.Date_min_data = dummy_td_params['params','Date_min'].squeeze(0).clone().detach()
    self.Date_max_data = dummy_td_params['params','Date_max'].squeeze(0).clone().detach()
    self.reward_min_data = dummy_td_params['params','rewardState&reward_min'].squeeze(0).clone().detach()
    self.reward_max_data = dummy_td_params['params','rewardState&reward_max'].squeeze(0).clone().detach()
    self.action_min_data = dummy_td_params['params','actionState&action_min'].squeeze(0).clone().detach()
    self.action_max_data = dummy_td_params['params','actionState&action_max'].squeeze(0).clone().detach()


    # Define observation bounds (excluding Date)
    obs_low_per_agent = self.obs_min_data # Shape [13]
    obs_high_per_agent = self.obs_max_data # Shape [13]
    obs_shape_per_agent = obs_low_per_agent.shape # Should be [13]

    # Define position_key spec based on Date data (Unbounded)
    position_key_shape_per_agent = torch.Size([1]) # Should be [1] per agent


    # Define the inner Composite for agents within observation spec
    agents_observation_composite_dict = {
        # Observation spec for fuel and forex data (shape per agent)
        "observation": Bounded( # Using Bounded for the main observation features
            low= obs_low_per_agent, # Shape [13]
            high= obs_high_per_agent, # Shape [13]
            shape=obs_shape_per_agent, # Shape is [13]
            dtype=torch.float32,
            device=self.device,
        ),
        "position_key": Unbounded( # Positional key spec based on Date (now Unbounded, shape per agent)
            shape=position_key_shape_per_agent, # Shape is [1]
            dtype=torch.float32, # Assuming Date is float
            device=self.device,
        ),
         # Keep done, terminated, truncated under agents with shape [1] per agent
         "done": Binary(
            shape=[1], # Shape is [1] per agent
            dtype=torch.bool,
            device=self.device,
        ),
         "is_terminated": Binary(
            shape=[1], # Shape is [1] per agent
            dtype=torch.bool,
            device=self.device,
        ),
         "is_truncated": Binary(
            shape=[1], # Shape is [1] per agent
            dtype=torch.bool,
            device=self.device,
        ),
    }
    print(f"agents_observation_composite_dict: {agents_observation_composite_dict}")
    agents_observation_composite = Composite(
        agents_observation_composite_dict,
        # The shape of this inner Composite is now [num_agents]
        shape=torch.Size([self.num_agents]), # Shape is [num_agents]
        device=self.device,
    )

    # Observation spec - Standard TorchRL Multi-Agent Structure
    # Top-level Composite: batch_size=[num_envs], shape=[]
    # Inner Composite ('agents'): shape=[num_agents]
    # Innermost Specs ('observation', 'position_key', 'done', 'is_terminated', 'is_truncated'): shape=[feature_dims] (per agent)
    self.observation_spec = Composite(
        {
            "agents": agents_observation_composite
        },
        # The shape of the top-level Composite is empty, batch_size is handled separately
        batch_size=self.batch_size,
        device=self.device,
    )
    print(f"observation_spec: {self.observation_spec}")


    self.num_individual_actions=13 # Corrected to 13 as per the user's request to match observation shape
    # Action spec - Standard TorchRL Multi-Agent Structure
    # Top-level Composite: batch_size=[num_envs], shape=[]
    # Inner Composite ('agents'): shape=[num_agents]
    # Innermost Spec ('action'): shape=[action_dims] (per agent)


    if self.categorical_actions:
         # For discrete actions, action feature shape is [num_individuals_per_agent]
         action_feature_shape = torch.Size([self.num_individuals_per_agent]) # Shape [13] per agent
         # nvec is per agent, set device [13]
         nvec_per_agent = torch.full([self.num_individuals_per_agent], self.num_individual_actions, dtype=torch.int64, device=self.device)

         # Define the inner Composite for agents with shape [num_agents]
         agents_action_composite_dict = {
              "action": MultiCategorical(
                 nvec=nvec_per_agent, # nvec is per agent group now [13]
                 shape=action_feature_shape, # Shape is just feature dims per agent [13]
                 dtype=torch.int64,
                 device=self.device, # Explicitly set device
             )
         }
         print(f"agents_action_composite_dict (categorical): {agents_action_composite_dict}")
         agents_action_composite = Composite(
             agents_action_composite_dict,
             # The shape of this inner Composite is now [num_agents]
             shape=torch.Size([self.num_agents]),
             device=self.device,
         )

         self.action_spec = Composite(
             {
                  "agents": agents_action_composite
             },
             # The shape of the top-level Composite is empty, batch_size is handled separately
             batch_size=self.batch_size,
             device=self.device,
         )
         print(f"action_spec (categorical): {self.action_spec}")


    else:
         # For continuous actions, define the spec as a single UnboundedContinuousTensorSpec
         # with shape [num_agents, num_individuals_per_agent]
         continuous_action_shape = torch.Size([self.num_agents, self.num_individuals_per_agent])
         action_spec = UnboundedContinuousTensorSpec(
             shape=continuous_action_shape, # Combined shape for all agents' continuous actions
             dtype=torch.float32,
             device=self.device,
         )
         print(f"action_spec (continuous): {action_spec}")

         # The action spec is just this single spec at the top level
         self.action_spec = action_spec
         print(f"action_spec (continuous, top-level): {self.action_spec}")


    # Reward spec - Standard TorchRL Multi-Agent Structure
    # Top-level Composite: batch_size=[num_envs], shape=[]
    # Inner Composite ('agents'): shape=[num_agents]
    # Innermost Spec ('reward'): shape=[1] (per agent)
    reward_feature_shape = torch.Size([1])

    agents_reward_composite_dict = {
        "reward": Unbounded( # Corrected shape to [1] per agent
            shape=[1], # Corrected shape definition
            dtype=torch.float32,
            device=self.device,
        )
    }
    print(f"agents_reward_composite_dict: {agents_reward_composite_dict}")
    agents_reward_composite = Composite(
        agents_reward_composite_dict,
        shape=torch.Size([self.num_agents]), # Shape is [num_agents]
        device=self.device,
    )
    print(f"agents_reward_composite: {agents_reward_composite}")


    self.full_reward_spec = Composite(
        {
            "agents": agents_reward_composite
        },
        # The shape of the top-level Composite is empty, batch_size is handled separately
        batch_size=self.batch_size,
        device=self.device,
    )
    print(f"full_reward_spec: {self.full_reward_spec}")

    # Define the inner Composite for info spec
    info_composite_dict = { # Added spec for step_count
        "step_count": Unbounded(
            shape=[1], # Corrected shape definition [1] per environment
            dtype=torch.int64,
            device=self.device,
        )
    }
    print(f"info_composite_dict: {info_composite_dict}")
    info_composite = Composite( # Info is typically per environment, shape is empty []
        info_composite_dict,
        # The shape of this inner Composite is empty, it's per environment
        shape=torch.Size([]), # Shape is empty
        device=self.device, # Device of the inner Composite
    )
    print(f"info_composite: {info_composite}")

    # Info spec (typically per environment) - Standard Composite for batched env info
    # Top-level Composite: batch_size=[num_envs], shape=[]
    # Inner Composite ('info'): shape=[] - Info is per environment batch, not per agent group
    self.info_spec = Composite(
        {
             "info": info_composite
        },
        batch_size=self.batch_size,
        device=self.device,
    )
    print(f"info_spec: {self.info_spec}")

    # Define unbatched info spec for CustomEnvClass
    info_composite_unbatched_dict = { # Added spec for step_count
        "step_count": Unbounded(
            shape=[1], # Corrected shape definition [1] per environment
            dtype=torch.int64,
            device=self.device,
        )
    }
    print(f"info_composite_unbatched_dict: {info_composite_unbatched_dict}")
    info_composite_unbatched = Composite( # Info is typically per environment, shape is empty []
        info_composite_unbatched_dict,
        # The shape of this inner Composite is empty, it's per environment
        shape=torch.Size([]), # Shape is empty
        device=self.device, # Device of the inner Composite
    )
    print(f"info_composite_unbatched: {info_composite_unbatched}")

    self.info_spec_unbatched = Composite(
        {
             "info": info_composite_unbatched
        },
        batch_size=torch.Size([]), # Unbatched spec has empty batch size
        device=self.device,
    )
    print(f"info_spec_unbatched: {self.info_spec_unbatched}")

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions