Docking distance change reward

This module implements the Reward Functions and Reward Validators specific to the 1D Docking task.

DockingDistanceChangeReward (RewardFuncBase)

This RewardFuncBase extension is responsible for calculating the reward associated with a change in agent position.

Source code in corl/rewards/docking_1d/docking_distance_change_reward.py
class DockingDistanceChangeReward(RewardFuncBase):
    """
    This RewardFuncBase extension is responsible for calculating the reward associated with a change in agent position.
    """

    def __init__(self, **kwargs):
        self.config: DockingDistanceChangeRewardValidator
        super().__init__(**kwargs)
        self._dist_buffer = RingBuffer(capacity=2, dtype=float)  # holds the two most recent distances to the chief

    @property
    def get_validator(self):
        """
        Method to return the class's Validator.
        """
        return DockingDistanceChangeRewardValidator

    def __call__(
        self,
        observation: OrderedDict,
        action,
        next_observation: OrderedDict,
        state: StateDict,
        next_state: StateDict,
        observation_space: StateDict,
        observation_units: StateDict,
    ) -> RewardDict:
        """
        This method computes the agent's current distance from the chief and compares it to the distance from
        the previous step. The difference, scaled by the configured scale, is returned as the reward.

        Parameters
        ----------
        observation : OrderedDict
            The observations available to the agent from the previous state.
        action
            The last action performed by the agent.
        next_observation : OrderedDict
            The observations available to the agent from the current state.
        state : StateDict
            The previous state of the simulation.
        next_state : StateDict
            The current state of the simulation.
        observation_space : StateDict
            The agent's observation space.
        observation_units : StateDict
            The units corresponding to the values in the observation_space.

        Returns
        -------
        reward : RewardDict
            The agent's reward for its change in distance.
        """

        reward = RewardDict()
        val = 0

        deputy = get_platform_by_name(next_state, self.config.agent_name)
        position_sensor = get_sensor_by_name(deputy, self.config.position_sensor_name)  # type: ignore
        deputy_position = position_sensor.get_measurement()
        chief_position = np.array([0])  # hardcoded to origin

        distance = abs(chief_position - deputy_position)
        self._dist_buffer.append(distance[0])

        if len(self._dist_buffer) == 2:
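            # Index 0 is the older distance; a positive difference means the deputy moved closer to the chief.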
            val = self.config.scale * (self._dist_buffer[0] - self._dist_buffer[1])

        reward[self.config.agent_name] = val

        return reward
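
To make the update rule concrete, here is a minimal, self-contained sketch of the same distance-change computation. It is not the CoRL implementation: a collections.deque stands in for RingBuffer, and the function name and values are illustrative.

from collections import deque

def distance_change_reward(dist_buffer: deque, deputy_position: float, scale: float = 1.0) -> float:
    """Reward proportional to how much closer the deputy moved to the chief (at the origin)."""
    distance = abs(0.0 - deputy_position)  # chief position hardcoded to the origin
    dist_buffer.append(distance)           # deque(maxlen=2) drops the oldest entry automatically
    if len(dist_buffer) == 2:
        # Positive when the new distance is smaller, i.e. the deputy approached the chief.
        return scale * (dist_buffer[0] - dist_buffer[1])
    return 0.0  # first step: no previous distance to compare against

buffer: deque = deque(maxlen=2)
print(distance_change_reward(buffer, 10.0))  # 0.0 (first step)
print(distance_change_reward(buffer, 8.0))   # 2.0 (moved 2 units closer)
print(distance_change_reward(buffer, 11.0))  # -3.0 (moved 3 units away)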

get_validator property readonly

Method to return the class's Validator.

__call__(self, observation, action, next_observation, state, next_state, observation_space, observation_units) special

This method computes the agent's current distance from the chief and compares it to the distance from the previous step. The difference, scaled by the configured scale, is returned as the reward: positive when the agent moved toward the chief, negative when it moved away.

Parameters

observation : OrderedDict
    The observations available to the agent from the previous state.
action
    The last action performed by the agent.
next_observation : OrderedDict
    The observations available to the agent from the current state.
state : StateDict
    The previous state of the simulation.
next_state : StateDict
    The current state of the simulation.
observation_space : StateDict
    The agent's observation space.
observation_units : StateDict
    The units corresponding to the values in the observation_space.

Returns

reward : RewardDict
    The agent's reward for its change in distance.
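
For example, with scale = 1.0, a step that reduces the distance to the chief from 10 to 8 yields a reward of 1.0 * (10 - 8) = +2, while a step that increases it from 8 to 11 yields 1.0 * (8 - 11) = -3 (illustrative numbers).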

Source code in corl/rewards/docking_1d/docking_distance_change_reward.py
def __call__(
    self,
    observation: OrderedDict,
    action,
    next_observation: OrderedDict,
    state: StateDict,
    next_state: StateDict,
    observation_space: StateDict,
    observation_units: StateDict,
) -> RewardDict:
    """
    This method computes the agent's current distance from the chief and compares it to the distance from
    the previous step. The difference, scaled by the configured scale, is returned as the reward.

    Parameters
    ----------
    observation : OrderedDict
        The observations available to the agent from the previous state.
    action
        The last action performed by the agent.
    next_observation : OrderedDict
        The observations available to the agent from the current state.
    state : StateDict
        The previous state of the simulation.
    next_state : StateDict
        The current state of the simulation.
    observation_space : StateDict
        The agent's observation space.
    observation_units : StateDict
        The units corresponding to the values in the observation_space.

    Returns
    -------
    reward : RewardDict
        The agent's reward for its change in distance.
    """

    reward = RewardDict()
    val = 0

    deputy = get_platform_by_name(next_state, self.config.agent_name)
    position_sensor = get_sensor_by_name(deputy, self.config.position_sensor_name)  # type: ignore
    deputy_position = position_sensor.get_measurement()
    chief_position = np.array([0])  # hardcoded to origin

    distance = abs(chief_position - deputy_position)
    self._dist_buffer.append(distance[0])

    if len(self._dist_buffer) == 2:
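        # Index 0 is the older distance; a positive difference means the deputy moved closer to the chief.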
        val = self.config.scale * (self._dist_buffer[0] - self._dist_buffer[1])

    reward[self.config.agent_name] = val

    return reward

DockingDistanceChangeRewardValidator (RewardFuncBaseValidator) pydantic-model

scale: Scalar multiplier that adjusts the magnitude of the reward

position_sensor_name: Name of the sensor used to measure the deputy's position

Source code in corl/rewards/docking_1d/docking_distance_change_reward.py
class DockingDistanceChangeRewardValidator(RewardFuncBaseValidator):
    """
    scale: Scalar multiplier that adjusts the magnitude of the reward
    position_sensor_name: Name of the sensor used to measure the deputy's position
    """

    scale: float = 1.0
    position_sensor_name: str
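
The sketch below is a self-contained stand-in, not the CoRL classes, showing how this validator behaves as a pydantic model: scale is optional with a default of 1.0, while position_sensor_name is required. The agent_name field is assumed to come from RewardFuncBaseValidator, since __call__ references self.config.agent_name; "deputy" and "Sensor_Position" are hypothetical values.

from pydantic import BaseModel, ValidationError

class DistanceChangeRewardConfig(BaseModel):
    # Stand-in for RewardFuncBaseValidator's contribution; the real base class
    # may require additional fields beyond agent_name.
    agent_name: str
    # Fields declared by DockingDistanceChangeRewardValidator above.
    position_sensor_name: str
    scale: float = 1.0

config = DistanceChangeRewardConfig(agent_name="deputy", position_sensor_name="Sensor_Position")
print(config.scale)  # 1.0 (default)

try:
    DistanceChangeRewardConfig(agent_name="deputy")  # position_sensor_name is required
except ValidationError as err:
    print(err)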