Episode done


Air Force Research Laboratory (AFRL) Autonomous Capabilities Team (ACT3) Reinforcement Learning (RL) Core.

This is a US Government Work not subject to copyright protection in the US.

The use, dissemination or disclosure of data in this file is subject to limitation or restriction. See accompanying README and LICENSE for details.


Rewards that use episode state to accumulate reward.

EpisodeDoneNameReward (EpisodeDoneReward) ¤

Reward that responds once to individual dones.

Source code in corl/rewards/episode_done.py
class EpisodeDoneNameReward(EpisodeDoneReward):
    """Reward that responds once to individual dones."""

    def __init__(self, **kwargs) -> None:
        self.config: EpisodeDoneNameRewardValidator
        super().__init__(**kwargs)

        self._done_name_func = {}
        for name, args in self.config.rewarded_dones.items():
            if isinstance(args, NegativeExponentialScaling):
                self._done_name_func[name] = partial(self.exp_scaling, scale=args.scale, eps=args.eps)
            else:
                self._done_name_func[name] = partial(self.constant_scaling, scale=args)

    @property
    def get_validator(self) -> typing.Type[EpisodeDoneNameRewardValidator]:
        return EpisodeDoneNameRewardValidator

    def get_scaling_method(self, done_name, done_code) -> typing.Callable[[int], float]:
        if self.config.missing_method == _MissingMethod.zero:
            return self._done_name_func.get(done_name, self.zero_scaling)

        return self._done_name_func[done_name]
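
A minimal configuration sketch for this reward (the done names are hypothetical, and the fields inherited from RewardFuncBaseValidator, such as the agent name, are omitted). Each entry in rewarded_dones is either a plain float, handled by constant_scaling, or a NegativeExponentialScaling entry, handled by exp_scaling; with the default missing_method of zero, any done not listed contributes nothing.

# Hypothetical rewarded_dones configuration for EpisodeDoneNameReward.
rewarded_dones = {
    "HitTargetDone": 1.0,                            # constant_scaling: always +1.0
    "OutOfBoundsDone": -1.0,                         # constant_scaling: always -1.0
    "ReachedGoalDone": {"scale": 2.0, "eps": 30.0},  # exp_scaling: 2.0 * exp(-|x / 30|)
}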

get_validator: Type[corl.rewards.episode_done.EpisodeDoneNameRewardValidator] property readonly ¤

Returns the pydantic validator associated with this class

get_scaling_method(self, done_name, done_code) ¤

Get the scaling method for a particular done name and code.

Source code in corl/rewards/episode_done.py
def get_scaling_method(self, done_name, done_code) -> typing.Callable[[int], float]:
    if self.config.missing_method == _MissingMethod.zero:
        return self._done_name_func.get(done_name, self.zero_scaling)

    return self._done_name_func[done_name]

EpisodeDoneNameRewardValidator (EpisodeDoneRewardValidator) pydantic-model ¤

Validation for EpisodeDoneNameReward.

Source code in corl/rewards/episode_done.py
class EpisodeDoneNameRewardValidator(EpisodeDoneRewardValidator):
    """Validation for EpisodeDoneNameReward."""
    rewarded_dones: typing.Dict[str, typing.Union[NegativeExponentialScaling, float]] = {}
    missing_method: _MissingMethod = _MissingMethod.zero

    @validator('rewarded_dones', always=True)
    def not_empty(cls, v):
        """Ensure that some done condition is specified."""
        if len(v) == 0:
            raise ValueError('Rewarded dones cannot be empty')
        return v

not_empty(v) classmethod ¤

Ensure that some done condition is specified.

Source code in corl/rewards/episode_done.py
@validator('rewarded_dones', always=True)
def not_empty(cls, v):
    """Ensure that some done condition is specified."""
    if len(v) == 0:
        raise ValueError('Rewarded dones cannot be empty')
    return v

EpisodeDoneReward (RewardFuncBase) ¤

Base class for rewards that give rewards based on done information.

Source code in corl/rewards/episode_done.py
class EpisodeDoneReward(RewardFuncBase):
    """Base class for rewards that give rewards based on done information."""

    def __init__(self, **kwargs) -> None:
        self.config: EpisodeDoneRewardValidator
        super().__init__(**kwargs)
        self._counter = 0
        self._already_recorded: typing.Set[str] = set()
        self._status_codes: typing.Dict[DoneStatusCodes, typing.List[str]] = {x: [] for x in DoneStatusCodes}

    @property
    def get_validator(self) -> typing.Type[EpisodeDoneRewardValidator]:
        return EpisodeDoneRewardValidator

    @staticmethod
    def exp_scaling(x, *, scale: float, eps: float) -> float:
        """Scale as a negative exponential."""
        return scale * np.exp(-np.abs(x / eps))

    @staticmethod
    def constant_scaling(_, *, scale: float) -> float:
        """Scale by a constant."""
        return scale

    @staticmethod
    def zero_scaling(_) -> float:
        """Scale as zero."""
        return 0

    @abc.abstractmethod
    def get_scaling_method(self, done_name, done_code) -> typing.Callable[[int], float]:
        """Get the scaling method for a particular done name and code."""
        raise NotImplementedError()

    def __call__(
        self,
        observation: OrderedDict,
        action,
        next_observation: OrderedDict,
        state: StateDict,
        next_state: StateDict,
        observation_space,
        observation_units
    ) -> RewardDict:

        reward = RewardDict()
        reward[self.config.agent_name] = 0

        if "_" in self.config.agent_name:
            platform_name = self.config.agent_name.split("_", 1)[0]
        else:
            platform_name = self.config.agent_name

        done_state = next_state.episode_state.get(platform_name, {})
        for done_name, done_code in done_state.items():
            if done_name in self._already_recorded:
                continue
            self._already_recorded.add(done_name)
            self._status_codes[done_code].append(done_name)

        if not self.config.skip_win_lose_sanity_check:
            if len(self._status_codes[DoneStatusCodes.WIN]) > 0 and len(self._status_codes[DoneStatusCodes.LOSE]) > 0:
                raise RuntimeError(
                    "EpisodeDoneReward found both WIN and LOSS set during this episode, "
                    "if this is intended set skip_sanity_check=True"
                )

        # loop over the status codes starting from WIN and descending in precedence
        consolidate_break = False
        for done_status in DoneStatusCodes:
            for done_name in self._status_codes[done_status]:
                reward[self.config.agent_name] += self.get_scaling_method(done_name, done_status)(self._counter)
                if self.config.consolidate:
                    consolidate_break = True
                    break
            if consolidate_break:
                break

        self._counter += 1 / next_state.sim_update_rate

        return reward
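
To make the consolidation behaviour concrete, here is a standalone sketch (hypothetical done names and reward values, and a stand-in enum in place of the real DoneStatusCodes) of how the loop over status codes decides which recorded dones contribute when consolidate is on or off.

from enum import Enum

# Stand-in for DoneStatusCodes; only the iteration order (WIN first, LOSE last)
# matters for this sketch, not the real enum values.
class Codes(Enum):
    WIN = 1
    PARTIAL_WIN = 2
    DRAW = 3
    PARTIAL_LOSS = 4
    LOSE = 5

# Dones recorded so far this episode, bucketed by status code, mirroring
# self._status_codes above. Names and per-done rewards are hypothetical.
status_codes = {code: [] for code in Codes}
status_codes[Codes.WIN] = ["ReachedGoalDone"]
status_codes[Codes.PARTIAL_LOSS] = ["LowFuelDone"]
per_done_reward = {"ReachedGoalDone": 1.0, "LowFuelDone": -0.5}

def step_reward(consolidate: bool) -> float:
    total = 0.0
    for code in Codes:                 # iterates from WIN down to LOSE
        for name in status_codes[code]:
            total += per_done_reward[name]
            if consolidate:
                return total           # only the highest-precedence done counts
    return total

print(step_reward(consolidate=False))  # 0.5: both dones are rewarded
print(step_reward(consolidate=True))   # 1.0: only the WIN done is rewarded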

get_validator: Type[corl.rewards.episode_done.EpisodeDoneRewardValidator] property readonly ¤

Returns the pydantic validator associated with this class

constant_scaling(_, *, scale) staticmethod ¤

Scale by a constant.

Source code in corl/rewards/episode_done.py
@staticmethod
def constant_scaling(_, *, scale: float) -> float:
    """Scale by a constant."""
    return scale

exp_scaling(x, *, scale, eps) staticmethod ¤

Scale as a negative exponential.

Source code in corl/rewards/episode_done.py
@staticmethod
def exp_scaling(x, *, scale: float, eps: float) -> float:
    """Scale as a negative exponential."""
    return scale * np.exp(-np.abs(x / eps))
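
As a rough illustration of the decay: with scale=1.0 and eps=10.0, a done rewarded at counter value x=0 yields the full 1.0, at x=10 roughly 0.37, and at x=30 roughly 0.05. Since __call__ advances the counter by 1 / next_state.sim_update_rate each step, x behaves like elapsed simulated time (assuming sim_update_rate is the number of simulation updates per unit time).

import numpy as np

# Approximate values of scale * exp(-|x / eps|) for scale=1.0, eps=10.0.
for x in (0.0, 10.0, 30.0):
    print(x, 1.0 * np.exp(-abs(x / 10.0)))  # 1.000, ~0.368, ~0.050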

get_scaling_method(self, done_name, done_code) ¤

Get the scaling method for a particular done name and code.

Source code in corl/rewards/episode_done.py
@abc.abstractmethod
def get_scaling_method(self, done_name, done_code) -> typing.Callable[[int], float]:
    """Get the scaling method for a particular done name and code."""
    raise NotImplementedError()

zero_scaling(_) staticmethod ¤

Scale as zero.

Source code in corl/rewards/episode_done.py
@staticmethod
def zero_scaling(_) -> float:
    """Scale as zero."""
    return 0

EpisodeDoneRewardValidator (RewardFuncBaseValidator) pydantic-model ¤

consolidate: whether the reward should attempt to reduce down to a single Win/Lose/Draw result when multiple done conditions are set, with WIN taking the highest precedence.

skip_win_lose_sanity_check: whether the reward should skip the sanity check that flags both WIN and LOSE in an episode's done results, in case that combination was intentional.

Source code in corl/rewards/episode_done.py
class EpisodeDoneRewardValidator(RewardFuncBaseValidator):
    """
    consolidate: whether the reward should attempt to reduce down to a single Win/Lose/Draw
        result when multiple done conditions are set, with WIN taking the highest precedence
    skip_win_lose_sanity_check: whether the reward should skip the sanity check that flags
        both WIN and LOSE in an episode's done results, in case that combination was intentional
    """
    consolidate: bool = False
    skip_win_lose_sanity_check: bool = True
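
A sketch of the two fields this base validator contributes, shown as a configuration fragment (the fields required by RewardFuncBaseValidator, such as the agent name, are omitted and the values here are hypothetical).

# Fields contributed by EpisodeDoneRewardValidator.
episode_done_fields = {
    "consolidate": True,                  # reward only the highest-precedence done status
    "skip_win_lose_sanity_check": False,  # run the check; raise if both WIN and LOSE occur
}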

EpisodeDoneStateReward (EpisodeDoneReward) ¤

Reward that responds to done condition state, once per done condition triggered.

Source code in corl/rewards/episode_done.py
class EpisodeDoneStateReward(EpisodeDoneReward):
    """Reward that responds to done condition state, once per done condition triggered."""

    def __init__(self, **kwargs) -> None:
        self.config: EpisodeDoneStateRewardValidator
        super().__init__(**kwargs)

        self._status_code_func = {}
        for code in DoneStatusCodes:
            code_name = code.name.lower()
            if not hasattr(self.config, code_name):
                raise RuntimeError(f'Unknown done status code: {code_name}')
            code_data = getattr(self.config, code_name)
            if isinstance(code_data, NegativeExponentialScaling):
                self._status_code_func[code_name] = partial(self.exp_scaling, scale=code_data.scale, eps=code_data.eps)
            else:
                self._status_code_func[code_name] = partial(self.constant_scaling, scale=code_data)

    @property
    def get_validator(self) -> typing.Type[EpisodeDoneStateRewardValidator]:
        return EpisodeDoneStateRewardValidator

    def get_scaling_method(self, done_name, done_code) -> typing.Callable[[int], float]:
        return self._status_code_func[done_code.name.lower()]
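
A hypothetical configuration fragment for this reward, showing that each done status bucket accepts either a constant value (constant_scaling) or a NegativeExponentialScaling entry (exp_scaling, which decays the longer the episode runs before the done triggers).

# Hypothetical per-status configuration for EpisodeDoneStateReward.
status_rewards = {
    "win": {"scale": 1.0, "eps": 20.0},  # decays with episode length
    "partial_win": 0.5,
    "draw": 0.0,
    "partial_loss": -0.5,
    "lose": -1.0,
}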

get_validator: Type[corl.rewards.episode_done.EpisodeDoneStateRewardValidator] property readonly ¤

Returns the pydantic validator associated with this class

get_scaling_method(self, done_name, done_code) ¤

Get the scaling method for a particular done name and code.

Source code in corl/rewards/episode_done.py
def get_scaling_method(self, done_name, done_code) -> typing.Callable[[int], float]:
    return self._status_code_func[done_code.name.lower()]

EpisodeDoneStateRewardValidator (EpisodeDoneRewardValidator) pydantic-model ¤

Validation for EpisodeDoneStateReward.

Source code in corl/rewards/episode_done.py
class EpisodeDoneStateRewardValidator(EpisodeDoneRewardValidator):
    """Validation for EpisodeDoneStateReward."""
    win: typing.Union[NegativeExponentialScaling, float] = 0
    partial_win: typing.Union[NegativeExponentialScaling, float] = 0
    draw: typing.Union[NegativeExponentialScaling, float] = 0
    partial_loss: typing.Union[NegativeExponentialScaling, float] = 0
    lose: typing.Union[NegativeExponentialScaling, float] = 0

NegativeExponentialScaling (BaseModel) pydantic-model ¤

Validation entry for negative exponential scaling.

Source code in corl/rewards/episode_done.py
class NegativeExponentialScaling(BaseModel):
    """Validation entry for negative exponential scaling."""
    scale: NonNegativeFloat
    eps: PositiveFloat
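
A minimal usage sketch for this model: scale must be non-negative and eps strictly positive, so the second construction below raises a pydantic ValidationError.

from pydantic import ValidationError

from corl.rewards.episode_done import NegativeExponentialScaling

ok = NegativeExponentialScaling(scale=1.0, eps=10.0)  # valid: scale >= 0, eps > 0

try:
    NegativeExponentialScaling(scale=1.0, eps=0.0)    # invalid: eps must be > 0
except ValidationError as err:
    print(err)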