Random action
Air Force Research Laboratory (AFRL) Autonomous Capabilities Team (ACT3) Reinforcement Learning (RL) Core.
This is a US Government Work not subject to copyright protection in the US.
The use, dissemination or disclosure of data in this file is subject to limitation or restriction. See accompanying README and LICENSE for details.
Module with base implementations for Observations
RandomActionPolicy (Policy)
Random action policy.
Source code in corl/policies/random_action.py
```python
class RandomActionPolicy(Policy):  # pylint: disable=abstract-method
    """Random action policy.
    """

    def __init__(self, observation_space, action_space, config):
        Policy.__init__(self, observation_space, action_space, config)
        self.view_requirements = {
            key: value for key, value in self.view_requirements.items() if key != SampleBatch.PREV_ACTIONS
        }

    def compute_actions(
        self,
        obs_batch,
        state_batches=None,
        prev_action_batch=None,
        prev_reward_batch=None,
        info_batch=None,
        episodes=None,
        explore=None,
        timestep=None,
        **kwargs
    ):
        return [self.action_space.sample() for _ in obs_batch], [], {}

    def learn_on_batch(self, samples):
        return {}

    def get_weights(self):
        return {}

    def set_weights(self, weights):
        return
```
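The snippet below is an illustrative usage sketch, not part of the CoRL source: the `gymnasium` spaces and the empty `config` dict are assumptions, and the exact `Policy` construction requirements depend on the installed Ray/RLlib version.

```python
# Illustrative sketch only; the spaces and empty config dict are assumptions,
# and Policy construction details can vary across Ray/RLlib versions.
import gymnasium as gym
from ray.rllib.policy.sample_batch import SampleBatch

from corl.policies.random_action import RandomActionPolicy

obs_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(4,))
act_space = gym.spaces.Discrete(3)

policy = RandomActionPolicy(obs_space, act_space, config={})

# The constructor prunes PREV_ACTIONS from the view requirements, so the
# sampler never has to supply previous actions to this policy.
assert SampleBatch.PREV_ACTIONS not in policy.view_requirements
```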
compute_actions(self, obs_batch, state_batches=None, prev_action_batch=None, prev_reward_batch=None, info_batch=None, episodes=None, explore=None, timestep=None, **kwargs)
Computes actions for the current policy.
Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| obs_batch | | Batch of observations. | *required* |
| state_batches | | List of RNN state input batches, if any. | None |
| prev_action_batch | | Batch of previous action values. | None |
| prev_reward_batch | | Batch of previous rewards. | None |
| info_batch | | Batch of info objects. | None |
| episodes | | List of Episode objects, one for each obs in obs_batch. This provides access to all of the internal episode state, which may be useful for model-based or multi-agent algorithms. | None |
| explore | | Whether to pick an exploitation or exploration action. Set to None (default) for using the value of `self.config["explore"]`. | None |
| timestep | | The current (sampling) time step. | None |
Keyword arguments:

| Name | Type | Description |
|---|---|---|
| kwargs | | Forward compatibility placeholder |
Returns:

| Type | Description |
|---|---|
| actions (TensorType) | Batch of output actions, with shape like [BATCH_SIZE, ACTION_SHAPE]. |
| state_outs (List[TensorType]) | List of RNN state output batches, if any, each with shape [BATCH_SIZE, STATE_SIZE]. |
| info (List[dict]) | Dictionary of extra feature batches, if any, with shape like {"f1": [BATCH_SIZE, ...], "f2": [BATCH_SIZE, ...]}. |
Source code in corl/policies/random_action.py
```python
def compute_actions(
    self,
    obs_batch,
    state_batches=None,
    prev_action_batch=None,
    prev_reward_batch=None,
    info_batch=None,
    episodes=None,
    explore=None,
    timestep=None,
    **kwargs
):
    return [self.action_space.sample() for _ in obs_batch], [], {}
```
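Continuing the sketch above (the observation batch is illustrative), `compute_actions` draws one sample from the action space per observation and returns empty RNN state and info outputs:

```python
# Continuing the sketch above: one random action is sampled per observation.
obs_batch = [obs_space.sample() for _ in range(3)]

actions, state_outs, info = policy.compute_actions(obs_batch)

print(len(actions))  # 3 -- one act_space.sample() per observation
print(state_outs)    # [] -- this policy keeps no RNN state
print(info)          # {} -- no extra feature batches
```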
get_weights(self)
Returns model weights.
Note: The return value of this method will reside under the "weights" key in the return value of Policy.get_state(). Model weights are only one part of a Policy's state. Other state information contains: optimizer variables, exploration state, and global state vars such as the sampling timestep.
Returns:

| Type | Description |
|---|---|
| | Serializable copy or view of model weights. |
Source code in corl/policies/random_action.py
```python
def get_weights(self):
    return {}
```
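Since this policy has no trainable model, the weights it reports are empty; per the note above, this empty dict is what would sit under the "weights" key of `Policy.get_state()`. Continuing the sketch above:

```python
# Continuing the sketch above: there are no trainable weights to report.
print(policy.get_weights())  # {}
```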
learn_on_batch(self, samples)
Perform one learning update, given `samples`.
Either this method or the combination of `compute_gradients` and `apply_gradients` must be implemented by subclasses.
Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| samples | | The SampleBatch object to learn from. | *required* |
Returns:

| Type | Description |
|---|---|
| | Dictionary of extra metadata from `compute_gradients()`. |
Examples:
```python
>>> policy, sample_batch = ...
>>> policy.learn_on_batch(sample_batch)
```
Source code in corl/policies/random_action.py
```python
def learn_on_batch(self, samples):
    return {}
```
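A concrete version of the example above, continuing the earlier sketch; the `SampleBatch` contents are illustrative. No gradient update is performed and the returned metrics dict is empty:

```python
# Continuing the sketch above; the SampleBatch contents are illustrative.
from ray.rllib.policy.sample_batch import SampleBatch

sample_batch = SampleBatch({
    SampleBatch.OBS: [obs_space.sample() for _ in range(2)],
    SampleBatch.ACTIONS: [act_space.sample() for _ in range(2)],
    SampleBatch.REWARDS: [0.0, 0.0],
})

print(policy.learn_on_batch(sample_batch))  # {} -- nothing is learned
```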
set_weights(self, weights)
Sets this Policy's model's weights.
Note: Model weights are only one part of a Policy's state. Other state information contains: optimizer variables, exploration state, and global state vars such as the sampling timestep.
Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| weights | | Serializable copy or view of model weights. | *required* |
Source code in corl/policies/random_action.py
```python
def set_weights(self, weights):
    return
```
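Because there is no model, setting weights is a no-op; a round trip with `get_weights`, continuing the sketch above, changes nothing:

```python
# Continuing the sketch above: accepts the empty weights dict and does nothing.
policy.set_weights(policy.get_weights())
```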