Hparam search util
Air Force Research Laboratory (AFRL) Autonomous Capabilities Team (ACT3) Reinforcement Learning (RL) Core.
This is a US Government Work not subject to copyright protection in the US.
The use, dissemination or disclosure of data in this file is subject to limitation or restriction. See accompanying README and LICENSE for details.
BaseHparamSearch (ABC)
¤
BaseHparamSearch provides an abstract class for hyperparameter-search strategies. Subclasses supply a validator for their settings and write the search configuration into the rllib and tune configs.
Source code in corl/libraries/hparam_search_util.py
class BaseHparamSearch(abc.ABC):
    """
    Abstract base for hyperparameter-search strategies.

    Construction validates the keyword arguments with the class returned by
    ``get_validator`` and keeps the result on ``self.config``. Subclasses
    implement ``add_algorithm_hparams`` to update the rllib and tune
    configurations for their search algorithm.
    """

    def __init__(self, **kwargs) -> None:
        # ``get_validator`` is a property, so plain attribute access already
        # yields the validator class; instantiating it validates the kwargs.
        validator_cls = self.get_validator
        self.config: BaseHparamSearchValidator = validator_cls(**kwargs)

    @property
    def get_validator(self) -> typing.Type[BaseHparamSearchValidator]:
        """Validator class used to build ``self.config``.

        Returns
        -------
        typing.Type[BaseHparamSearchValidator]
            The validator class
        """
        return BaseHparamSearchValidator

    @abc.abstractmethod
    def add_algorithm_hparams(self, rllib_config: dict, tune_config: dict) -> None:
        """Update both configurations for the search algorithm.

        Parameters
        ----------
        rllib_config : dict
            The rllib config
        tune_config : dict
            The tune config
        """
get_validator: Type[corl.libraries.hparam_search_util.BaseHparamSearchValidator]
property
readonly
¤
add_algorithm_hparams(self, rllib_config, tune_config)
¤
Updates the configurations based on search alg
Parameters¤
rllib_config : dict The rllib config tune_config : dict The tune config
Source code in corl/libraries/hparam_search_util.py
@abc.abstractmethod
def add_algorithm_hparams(self, rllib_config: dict, tune_config: dict) -> None:
    """Update both configurations for the search algorithm.

    Abstract: concrete searches override this; the base body does nothing.

    Parameters
    ----------
    rllib_config : dict
        The rllib config
    tune_config : dict
        The tune config
    """
BaseHparamSearchValidator (BaseModel)
pydantic-model
¤
Base validator for hyperparameter-search configurations; subclass it for each BaseHparamSearch implementation.
Source code in corl/libraries/hparam_search_util.py
class BaseHparamSearchValidator(BaseModel):
    """
    Base pydantic model for hyperparameter-search configurations.

    Declares no fields itself; each search strategy points its
    ``get_validator`` at a subclass that adds the settings it needs.
    """
HparamSearchPPO_AHBS (BaseHparamSearch)
¤
Asynchronous Hyper Band Example https://docs.ray.io/en/master/tune/examples/includes/async_hyperband_example.html
Source code in corl/libraries/hparam_search_util.py
class HparamSearchPPO_AHBS(BaseHparamSearch):
    """PPO hyperparameter search driven by the Asynchronous HyperBand scheduler.

    https://docs.ray.io/en/master/tune/examples/includes/async_hyperband_example.html
    """

    def __init__(self, **kwargs) -> None:
        # Unpack the keyword arguments. Forwarding them as ``kwargs=kwargs``
        # hands the validator a single field named "kwargs", so none of the
        # caller's settings would reach ``self.config``.
        super().__init__(**kwargs)

        # (enabled flag, model sampler) pairs; the order fixes the order of
        # the candidate list handed to ParametersModel.select_model.
        candidates = (
            (self.config.include_lstm_search, ParametersModel.select_lstm_model),  # type: ignore
            # NOTE: "inclue" is the field's actual spelling on the validator.
            (self.config.inclue_fully_connected_search, ParametersModel.select_fully_connected_model),  # type: ignore
            (self.config.include_frame_stacking_search, ParametersModel.select_framestacking_model),  # type: ignore
            (self.config.include_gtrxl_search, ParametersModel.select_gtrxl_model),  # type: ignore
        )
        self._model_choices = [sampler for enabled, sampler in candidates if enabled]

    @property
    def get_validator(self) -> typing.Type[HparamSearchValidator_AHBS]:
        """Validator class holding the AHBS settings.

        Returns
        -------
        typing.Type[HparamSearchValidator_AHBS]
            validator
        """
        return HparamSearchValidator_AHBS

    def add_algorithm_hparams(self, rllib_config, tune_config) -> None:
        """Configure an Asynchronous HyperBand search on the given configs.

        Parameters
        ----------
        rllib_config : dict
            The rllib config; receives the PPO hyperparameter search space and
            a sampled ``"model"`` entry.
        tune_config : dict
            The tune config; receives ``"num_samples"`` and ``"scheduler"``.
        """
        ahbs = tune.schedulers.AsyncHyperBandScheduler(
            time_attr=self.config.time_attr,  # type: ignore
            metric=self.config.metric,  # type: ignore
            mode=self.config.mode,  # type: ignore
            max_t=self.config.max_t,  # type: ignore
            grace_period=self.config.grace_period,  # type: ignore
            brackets=self.config.brackets  # type: ignore
        )

        tune_config["num_samples"] = self.config.samples  # type: ignore
        tune_config["scheduler"] = ahbs

        rllib_config.update(ParametersPPO.ppo_hyperparameters())
        # The model is drawn from one of the samplers enabled in __init__.
        rllib_config["model"] = tune.sample_from(partial(ParametersModel.select_model, self._model_choices))
get_validator: Type[corl.libraries.hparam_search_util.HparamSearchValidator_AHBS]
property
readonly
¤
add_algorithm_hparams(self, rllib_config, tune_config)
¤
Configures an Asynchronous HyperBand search on the given configurations.
Parameters¤
rllib_config : dict The rllib config (receives the PPO hyperparameter search space and a sampled "model" entry) tune_config : dict The tune config (receives "num_samples" and "scheduler")
Source code in corl/libraries/hparam_search_util.py
def add_algorithm_hparams(self, rllib_config, tune_config) -> None:
    """Configure an Asynchronous HyperBand search on the given configs.

    Parameters
    ----------
    rllib_config : dict
        The rllib config; receives the PPO hyperparameter search space and
        a sampled ``"model"`` entry.
    tune_config : dict
        The tune config; receives ``"num_samples"`` and ``"scheduler"``.
    """
    cfg = self.config
    # Build the scheduler first so the configs are untouched if this raises.
    scheduler = tune.schedulers.AsyncHyperBandScheduler(
        time_attr=cfg.time_attr,  # type: ignore
        metric=cfg.metric,  # type: ignore
        mode=cfg.mode,  # type: ignore
        max_t=cfg.max_t,  # type: ignore
        grace_period=cfg.grace_period,  # type: ignore
        brackets=cfg.brackets,  # type: ignore
    )
    tune_config["num_samples"] = cfg.samples  # type: ignore
    tune_config["scheduler"] = scheduler

    rllib_config.update(ParametersPPO.ppo_hyperparameters())
    # The model is drawn from one of the samplers enabled at construction.
    pick_model = partial(ParametersModel.select_model, self._model_choices)
    rllib_config["model"] = tune.sample_from(pick_model)
HparamSearchPPO_PBT (BaseHparamSearch)
¤
PPO PBT Search Space https://medium.com/aureliantactics/ppo-hyperparameters-and-ranges-6fc2d29bccbe
Source code in corl/libraries/hparam_search_util.py
class HparamSearchPPO_PBT(BaseHparamSearch):
    """PPO hyperparameter search using Population Based Training (PBT).

    https://medium.com/aureliantactics/ppo-hyperparameters-and-ranges-6fc2d29bccbe
    """

    @property
    def get_validator(self) -> typing.Type[HparamSearchValidator_PBT]:
        """Validator class holding the PBT settings.

        Returns
        -------
        typing.Type[HparamSearchValidator_PBT]
            validator
        """
        return HparamSearchValidator_PBT

    def add_algorithm_hparams(self, rllib_config, tune_config) -> None:
        """Add population based training to the configuration.

        Parameters
        ----------
        rllib_config : dict
            The experiment configuration; receives the PPO hyperparameter
            search space.
        tune_config : dict
            The tune configuration; receives ``"scheduler"`` and
            ``"num_samples"``.
        """
        cfg = self.config

        # Distributions used when these hyperparameters are mutated.
        mutations = {
            "lambda": ParametersPPO.LAMBDA_DIST,
            "clip_param": ParametersPPO.CLIP_PARAM_DIST,
            "lr": ParametersPPO.LR_DIST,
            "num_sgd_iter": ParametersPPO.NSGD_DIST,
            "sgd_minibatch_size": ParametersPPO.SGD_MINIBATCH_SIZE_DIST,
            "train_batch_size": ParametersPPO.TRAIN_BATCH_SIZE_DIST,
            "vf_loss_coeff": ParametersPPO.VF_LOSS_COEFF_DIST,
            "entropy_coeff": ParametersPPO.ENTROPY_COEFF_DIST,
            "gamma": ParametersPPO.GAMMA_DIST,
            "kl_coeff": ParametersPPO.KL_COEFF_DIST,
            "kl_target": ParametersPPO.KL_TARGET_DIST,
        }

        scheduler = tune.schedulers.PopulationBasedTraining(
            time_attr=cfg.time_attr,  # type: ignore
            metric=cfg.metric,  # type: ignore
            mode=cfg.mode,  # type: ignore
            perturbation_interval=cfg.perturbation_interval,  # type: ignore
            resample_probability=cfg.resample_probability,  # type: ignore
            burn_in_period=cfg.burn_in_period,  # type: ignore
            log_config=True,
            hyperparam_mutations=mutations,
            # Postprocess the perturbed config to ensure it's still valid
            custom_explore_fn=ParametersPPO.pbt_ppo_explore,
        )

        tune_config["scheduler"] = scheduler
        tune_config["num_samples"] = cfg.samples  # type: ignore

        # These params start off randomly drawn from a set.
        rllib_config.update(ParametersPPO.ppo_hyperparameters())
get_validator: Type[corl.libraries.hparam_search_util.HparamSearchValidator_PBT]
property
readonly
¤
add_algorithm_hparams(self, rllib_config, tune_config)
¤
Adds population based training to the configuration (TBD items to be added - default never add)
Parameters¤
rllib_config : dict The experiment configuration tune_config : dict The tune configuration
Source code in corl/libraries/hparam_search_util.py
def add_algorithm_hparams(self, rllib_config, tune_config) -> None:
    """Add population based training to the configuration.

    Parameters
    ----------
    rllib_config : dict
        The experiment configuration; receives the PPO hyperparameter
        search space.
    tune_config : dict
        The tune configuration; receives ``"scheduler"`` and
        ``"num_samples"``.
    """
    cfg = self.config

    # Distributions used when these hyperparameters are mutated.
    mutations = {
        "lambda": ParametersPPO.LAMBDA_DIST,
        "clip_param": ParametersPPO.CLIP_PARAM_DIST,
        "lr": ParametersPPO.LR_DIST,
        "num_sgd_iter": ParametersPPO.NSGD_DIST,
        "sgd_minibatch_size": ParametersPPO.SGD_MINIBATCH_SIZE_DIST,
        "train_batch_size": ParametersPPO.TRAIN_BATCH_SIZE_DIST,
        "vf_loss_coeff": ParametersPPO.VF_LOSS_COEFF_DIST,
        "entropy_coeff": ParametersPPO.ENTROPY_COEFF_DIST,
        "gamma": ParametersPPO.GAMMA_DIST,
        "kl_coeff": ParametersPPO.KL_COEFF_DIST,
        "kl_target": ParametersPPO.KL_TARGET_DIST,
    }

    scheduler = tune.schedulers.PopulationBasedTraining(
        time_attr=cfg.time_attr,  # type: ignore
        metric=cfg.metric,  # type: ignore
        mode=cfg.mode,  # type: ignore
        perturbation_interval=cfg.perturbation_interval,  # type: ignore
        resample_probability=cfg.resample_probability,  # type: ignore
        burn_in_period=cfg.burn_in_period,  # type: ignore
        log_config=True,
        hyperparam_mutations=mutations,
        # Postprocess the perturbed config to ensure it's still valid
        custom_explore_fn=ParametersPPO.pbt_ppo_explore,
    )

    tune_config["scheduler"] = scheduler
    tune_config["num_samples"] = cfg.samples  # type: ignore

    # These params start off randomly drawn from a set.
    rllib_config.update(ParametersPPO.ppo_hyperparameters())
HparamSearchValidator_AHBS (HparamSearchValidator_Shared)
pydantic-model
¤
Validator for the Asynchronous HyperBand search settings used by HparamSearchPPO_AHBS.
Source code in corl/libraries/hparam_search_util.py
class HparamSearchValidator_AHBS(HparamSearchValidator_Shared):
    """
    Settings for the Asynchronous HyperBand search (HparamSearchPPO_AHBS).
    """
    # max time units per trial. Trials will be stopped after max_t time units (determined
    # by time_attr) have passed.
    max_t: float = 1e7
    # Number of brackets. Declared as an int: the scheduler needs an integer
    # count, and a float annotation would coerce a user-supplied 2 to 2.0.
    brackets: int = 1
    # Only stop trials at least this old in time. The units are the same as the attribute
    # named by time-attr.
    grace_period: float = 5e6
    # Which model samplers are offered to the search. Each enabled flag adds
    # the matching ParametersModel.select_*_model to the candidate list.
    include_lstm_search: bool = False
    # NOTE: the field name is spelled "inclue"; kept for compatibility with
    # existing configurations.
    inclue_fully_connected_search: bool = True
    include_frame_stacking_search: bool = False
    include_gtrxl_search: bool = False
HparamSearchValidator_PBT (HparamSearchValidator_Shared)
pydantic-model
¤
Validator for the Population Based Training search settings used by HparamSearchPPO_PBT.
Source code in corl/libraries/hparam_search_util.py
class HparamSearchValidator_PBT(HparamSearchValidator_Shared):
    """
    Settings for the Population Based Training search (HparamSearchPPO_PBT).

    Each field is passed to tune.schedulers.PopulationBasedTraining under the
    same name.
    """
    # The probability of resampling from the original distribution when applying hyperparam_mutations.
    # If not resampled, the value will be perturbed by a factor of 1.2 or 0.8 if continuous, or changed
    # to an adjacent value if discrete. Note that because resample_probability is 0.25 by default, a
    # hyperparameter with a distribution may drift outside its specified range.
    resample_probability: float = 0.25
    # (float) – Models will be considered for perturbation at this interval of time_attr. Note that
    # perturbation incurs checkpoint overhead, so you shouldn't set this to be too frequent.
    perturbation_interval: float = 4
    # (float) – Models will not be considered for perturbation before this interval of time_attr has
    # passed. This guarantees that models are trained for at least a certain amount of time or timesteps
    # before being perturbed.
    burn_in_period: float = 10
HparamSearchValidator_Shared (BaseHparamSearchValidator)
pydantic-model
¤
Validator fields shared by the AHBS and PBT search configurations.
Source code in corl/libraries/hparam_search_util.py
class HparamSearchValidator_Shared(BaseHparamSearchValidator):
    """
    Settings common to the AHBS and PBT validators, which both subclass this.
    """
    # The training result objective value attribute. Stopping procedures will use this attribute.
    metric: str = "episode_reward_mean"
    # One of {min, max}. Determines whether objective is minimizing or maximizing the metric attribute.
    mode: str = "max"
    # A training result attr to use for comparing time. Note that you can pass in something
    # non-temporal such as training_iteration as a measure of progress, the only requirement is
    # that the attribute should increase monotonically.
    time_attr: str = "timesteps_total"
    # The number of samples to collect during HPARAM search (trials); written to
    # tune_config["num_samples"] by the search classes.
    samples: int = 4
ParametersModel
¤
Holds the model parameters
Source code in corl/libraries/hparam_search_util.py
class ParametersModel:
    """Random samplers that build model-configuration dicts for the search.

    Every ``select_*_model`` sampler starts from a fully connected
    configuration and adds the keys specific to its model family.
    """

    # Candidate hidden-layer widths and candidate layer counts.
    FC_LAYER_CHOICES = [32, 64, 128, 256, 512]
    FC_LAYER_COUNT = [2, 3, 4, 5, 6]

    @staticmethod
    def __get_layers(layer_count, FC_FILTER_LOWER_VALUES_THRESHOLD, layer_choices, MIN_LAYER_INDEX):
        """Sample a non-increasing list of layer widths.

        Parameters
        ----------
        layer_count : int
            Number of layers to sample.
        FC_FILTER_LOWER_VALUES_THRESHOLD : int
            The first this-many layers are restricted to
            ``layer_choices[MIN_LAYER_INDEX:]``.
        layer_choices : list
            Candidate widths.
        MIN_LAYER_INDEX : int
            Start index of the restricted slice used for the early layers.

        Returns
        -------
        list
            ``layer_count`` widths, each no larger than the one before it.
        """
        model_layers: typing.List = []
        for _ in range(layer_count):
            if len(model_layers) < FC_FILTER_LOWER_VALUES_THRESHOLD:
                candidates = layer_choices[MIN_LAYER_INDEX:]
            else:
                candidates = layer_choices
            if model_layers:
                # Never pick a layer wider than the previous one.
                candidates = [width for width in candidates if width <= model_layers[-1]]
            model_layers.append(random.choice(candidates))
        return model_layers

    @staticmethod
    def select_lstm_model() -> dict:
        """Sample an LSTM model configuration.

        Returns
        -------
        dict
            A fully connected configuration plus ``use_lstm``,
            ``max_seq_len``, ``lstm_cell_size`` and ``vf_share_layers``.
        """
        model_config = ParametersModel.select_fully_connected_model()
        model_config["use_lstm"] = True
        model_config["max_seq_len"] = random.choice([2, 3, 5, 10])
        model_config["lstm_cell_size"] = random.choice([64, 128, 256, 512, 1024, 2048])
        # Fixed rather than sampled. lstm_use_prev_action, lstm_use_prev_reward
        # and _time_major are not set by this sampler.
        model_config["vf_share_layers"] = True
        return model_config

    @staticmethod
    def select_fully_connected_model() -> dict:
        """Sample a fully connected model configuration.

        Returns
        -------
        dict
            ``fcnet_hiddens`` (sampled layer widths) and ``fcnet_activation``
            ("relu" or "tanh").
        """
        layer_count = random.choice(ParametersModel.FC_LAYER_COUNT)
        model_layers = ParametersModel.__get_layers(
            layer_count,
            FC_FILTER_LOWER_VALUES_THRESHOLD=2,
            layer_choices=ParametersModel.FC_LAYER_CHOICES,
            MIN_LAYER_INDEX=2,
        )
        model_config: dict = {}
        model_config["fcnet_hiddens"] = model_layers
        model_config["fcnet_activation"] = random.choice(["relu", "tanh"])
        return model_config

    @staticmethod
    def select_framestacking_model() -> dict:
        """Sample a frame-stacking model configuration.

        Returns
        -------
        dict
            A fully connected configuration plus ``custom_model``
            ("TorchFrameStack") and a sampled ``custom_model_config``.
        """
        model_config = ParametersModel.select_fully_connected_model()
        model_config["custom_model"] = "TorchFrameStack"
        custom_config: dict = {}
        model_config["custom_model_config"] = custom_config
        custom_config["num_frames"] = random.choice(list(range(1, 11)))
        custom_config["include_actions"] = random.choice([True, False])
        custom_config["include_rewards"] = random.choice([True, False])

        # The post-stack layers are sampled the same way as fcnet_hiddens.
        layer_count = random.choice(ParametersModel.FC_LAYER_COUNT)
        custom_config["post_fcnet_hiddens"] = ParametersModel.__get_layers(
            layer_count,
            FC_FILTER_LOWER_VALUES_THRESHOLD=2,
            layer_choices=ParametersModel.FC_LAYER_CHOICES,
            MIN_LAYER_INDEX=2,
        )
        return model_config

    @staticmethod
    def select_gtrxl_model() -> dict:
        """Sample a GTrXL (attention) model configuration.

        Returns
        -------
        dict
            A fully connected configuration plus ``use_attention`` and the
            sampled ``attention_*`` settings.
        """
        model_config = ParametersModel.select_fully_connected_model()
        # Must be True: with the flag off, every attention_* value sampled
        # below is ignored and this choice is just a fully connected model.
        model_config["use_attention"] = True
        model_config["attention_num_transformer_units"] = random.choice(list(range(1, 6)))
        model_config["attention_dim"] = random.choice([64, 128, 256, 512, 1024, 2048])
        model_config["attention_num_heads"] = random.choice(list(range(1, 6)))
        model_config["attention_head_dim"] = random.choice([64, 128, 256, 512, 1024, 2048])
        model_config["attention_memory_inference"] = 50
        model_config["attention_memory_training"] = 50
        model_config["attention_position_wise_mlp_dim"] = random.choice([64, 128, 256, 512, 1024, 2048])
        # attention_init_gru_gate_bias is not set by this sampler.
        model_config["attention_use_n_prev_actions"] = random.choice(list(range(1, 11)))
        model_config["attention_use_n_prev_rewards"] = random.choice(list(range(1, 11)))
        return model_config

    @staticmethod
    def select_model(model_choices) -> dict:
        """Pick one sampler from ``model_choices`` at random and call it.

        Parameters
        ----------
        model_choices : list
            Zero-argument callables that each return a model configuration.

        Returns
        -------
        dict
            The configuration produced by the chosen sampler.

        Raises
        ------
        IndexError
            If ``model_choices`` is empty (raised by ``random.choice``).
        """
        model_config_func = random.choice(model_choices)
        return model_config_func()
select_framestacking_model()
staticmethod
¤
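Samples a frame-stacking model configuration on top of a fully connected one.
Returns:
Type | Description |
---|---|
dict | Model config with custom_model "TorchFrameStack" and a sampled custom_model_config |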
Source code in corl/libraries/hparam_search_util.py
@staticmethod
def select_framestacking_model() -> dict:
    """Sample a frame-stacking model configuration.

    Returns:
        dict -- a fully connected configuration plus ``custom_model``
        ("TorchFrameStack") and a sampled ``custom_model_config``
    """
    model_config = ParametersModel.select_fully_connected_model()
    model_config["custom_model"] = "TorchFrameStack"

    frame_settings: dict = {
        "num_frames": random.choice(range(1, 11)),
        "include_actions": random.choice([True, False]),
        "include_rewards": random.choice([True, False]),
    }
    model_config["custom_model_config"] = frame_settings

    # The post-stack layers are sampled the same way as fcnet_hiddens:
    # threshold 2 and minimum index 2 over the shared width choices.
    post_depth = random.choice(ParametersModel.FC_LAYER_COUNT)
    frame_settings["post_fcnet_hiddens"] = ParametersModel.__get_layers(
        post_depth, 2, ParametersModel.FC_LAYER_CHOICES, 2
    )
    return model_config
select_fully_connected_model()
staticmethod
¤
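Samples a fully connected model configuration.
Returns¤
dict Model config with fcnet_hiddens (sampled layer widths) and fcnet_activation ("relu" or "tanh")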
Source code in corl/libraries/hparam_search_util.py
@staticmethod
def select_fully_connected_model() -> dict:
    """Sample a fully connected model configuration.

    Returns
    -------
    dict
        ``fcnet_hiddens`` (sampled layer widths) and ``fcnet_activation``
        ("relu" or "tanh").
    """
    depth = random.choice(ParametersModel.FC_LAYER_COUNT)
    # Threshold 2 and minimum index 2: the first two layers are drawn from
    # FC_LAYER_CHOICES[2:].
    hidden_sizes = ParametersModel.__get_layers(depth, 2, ParametersModel.FC_LAYER_CHOICES, 2)
    activation = random.choice(["relu", "tanh"])
    return {"fcnet_hiddens": hidden_sizes, "fcnet_activation": activation}
select_gtrxl_model()
staticmethod
¤
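Samples a GTrXL (attention) model configuration on top of a fully connected one.
Returns¤
dict Model config with the sampled attention_* settings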
Source code in corl/libraries/hparam_search_util.py
@staticmethod
def select_gtrxl_model() -> dict:
    """Sample a GTrXL (attention) model configuration.

    Returns
    -------
    dict
        A fully connected configuration plus ``use_attention`` and the
        sampled ``attention_*`` settings.
    """
    model_config = ParametersModel.select_fully_connected_model()
    # Must be True: with the flag off, every attention_* value sampled below
    # is ignored and this choice is just a fully connected model.
    model_config["use_attention"] = True
    model_config["attention_num_transformer_units"] = random.choice(list(range(1, 6)))
    model_config["attention_dim"] = random.choice([64, 128, 256, 512, 1024, 2048])
    model_config["attention_num_heads"] = random.choice(list(range(1, 6)))
    model_config["attention_head_dim"] = random.choice([64, 128, 256, 512, 1024, 2048])
    model_config["attention_memory_inference"] = 50
    model_config["attention_memory_training"] = 50
    model_config["attention_position_wise_mlp_dim"] = random.choice([64, 128, 256, 512, 1024, 2048])
    # attention_init_gru_gate_bias is not set by this sampler.
    model_config["attention_use_n_prev_actions"] = random.choice(list(range(1, 11)))
    model_config["attention_use_n_prev_rewards"] = random.choice(list(range(1, 11)))
    return model_config
select_lstm_model()
staticmethod
¤
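Samples an LSTM model configuration on top of a fully connected one.
Returns¤
dict Model config with use_lstm, max_seq_len, lstm_cell_size and vf_share_layers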
Source code in corl/libraries/hparam_search_util.py
@staticmethod
def select_lstm_model() -> dict:
    """Sample an LSTM model configuration.

    Returns
    -------
    dict
        A fully connected configuration plus ``use_lstm``, ``max_seq_len``,
        ``lstm_cell_size`` and ``vf_share_layers``.
    """
    model_config = ParametersModel.select_fully_connected_model()
    # vf_share_layers is fixed rather than sampled. lstm_use_prev_action,
    # lstm_use_prev_reward and _time_major are not set by this sampler.
    model_config.update(
        use_lstm=True,
        max_seq_len=random.choice([2, 3, 5, 10]),
        lstm_cell_size=random.choice([64, 128, 256, 512, 1024, 2048]),
        vf_share_layers=True,
    )
    return model_config
select_model(model_choices)
staticmethod
¤
The following function provides the start to exploring model configurations.
Source code in corl/libraries/hparam_search_util.py
@staticmethod
def select_model(model_choices) -> dict:
    """Pick one sampler from ``model_choices`` at random and return its result.

    ``model_choices`` holds zero-argument callables that each build a model
    configuration; this is the entry point for exploring model configurations.
    """
    return random.choice(model_choices)()
ParametersPPO
¤
Utility functions for processing hparam searches in the framework for PPO algorithm https://github.com/ray-project/ray/blob/00922817b66ee14ba215972a98f416f3d6fef1ba/rllib/agents/ppo/ppo.py https://medium.com/aureliantactics/ppo-hyperparameters-and-ranges-6fc2d29bccbe https://blog.openai.com/openai-five/ https://docs.ray.io/en/master/tune/api_docs/trainable.html#advanced-reusing-actors
Source code in corl/libraries/hparam_search_util.py
class ParametersPPO:
"""Utility functions for processing hparam searches in the framework for PPO algorithm
https://github.com/ray-project/ray/blob/00922817b66ee14ba215972a98f416f3d6fef1ba/rllib/agents/ppo/ppo.py
https://medium.com/aureliantactics/ppo-hyperparameters-and-ranges-6fc2d29bccbe
https://blog.openai.com/openai-five/
https://docs.ray.io/en/master/tune/api_docs/trainable.html#advanced-reusing-actors
"""
# Bounds and sampling distribution for the GAE lambda parameter.
LAMBDA_MIN = 0.9
LAMBDA_MAX = 1.0
LAMBDA_DIST = tune.uniform(LAMBDA_MIN, LAMBDA_MAX)

@staticmethod
def LAMBDA_RANGE(spec):  # pylint: disable=unused-argument
    """Return the default search space for the GAE parameter lambda.

    Range: 0.9 to 1.
    Also known as: GAE Parameter (lambda) (PPO Paper), lambda (RLlib),
    lambda (ppo2 baselines), lambda (ppo baselines), lambda (Unity ML), gae_lambda (TensorForce)
    """
    return ParametersPPO.LAMBDA_DIST
# Bounds and sampling distribution for the value-function loss coefficient.
VF_LOSS_COEFF_MIN = 0.5
VF_LOSS_COEFF_MAX = 1.0
VF_LOSS_COEFF_DIST = tune.uniform(VF_LOSS_COEFF_MIN, VF_LOSS_COEFF_MAX)

@staticmethod
def VF_LOSS_COEFF_RANGE(spec):  # pylint: disable=unused-argument
    """Return the default search space for the value function coefficient.

    Range: 0.5 to 1.
    Also known as: VF coeff. (PPO Paper), vf_loss_coef (RLlib),
    vf_coef (ppo2 baselines), (ppo baselines: unclear), (Unity ML: unclear), (TensorForce: unclear)
    """
    return ParametersPPO.VF_LOSS_COEFF_DIST
# Bounds and sampling distribution for the entropy coefficient.
ENTROPY_COEFF_MIN = 0.00
ENTROPY_COEFF_MAX = 0.01
ENTROPY_COEFF_DIST = tune.uniform(ENTROPY_COEFF_MIN, ENTROPY_COEFF_MAX)

@staticmethod
def ENTROPY_COEFF_RANGE(spec):  # pylint: disable=unused-argument
    """Return the default search space for the entropy coefficient.

    Range: 0 to 0.01.
    Also known as: Entropy coeff. (PPO Paper), entropy_coeff (RLlib),
    ent_coeff (ppo2 baselines), entcoeff (ppo baselines), beta (Unity ML), entropy_regularization (TensorForce)
    """
    return ParametersPPO.ENTROPY_COEFF_DIST
# Bounds and discrete choices for the PPO clipping parameter.
CLIP_PARAM_MIN = 0.1
CLIP_PARAM_MAX = 0.3
CLIP_PARAM_DIST = tune.choice([0.1, 0.2, 0.3])

@staticmethod
def CLIP_PARAM_RANGE(spec):  # pylint: disable=unused-argument
    """Return the default search space for the clipping parameter.

    Choices: 0.1, 0.2, 0.3.
    Also known as: Clipping parameter epsilon (PPO Paper), clip_param (RLlib),
    cliprange (ppo2 baselines), clip_param (ppo baselines), epsilon (Unity ML),
    likelihood_ratio_clipping (TensorForce)
    """
    return ParametersPPO.CLIP_PARAM_DIST
# Bounds and sampling distribution for the KL target.
KL_TARGET_MIN = 0.003
KL_TARGET_MAX = 0.03
KL_TARGET_DIST = tune.uniform(KL_TARGET_MIN, KL_TARGET_MAX)

@staticmethod
def KL_TARGET_RANGE(spec):  # pylint: disable=W0613
    """Return the default search space for the KL target.

    https://medium.com/aureliantactics/ppo-hyperparameters-and-ranges-6fc2d29bccbe

    RLlib's PPO implements the KL penalty; its kl_coeff (initial coefficient
    for KL divergence) and kl_target parameters control it.

    KL Target Range: 0.003 to 0.03
    """
    # Return the KL *target* distribution. The previous code returned
    # KL_COEFF_DIST here, so kl_target was sampled from the kl_coeff range.
    return ParametersPPO.KL_TARGET_DIST
# Bounds and sampling distribution for the initial KL coefficient.
KL_COEFF_MIN = 0.2  # RLLIB Default
KL_COEFF_MAX = 1.0  # https://medium.com/aureliantactics/ppo-hyperparameters-and-ranges-6fc2d29bccbe
KL_COEFF_DIST = tune.uniform(KL_COEFF_MIN, KL_COEFF_MAX)

@staticmethod
def KL_COEFF_RANGE(spec):  # pylint: disable=unused-argument
    """Return the default search space for the initial KL coefficient.

    RLlib's PPO implements the KL penalty; its kl_coeff (initial coefficient
    for KL divergence) and kl_target parameters control it.

    Sampled uniformly between KL_COEFF_MIN and KL_COEFF_MAX (0.2 to 1.0).
    The referenced article suggests 0.3 to 1 for KL initialization.
    """
    return ParametersPPO.KL_COEFF_DIST
# Bounds and sampling distribution for the discount factor gamma.
GAMMA_MIN = 0.8000
GAMMA_MAX = 0.9997
GAMMA_DIST = tune.uniform(GAMMA_MIN, GAMMA_MAX)

@staticmethod
def GAMMA_RANGE(spec):  # pylint: disable=unused-argument
    """Return the default search space for the discount factor gamma.

    Range: 0.8 to 0.9997 (0.99 is the most common value).
    Also known as: Discount (gamma) (PPO Paper), gamma (RLlib), gamma (ppo2 baselines),
    gamma (ppo baselines), gamma (Unity ML), discount (TensorForce)
    """
    return ParametersPPO.GAMMA_DIST
# Bounds and sampling distribution for the learning rate.
LR_MIN = 5e-6
LR_MAX = 0.003
LR_DIST = tune.uniform(LR_MIN, LR_MAX)

@staticmethod
def LR_RANGE(spec):  # pylint: disable=unused-argument
    """Return the default search space for the learning rate.

    Range: 5e-6 to 0.003.
    Also known as: Adam stepsize (PPO Paper), sgd_stepsize (RLlib), lr (ppo2 baselines),
    (ppo baselines: unclear), learning_rate (Unity ML), learning_rate (TensorForce)
    """
    return ParametersPPO.LR_DIST
# Bounds and discrete choices (every integer from MIN to MAX) for the number of SGD epochs.
NSGD_MIN = 3
NSGD_MAX = 30
NSGD_DIST = tune.choice(list(range(NSGD_MIN, NSGD_MAX + 1)))

@staticmethod
def NSGD_RANGE(spec):  # pylint: disable=unused-argument
    """Return the default search space for the number of SGD epochs.

    https://medium.com/aureliantactics/ppo-hyperparameters-and-ranges-6fc2d29bccbe

    Range: 3 to 30.
    Also known as: Num. epochs (PPO paper), num_sgd_iter (RLlib), epochs (ppo2 baselines),
    optim_epochs (ppo baselines), num_epoch (Unity ML), (TensorForce: unclear)
    """
    return ParametersPPO.NSGD_DIST
# Bounds and discrete choices (powers of two) for the SGD minibatch size.
SGD_MINIBATCH_SIZE_MIN = 128
SGD_MINIBATCH_SIZE_MAX = 4096
SGD_MINIBATCH_SIZE_DIST = tune.choice([128, 256, 512, 1024, 2048, 4096])

@staticmethod
def SGD_MINIBATCH_SIZE_RANGE(spec):  # pylint: disable=unused-argument
    """Return the default search space for the SGD minibatch size."""
    return ParametersPPO.SGD_MINIBATCH_SIZE_DIST
# Bounds and step for the train batch size; the resulting values are
# enumerated in the comment that follows.
TRAIN_BATCH_SIZE_MIN = 4096
TRAIN_BATCH_SIZE_MAX = 160000
TRAIN_BATCH_SIZE_INC = 256
# [4096, 4352, 4608, 4864, 5120, 5376, 5632, 5888, 6144, 6400, 6656, 6912, 7168, 7424, 7680, 7936, 8192, 8448,
# 8704, 8960, 9216, 9472, 9728, 9984, 10240, 10496, 10752, 11008, 11264, 11520, 11776, 12032, 12288, 12544,
# 12800, 13056, 13312, 13568, 13824, 14080, 14336, 14592, 14848, 15104, 15360, 15616, 15872, 16128, 16384,
# 16640, 16896, 17152, 17408, 17664, 17920, 18176, 18432, 18688, 18944, 19200, 19456, 19712, 19968, 20224,
# 20480, 20736, 20992, 21248, 21504, 21760, 22016, 22272, 22528, 22784, 23040, 23296, 23552, 23808, 24064,
# 24320, 24576, 24832, 25088, 25344, 25600, 25856, 26112, 26368, 26624, 26880, 27136, 27392, 27648, 27904,
# 28160, 28416, 28672, 28928, 29184, 29440, 29696, 29952, 30208, 30464, 30720, 30976, 31232, 31488, 31744,
# 32000, 32256, 32512, 32768, 33024, 33280, 33536, 33792, 34048, 34304, 34560, 34816, 35072, 35328, 35584,
# 35840, 36096, 36352, 36608, 36864, 37120, 37376, 37632, 37888, 38144, 38400, 38656, 38912, 39168, 39424,
# 39680, 39936, 40192, 40448, 40704, 40960, 41216, 41472, 41728, 41984, 42240, 42496, 42752, 43008, 43264,
# 43520, 43776, 44032, 44288, 44544, 44800, 45056, 45312, 45568, 45824, 46080, 46336, 46592, 46848, 47104,
# 47360, 47616, 47872, 48128, 48384, 48640, 48896, 49152, 49408, 49664, 49920, 50176, 50432, 50688, 50944,
# 51200, 51456, 51712, 51968, 52224, 52480, 52736, 52992, 53248, 53504, 53760, 54016, 54272, 54528, 54784,
# 55040, 55296, 55552, 55808, 56064, 56320, 56576, 56832, 57088, 57344, 57600, 57856, 58112, 58368, 58624,
# 58880, 59136, 59392, 59648, 59904, 60160, 60416, 60672, 60928, 61184, 61440, 61696, 61952, 62208, 62464,
# 62720, 62976, 63232, 63488, 63744, 64000, 64256, 64512, 64768, 65024, 65280, 65536, 65792, 66048, 66304,
# 66560, 66816, 67072, 67328, 67584, 67840, 68096, 68352, 68608, 68864, 69120, 69376, 69632, 69888, 70144,
# 70400, 70656, 70912, 71168, 71424, 71680, 71936, 72192, 72448, 72704, 72960, 73216, 73472, 73728, 73984,
# 74240, 74496, 74752, 75008, 75264, 75520, 75776, 76032, 76288, 76544, 76800, 77056, 77312, 77568, 77824,
# 78080, 78336, 78592, 78848, 79104, 79360, 79616, 79872, 80128, 80384, 80640, 80896, 81152, 81408, 81664,
# 81920, 82176, 82432, 82688, 82944, 83200, 83456, 83712, 83968, 84224, 84480, 84736, 84992, 85248, 85504,
# 85760, 86016, 86272, 86528, 86784, 87040, 87296, 87552, 87808, 88064, 88320, 88576, 88832, 89088, 89344,
# 89600, 89856, 90112, 90368, 90624, 90880, 91136, 91392, 91648, 91904, 92160, 92416, 92672, 92928, 93184,
# 93440, 93696, 93952, 94208, 94464, 94720, 94976, 95232, 95488, 95744, 96000, 96256, 96512, 96768, 97024,
# 97280, 97536, 97792, 98048, 98304, 98560, 98816, 99072, 99328, 99584, 99840, 100096, 100352, 100608, 100864,
# 101120, 101376, 101632, 101888, 102144, 102400, 102656, 102912, 103168, 103424, 103680, 103936, 104192,
# 104448, 104704, 104960, 105216, 105472, 105728, 105984, 106240, 106496, 106752, 107008, 107264, 107520,
# 107776, 108032, 108288, 108544, 108800, 109056, 109312, 109568, 109824, 110080, 110336, 110592, 110848,
# 111104, 111360, 111616, 111872, 112128, 112384, 112640, 112896, 113152, 113408, 113664, 113920, 114176,
# 114432, 114688, 114944, 115200, 115456, 115712, 115968, 116224, 116480, 116736, 116992, 117248, 117504,
# 117760, 118016, 118272, 118528, 118784, 119040, 119296, 119552, 119808, 120064, 120320, 120576, 120832,
# 121088, 121344, 121600, 121856, 122112, 122368, 122624, 122880, 123136, 123392, 123648, 123904, 124160,
# 124416, 124672, 124928, 125184, 125440, 125696, 125952, 126208, 126464, 126720, 126976, 127232, 127488,
# 127744, 128000, 128256, 128512, 128768, 129024, 129280, 129536, 129792, 130048, 130304, 130560, 130816,
# 131072, 131328, 131584, 131840, 132096, 132352, 132608, 132864, 133120, 133376, 133632, 133888, 134144,
# 134400, 134656, 134912, 135168, 135424, 135680, 135936, 136192, 136448, 136704, 136960, 137216, 137472,
# 137728, 137984, 138240, 138496, 138752, 139008, 139264, 139520, 139776, 140032, 140288, 140544, 140800,
# 141056, 141312, 141568, 141824, 142080, 142336, 142592, 142848, 143104, 143360, 143616, 143872, 144128,
# 144384, 144640, 144896, 145152, 145408, 145664, 145920, 146176, 146432, 146688, 146944, 147200, 147456,
# 147712, 147968, 148224, 148480, 148736, 148992, 149248, 149504, 149760, 150016, 150272, 150528, 150784,
# 151040, 151296, 151552, 151808, 152064, 152320, 152576, 152832, 153088, 153344, 153600, 153856, 154112,
# 154368, 154624, 154880, 155136, 155392, 155648, 155904, 156160, 156416, 156672, 156928, 157184, 157440,
# 157696, 157952, 158208, 158464, 158720, 158976, 159232, 159488, 159744, 160000]
# Every multiple of TRAIN_BATCH_SIZE_INC from TRAIN_BATCH_SIZE_MIN through
# TRAIN_BATCH_SIZE_MAX (inclusive).
TRAIN_BATCH_SIZE_DIST = tune.choice(
    list(range(TRAIN_BATCH_SIZE_MIN, TRAIN_BATCH_SIZE_MAX + TRAIN_BATCH_SIZE_INC, TRAIN_BATCH_SIZE_INC))
)

@staticmethod
def TRAIN_BATCH_SIZE_RANGE(spec):  # pylint: disable=unused-argument
    """Return the default search space for the train batch size."""
    return ParametersPPO.TRAIN_BATCH_SIZE_DIST
@staticmethod
def ppo_hyperparameters() -> dict:
    """Build the PPO hyperparameter search space.

    https://medium.com/aureliantactics/ppo-hyperparameters-and-ranges-6fc2d29bccbe

    Returns:
        dict -- hyperparameter name mapped to ``tune.sample_from`` of the
        matching ``*_RANGE`` function
    """
    # (config key, range function) pairs; the order fixes the key order.
    range_functions = (
        ("lambda", ParametersPPO.LAMBDA_RANGE),
        ("vf_loss_coeff", ParametersPPO.VF_LOSS_COEFF_RANGE),
        ("entropy_coeff", ParametersPPO.ENTROPY_COEFF_RANGE),
        ("clip_param", ParametersPPO.CLIP_PARAM_RANGE),
        ("gamma", ParametersPPO.GAMMA_RANGE),
        ("lr", ParametersPPO.LR_RANGE),
        ("num_sgd_iter", ParametersPPO.NSGD_RANGE),
        ("sgd_minibatch_size", ParametersPPO.SGD_MINIBATCH_SIZE_RANGE),
        ("train_batch_size", ParametersPPO.TRAIN_BATCH_SIZE_RANGE),
        ("kl_coeff", ParametersPPO.KL_COEFF_RANGE),
        ("kl_target", ParametersPPO.KL_TARGET_RANGE),
    )
    return {name: tune.sample_from(range_fn) for name, range_fn in range_functions}
@staticmethod
def sample_ppo_hyperparameters() -> dict:
    """Draw one sample for every PPO hyperparameter in the search space.

    https://medium.com/aureliantactics/ppo-hyperparameters-and-ranges-6fc2d29bccbe

    Returns:
        dict -- hyperparameter name mapped to the result of calling
        ``.sample()`` on its ``ppo_hyperparameters()`` entry
    """
    search_space = ParametersPPO.ppo_hyperparameters()
    return {name: domain.sample() for name, domain in search_space.items()}
@staticmethod
def pbt_ppo_explore(config: dict) -> dict:
    """Clamp a PBT-perturbed PPO config back into the ParametersPPO bounds.

    Companion to ppo_hyperparameters(): every hyperparameter produced there is
    clipped to its ParametersPPO *_MIN / *_MAX limits. The integer-valued
    settings (num_sgd_iter, sgd_minibatch_size, train_batch_size) are cast to
    int, and train_batch_size is raised to at least twice sgd_minibatch_size.
    https://medium.com/aureliantactics/ppo-hyperparameters-and-ranges-6fc2d29bccbe

    Arguments:
        config {dict} -- input config; modified in place
    Returns:
        dict -- clipped config (the same object that was passed in)
    Raises:
        KeyError -- if config lacks one of the clipped hyperparameters
    """

    def clip_parameter(config, parameter, parameter_max, parameter_min):
        # Clamp config[parameter] in place to [parameter_min, parameter_max].
        if config[parameter] > parameter_max:
            config[parameter] = parameter_max
        elif config[parameter] < parameter_min:
            config[parameter] = parameter_min

    # (name, upper bound, lower bound) for the float-valued hyperparameters.
    # lr, kl_coeff and kl_target are listed max-first like the others; passing
    # them min-first would collapse every in-range value onto a bound.
    float_bounds = (
        ("lambda", ParametersPPO.LAMBDA_MAX, ParametersPPO.LAMBDA_MIN),
        ("vf_loss_coeff", ParametersPPO.VF_LOSS_COEFF_MAX, ParametersPPO.VF_LOSS_COEFF_MIN),
        ("entropy_coeff", ParametersPPO.ENTROPY_COEFF_MAX, ParametersPPO.ENTROPY_COEFF_MIN),
        ("gamma", ParametersPPO.GAMMA_MAX, ParametersPPO.GAMMA_MIN),
        ("clip_param", ParametersPPO.CLIP_PARAM_MAX, ParametersPPO.CLIP_PARAM_MIN),
        ("lr", ParametersPPO.LR_MAX, ParametersPPO.LR_MIN),
        ("kl_coeff", ParametersPPO.KL_COEFF_MAX, ParametersPPO.KL_COEFF_MIN),
        ("kl_target", ParametersPPO.KL_TARGET_MAX, ParametersPPO.KL_TARGET_MIN),
    )
    for name, upper, lower in float_bounds:
        # Bounds go in by keyword so the max/min order cannot be swapped silently.
        clip_parameter(config, name, parameter_max=upper, parameter_min=lower)

    sgd_minibatch_size_str = "sgd_minibatch_size"
    train_batch_size_str = "train_batch_size"
    num_sgd_iter_str = "num_sgd_iter"

    clip_parameter(
        config,
        num_sgd_iter_str,
        parameter_max=ParametersPPO.NSGD_MAX,
        parameter_min=ParametersPPO.NSGD_MIN,
    )
    config[num_sgd_iter_str] = int(config[num_sgd_iter_str])

    clip_parameter(
        config,
        sgd_minibatch_size_str,
        parameter_max=ParametersPPO.SGD_MINIBATCH_SIZE_MAX,
        parameter_min=ParametersPPO.SGD_MINIBATCH_SIZE_MIN,
    )
    config[sgd_minibatch_size_str] = int(config[sgd_minibatch_size_str])

    clip_parameter(
        config,
        train_batch_size_str,
        parameter_max=ParametersPPO.TRAIN_BATCH_SIZE_MAX,
        parameter_min=ParametersPPO.TRAIN_BATCH_SIZE_MIN,
    )
    # Keep the train batch at least two minibatches wide, even if that exceeds
    # the clipped value above.
    if config[train_batch_size_str] < config[sgd_minibatch_size_str] * 2:
        config[train_batch_size_str] = config[sgd_minibatch_size_str] * 2
    config[train_batch_size_str] = int(config[train_batch_size_str])
    return config
CLIP_PARAM_RANGE(spec)
staticmethod
¤
Sets the default search space for HPARAM Clipping Range: 0.1, 0.2, 0.3 Clipping also known as: Clipping parameter epsilon (PPO Paper), clip_param (RLlib), cliprange (ppo2 baselines), clip_param (ppo baselines), epsilon (Unity ML), likelihood_ratio_clipping (TensorForce)
Source code in corl/libraries/hparam_search_util.py
@staticmethod
def CLIP_PARAM_RANGE(spec):  # pylint: disable=W0613
    """Return the default search space for the PPO clipping parameter.

    Clipping Range: 0.1, 0.2, 0.3
    The same setting is called: Clipping parameter epsilon (PPO Paper),
    clip_param (RLlib), cliprange (ppo2 baselines), clip_param (ppo baselines),
    epsilon (Unity ML), likelihood_ratio_clipping (TensorForce)

    Arguments:
        spec -- ignored; kept so the function retains its one-argument signature
    Returns:
        the value of ParametersPPO.CLIP_PARAM_DIST
    """
    clip_param_space = ParametersPPO.CLIP_PARAM_DIST
    return clip_param_space
ENTROPY_COEFF_RANGE(spec)
staticmethod
¤
Sets the default search space for HPARAM Entropy Coefficient Range: 0 to 0.01 Entropy Coefficient also known as: Entropy coeff. (PPO Paper), entropy_coeff (RLlib), ent_coeff (ppo2 baselines), entcoeff (ppo baselines), beta (Unity ML), entropy_regularization (TensorForce)
Source code in corl/libraries/hparam_search_util.py
@staticmethod
def ENTROPY_COEFF_RANGE(spec):  # pylint: disable=W0613
    """Return the default search space for the entropy coefficient.

    Entropy Coefficient Range: 0 to 0.01
    The same setting is called: Entropy coeff. (PPO Paper), entropy_coeff (RLlib),
    ent_coeff (ppo2 baselines), entcoeff (ppo baselines), beta (Unity ML),
    entropy_regularization (TensorForce)

    Arguments:
        spec -- ignored; kept so the function retains its one-argument signature
    Returns:
        the value of ParametersPPO.ENTROPY_COEFF_DIST
    """
    entropy_coeff_space = ParametersPPO.ENTROPY_COEFF_DIST
    return entropy_coeff_space
GAMMA_RANGE(spec)
staticmethod
¤
Sets the default search space for HPARAM Discount Factor Gamma Range: 0.99 (most common), 0.8 to 0.9997 Discount Factor Gamma also known as: Discount (gamma) (PPO Paper), gamma (RLlib), gamma (ppo2 baselines), gamma (ppo baselines), gamma (Unity ML), discount (TensorForce)
Source code in corl/libraries/hparam_search_util.py
@staticmethod
def GAMMA_RANGE(spec):  # pylint: disable=W0613
    """Return the default search space for the discount factor gamma.

    Discount Factor Gamma Range: 0.99 (most common), 0.8 to 0.9997
    The same setting is called: Discount (gamma) (PPO Paper), gamma (RLlib),
    gamma (ppo2 baselines), gamma (ppo baselines), gamma (Unity ML),
    discount (TensorForce)

    Arguments:
        spec -- ignored; kept so the function retains its one-argument signature
    Returns:
        the value of ParametersPPO.GAMMA_DIST
    """
    gamma_space = ParametersPPO.GAMMA_DIST
    return gamma_space
KL_COEFF_RANGE(spec)
staticmethod
¤
Sets the default search space for HPARAM The KL penalty implementation (third line in the above picture) is available in RLlib’s PPO implementation. The parameters kl_coeff (initial coefficient for KL divergence) and kl_target can be used for the KL implementation. KL Target Range: 0.003 to 0.03 KL Initialization Range: 0.3 to 1 --- KL_COEFF IN RLLIB
Source code in corl/libraries/hparam_search_util.py
@staticmethod
def KL_COEFF_RANGE(spec):  # pylint: disable=W0613
    """Return the default search space for kl_coeff.

    RLlib's PPO implementation offers the KL penalty variant; kl_coeff (the
    initial coefficient for the KL divergence) and kl_target are its two
    parameters.
    KL Target Range: 0.003 to 0.03
    KL Initialization Range: 0.3 to 1 --- KL_COEFF IN RLLIB

    Arguments:
        spec -- ignored; kept so the function retains its one-argument signature
    Returns:
        the value of ParametersPPO.KL_COEFF_DIST
    """
    kl_coeff_space = ParametersPPO.KL_COEFF_DIST
    return kl_coeff_space
KL_TARGET_RANGE(spec)
staticmethod
¤
Sets the default search space for HPARAM https://medium.com/aureliantactics/ppo-hyperparameters-and-ranges-6fc2d29bccbe The KL penalty implementation (third line in the above picture) is available in RLlib’s PPO implementation. The parameters kl_coeff (initial coefficient for KL divergence) and kl_target can be used for the KL implementation. KL Target Range: 0.003 to 0.03 KL Initialization Range: 0.3 to 1 --- KL_COEFF IN RLLIB
Source code in corl/libraries/hparam_search_util.py
@staticmethod
def KL_TARGET_RANGE(spec):  # pylint: disable=W0613
    """Return the default search space for kl_target.

    https://medium.com/aureliantactics/ppo-hyperparameters-and-ranges-6fc2d29bccbe
    RLlib's PPO implementation offers the KL penalty variant; kl_coeff (the
    initial coefficient for the KL divergence) and kl_target are its two
    parameters.
    KL Target Range: 0.003 to 0.03
    KL Initialization Range: 0.3 to 1 --- KL_COEFF IN RLLIB

    Arguments:
        spec -- ignored; kept so the function retains its one-argument signature
    Returns:
        the value of ParametersPPO.KL_TARGET_DIST
    """
    # Must be the kl_target space, not KL_COEFF_DIST: the two parameters have
    # different documented ranges.
    # NOTE(review): assumes ParametersPPO.KL_TARGET_DIST is defined next to
    # KL_TARGET_MIN / KL_TARGET_MAX, like the other *_DIST attributes - confirm.
    return ParametersPPO.KL_TARGET_DIST
LAMBDA_RANGE(spec)
staticmethod
¤
Sets the default search space for HPARAM GAE Parameter Lambda Range: 0.9 to 1 GAE Parameter Lambda also known as: GAE Parameter (lambda) (PPO Paper), lambda (RLlib), lambda (ppo2 baselines), lambda (ppo baselines), lambda (Unity ML), gae_lambda (TensorForce)
Source code in corl/libraries/hparam_search_util.py
@staticmethod
def LAMBDA_RANGE(spec):  # pylint: disable=W0613
    """Return the default search space for the GAE parameter lambda.

    GAE Parameter Lambda Range: 0.9 to 1
    The same setting is called: GAE Parameter (lambda) (PPO Paper), lambda (RLlib),
    lambda (ppo2 baselines), lambda (ppo baselines), lambda (Unity ML),
    gae_lambda (TensorForce)

    Arguments:
        spec -- ignored; kept so the function retains its one-argument signature
    Returns:
        the value of ParametersPPO.LAMBDA_DIST
    """
    lambda_space = ParametersPPO.LAMBDA_DIST
    return lambda_space
LR_RANGE(spec)
staticmethod
¤
Sets the default search space for HPARAM Learning Rate Range: 0.003 to 5e-6 Learning Rate also known as: Adam stepsize (PPO Paper), sgd_stepsize (RLlib), lr (ppo2 baselines), (ppo baselines: unclear), learning_rate (Unity ML), learning_rate (TensorForce)
Source code in corl/libraries/hparam_search_util.py
@staticmethod
def LR_RANGE(spec):  # pylint: disable=W0613
    """Return the default search space for the learning rate.

    Learning Rate Range: 0.003 to 5e-6
    The same setting is called: Adam stepsize (PPO Paper), sgd_stepsize (RLlib),
    lr (ppo2 baselines), (ppo baselines: unclear), learning_rate (Unity ML),
    learning_rate (TensorForce)

    Arguments:
        spec -- ignored; kept so the function retains its one-argument signature
    Returns:
        the value of ParametersPPO.LR_DIST
    """
    lr_space = ParametersPPO.LR_DIST
    return lr_space
NSGD_RANGE(spec)
staticmethod
¤
Sets the default search space for HPARAM https://medium.com/aureliantactics/ppo-hyperparameters-and-ranges-6fc2d29bccbe Epoch Range: 3 to 30 Epoch also known as: Num. epochs (PPO paper), num_sgd_iter (RLlib), epochs (ppo2 baselines), optim_epochs (ppo baselines), num_epoch (Unity ML), (TensorForce: unclear)
Source code in corl/libraries/hparam_search_util.py
@staticmethod
def NSGD_RANGE(spec):  # pylint: disable=W0613
    """Return the default search space for the number of SGD epochs.

    https://medium.com/aureliantactics/ppo-hyperparameters-and-ranges-6fc2d29bccbe
    Epoch Range: 3 to 30
    The same setting is called: Num. epochs (PPO paper), num_sgd_iter (RLlib),
    epochs (ppo2 baselines), optim_epochs (ppo baselines), num_epoch (Unity ML),
    (TensorForce: unclear)

    Arguments:
        spec -- ignored; kept so the function retains its one-argument signature
    Returns:
        the value of ParametersPPO.NSGD_DIST
    """
    nsgd_space = ParametersPPO.NSGD_DIST
    return nsgd_space
SGD_MINIBATCH_SIZE_RANGE(spec)
staticmethod
¤
Sets the default search space for HPARAM
Source code in corl/libraries/hparam_search_util.py
@staticmethod
def SGD_MINIBATCH_SIZE_RANGE(spec):  # pylint: disable=W0613
    """Return the default search space for the SGD minibatch size.

    Arguments:
        spec -- ignored; kept so the function retains its one-argument signature
    Returns:
        the value of ParametersPPO.SGD_MINIBATCH_SIZE_DIST
    """
    minibatch_size_space = ParametersPPO.SGD_MINIBATCH_SIZE_DIST
    return minibatch_size_space
TRAIN_BATCH_SIZE_RANGE(spec)
staticmethod
¤
Sets the default search space for HPARAM
Source code in corl/libraries/hparam_search_util.py
@staticmethod
def TRAIN_BATCH_SIZE_RANGE(spec):  # pylint: disable=W0613
    """Return the default search space for the train batch size.

    Arguments:
        spec -- ignored; kept so the function retains its one-argument signature
    Returns:
        the value of ParametersPPO.TRAIN_BATCH_SIZE_DIST
    """
    train_batch_size_space = ParametersPPO.TRAIN_BATCH_SIZE_DIST
    return train_batch_size_space
VF_LOSS_COEFF_RANGE(spec)
staticmethod
¤
Sets the default search space for HPARAM Value Function Coefficient Range: 0.5, 1 Value Function Coefficient also known as: VF coeff. (PPO Paper), vf_loss_coef (RLlib), vf_coef (ppo2 baselines), (ppo baselines: unclear), (Unity ML: unclear), (TensorForce: unclear)
Source code in corl/libraries/hparam_search_util.py
@staticmethod
def VF_LOSS_COEFF_RANGE(spec):  # pylint: disable=W0613
    """Return the default search space for the value function loss coefficient.

    Value Function Coefficient Range: 0.5, 1
    The same setting is called: VF coeff. (PPO Paper), vf_loss_coef (RLlib),
    vf_coef (ppo2 baselines), (ppo baselines: unclear), (Unity ML: unclear),
    (TensorForce: unclear)

    Arguments:
        spec -- ignored; kept so the function retains its one-argument signature
    Returns:
        the value of ParametersPPO.VF_LOSS_COEFF_DIST
    """
    vf_loss_coeff_space = ParametersPPO.VF_LOSS_COEFF_DIST
    return vf_loss_coeff_space
pbt_ppo_explore(config)
staticmethod
¤
The following function links to the companion function above. Sets the clipping needed by PBT https://medium.com/aureliantactics/ppo-hyperparameters-and-ranges-6fc2d29bccbe
Returns:
| Type | Description |
|---|---|
| dict | clipped config |
Source code in corl/libraries/hparam_search_util.py
@staticmethod
def pbt_ppo_explore(config: dict) -> dict:
    """Clamp a PBT-perturbed PPO config back into the ParametersPPO bounds.

    Companion to ppo_hyperparameters(): every hyperparameter produced there is
    clipped to its ParametersPPO *_MIN / *_MAX limits. The integer-valued
    settings (num_sgd_iter, sgd_minibatch_size, train_batch_size) are cast to
    int, and train_batch_size is raised to at least twice sgd_minibatch_size.
    https://medium.com/aureliantactics/ppo-hyperparameters-and-ranges-6fc2d29bccbe

    Arguments:
        config {dict} -- input config; modified in place
    Returns:
        dict -- clipped config (the same object that was passed in)
    Raises:
        KeyError -- if config lacks one of the clipped hyperparameters
    """

    def clip_parameter(config, parameter, parameter_max, parameter_min):
        # Clamp config[parameter] in place to [parameter_min, parameter_max].
        if config[parameter] > parameter_max:
            config[parameter] = parameter_max
        elif config[parameter] < parameter_min:
            config[parameter] = parameter_min

    # (name, upper bound, lower bound) for the float-valued hyperparameters.
    # lr, kl_coeff and kl_target are listed max-first like the others; passing
    # them min-first would collapse every in-range value onto a bound.
    float_bounds = (
        ("lambda", ParametersPPO.LAMBDA_MAX, ParametersPPO.LAMBDA_MIN),
        ("vf_loss_coeff", ParametersPPO.VF_LOSS_COEFF_MAX, ParametersPPO.VF_LOSS_COEFF_MIN),
        ("entropy_coeff", ParametersPPO.ENTROPY_COEFF_MAX, ParametersPPO.ENTROPY_COEFF_MIN),
        ("gamma", ParametersPPO.GAMMA_MAX, ParametersPPO.GAMMA_MIN),
        ("clip_param", ParametersPPO.CLIP_PARAM_MAX, ParametersPPO.CLIP_PARAM_MIN),
        ("lr", ParametersPPO.LR_MAX, ParametersPPO.LR_MIN),
        ("kl_coeff", ParametersPPO.KL_COEFF_MAX, ParametersPPO.KL_COEFF_MIN),
        ("kl_target", ParametersPPO.KL_TARGET_MAX, ParametersPPO.KL_TARGET_MIN),
    )
    for name, upper, lower in float_bounds:
        # Bounds go in by keyword so the max/min order cannot be swapped silently.
        clip_parameter(config, name, parameter_max=upper, parameter_min=lower)

    sgd_minibatch_size_str = "sgd_minibatch_size"
    train_batch_size_str = "train_batch_size"
    num_sgd_iter_str = "num_sgd_iter"

    clip_parameter(
        config,
        num_sgd_iter_str,
        parameter_max=ParametersPPO.NSGD_MAX,
        parameter_min=ParametersPPO.NSGD_MIN,
    )
    config[num_sgd_iter_str] = int(config[num_sgd_iter_str])

    clip_parameter(
        config,
        sgd_minibatch_size_str,
        parameter_max=ParametersPPO.SGD_MINIBATCH_SIZE_MAX,
        parameter_min=ParametersPPO.SGD_MINIBATCH_SIZE_MIN,
    )
    config[sgd_minibatch_size_str] = int(config[sgd_minibatch_size_str])

    clip_parameter(
        config,
        train_batch_size_str,
        parameter_max=ParametersPPO.TRAIN_BATCH_SIZE_MAX,
        parameter_min=ParametersPPO.TRAIN_BATCH_SIZE_MIN,
    )
    # Keep the train batch at least two minibatches wide, even if that exceeds
    # the clipped value above.
    if config[train_batch_size_str] < config[sgd_minibatch_size_str] * 2:
        config[train_batch_size_str] = config[sgd_minibatch_size_str] * 2
    config[train_batch_size_str] = int(config[train_batch_size_str])
    return config
ppo_hyperparameters()
staticmethod
¤
PPO hyper parameters for hparam search https://medium.com/aureliantactics/ppo-hyperparameters-and-ranges-6fc2d29bccbe
Returns:
| Type | Description |
|---|---|
| dict | model configuration |
Source code in corl/libraries/hparam_search_util.py
@staticmethod
def ppo_hyperparameters() -> dict:
    """Build the PPO hyperparameter search space used for hparam search.

    Each entry wraps the matching ParametersPPO.*_RANGE function in
    tune.sample_from.
    https://medium.com/aureliantactics/ppo-hyperparameters-and-ranges-6fc2d29bccbe

    Returns:
        dict -- hyperparameter name mapped to its tune.sample_from wrapper
    """
    # Key order matches the order in which the entries were originally inserted.
    return {
        "lambda": tune.sample_from(ParametersPPO.LAMBDA_RANGE),
        "vf_loss_coeff": tune.sample_from(ParametersPPO.VF_LOSS_COEFF_RANGE),
        "entropy_coeff": tune.sample_from(ParametersPPO.ENTROPY_COEFF_RANGE),
        "clip_param": tune.sample_from(ParametersPPO.CLIP_PARAM_RANGE),
        "gamma": tune.sample_from(ParametersPPO.GAMMA_RANGE),
        "lr": tune.sample_from(ParametersPPO.LR_RANGE),
        "num_sgd_iter": tune.sample_from(ParametersPPO.NSGD_RANGE),
        "sgd_minibatch_size": tune.sample_from(ParametersPPO.SGD_MINIBATCH_SIZE_RANGE),
        "train_batch_size": tune.sample_from(ParametersPPO.TRAIN_BATCH_SIZE_RANGE),
        "kl_coeff": tune.sample_from(ParametersPPO.KL_COEFF_RANGE),
        "kl_target": tune.sample_from(ParametersPPO.KL_TARGET_RANGE),
    }
sample_ppo_hyperparameters()
staticmethod
¤
PPO hyper parameters for hparam search https://medium.com/aureliantactics/ppo-hyperparameters-and-ranges-6fc2d29bccbe
Returns:
| Type | Description |
|---|---|
| dict | model configuration |
Source code in corl/libraries/hparam_search_util.py
@staticmethod
def sample_ppo_hyperparameters() -> dict:
    """Draw one sample for every PPO hyperparameter in the search space.

    Takes the dict produced by ParametersPPO.ppo_hyperparameters() and replaces
    each entry with the result of calling ``.sample()`` on it.
    https://medium.com/aureliantactics/ppo-hyperparameters-and-ranges-6fc2d29bccbe

    Returns:
        dict -- hyperparameter name mapped to the sampled result
    """
    search_space = ParametersPPO.ppo_hyperparameters()
    return {name: domain.sample() for name, domain in search_space.items()}