RL

Byron, 12 March 2020

Hyperparameters

First group

GAMMA = 0.99
LEARNING_RATE = 0.001
ENTROPY_BETA = 0.01
BATCH_SIZE = 128
NUM_ENVS = 50
REWARD_STEPS = 4
CLIP_GRAD = 0.1

Second group

GAMMA = 0.99
LEARNING_RATE = 0.005
ENTROPY_BETA = 0.01
BATCH_SIZE = 256
NUM_ENVS = 100
REWARD_STEPS = 4
CLIP_GRAD = 0.1

Third group

GAMMA = 0.99
LEARNING_RATE = 0.001
ENTROPY_BETA = 0.01
BATCH_SIZE = 128
NUM_ENVS = 100
REWARD_STEPS = 4
CLIP_GRAD = 0.1

Fourth group

GAMMA = 0.99
LEARNING_RATE = 0.001
ENTROPY_BETA = 0.01
BATCH_SIZE = 128
NUM_ENVS = 100
REWARD_STEPS = 4
CLIP_GRAD = 0.1