RL
Byron, 12 March 2020
Hyperparameters
First group
GAMMA = 0.99
LEARNING_RATE = 0.001
ENTROPY_BETA = 0.01
BATCH_SIZE = 128
NUM_ENVS = 50
REWARD_STEPS = 4
CLIP_GRAD = 0.1
Second group
GAMMA = 0.99
LEARNING_RATE = 0.005
ENTROPY_BETA = 0.01
BATCH_SIZE = 256
NUM_ENVS = 100
REWARD_STEPS = 4
CLIP_GRAD = 0.1
Third group
GAMMA = 0.99
LEARNING_RATE = 0.001
ENTROPY_BETA = 0.01
BATCH_SIZE = 128
NUM_ENVS = 100
REWARD_STEPS = 4
CLIP_GRAD = 0.1
Fourth group
GAMMA = 0.99
LEARNING_RATE = 0.001
ENTROPY_BETA = 0.01
BATCH_SIZE = 128
NUM_ENVS = 100
REWARD_STEPS = 4
CLIP_GRAD = 0.1