Commit f2f41b14 authored by lli
Browse files

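Change replay batch size: read it from the new --batch_size command-line argument (default 64) instead of the hard-coded replay_batch_size constant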

parent 168e0fcb
......@@ -20,4 +20,4 @@ epsilon_decay = True # Using epsilon decay
tau_decay = True # Using boltzmann exploration, decay temperature
replay_buffer = deque(maxlen=10000) # Size of replay buffer
replay_batch_size = 64 # Size of replay batch
#replay_batch_size = 64 # Size of replay batch
......@@ -25,6 +25,7 @@ parser.add_argument('--lr', type=float, default=0.01, help='learning rate (defau
parser.add_argument('--seed', type=int, default=None, help='random seed')
parser.add_argument('--policy', type=str, choices=('epsilon_greedy', 'boltzmann'))
parser.add_argument('--n_episode', type=int, required=True, help='number of training episodes')
parser.add_argument('--batch_size', type=int, default=64, help='replay batch size')
args = parser.parse_args()
# Check if using cuda and define device
......@@ -56,7 +57,7 @@ if args.policy == 'epsilon_greedy':
if epsilon_decay:
print(f'Training using epsilon decay: {epsilon_decay}')
print(f'Size of experience replay buffer: {replay_buffer}')
print(f'Size of experience replay batch: {replay_batch_size}')
print(f'Size of experience replay batch: {args.batch_size}')
# Initialize DQN network
......@@ -123,7 +124,7 @@ for episode in range(n_episode):
accepted_orders.append(info['Accepted orders'])
loss = dqn.replay(replay_buffer, replay_batch_size, gamma)
loss = dqn.replay(replay_buffer, args.batch_size, gamma)
state = next_state
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment