Commit 37e5aee0 authored by lli
Browse files

update train_dqn

parent 492aa8d6
......@@ -106,7 +106,7 @@ for episode in range(n_episode):
if args.policy == 'epsilon_greedy':
action = dqn.eps_greedy_policy(state, n_action, epsilon)
else:
- action = dqn.boltzmann_policy(state, n_action)
+ action = dqn.boltzmann_policy(state, n_action, 0.5)
next_state, reward, is_done, info = env.step(action)
total_reward_episode[episode] += reward
......@@ -121,9 +121,14 @@ for episode in range(n_episode):
losses.append(loss)
state = next_state
- print(
- f'episode: {episode}, total reward: {total_reward_episode[episode]}, epsilon: {epsilon}, loss: {loss}, '
- f'num_no_capacity: {num_no_capacity[episode]}, accepted orders: {accepted_orders[episode]}')
+ if args.policy == 'epsilon_greedy':
+ print(
+ f'episode: {episode}, total reward: {total_reward_episode[episode]}, epsilon: {epsilon}, loss: {loss}, '
+ f'num_no_capacity: {num_no_capacity[episode]}, accepted orders: {accepted_orders[episode]}')
+ else:
+ print(
+ f'episode: {episode}, total reward: {total_reward_episode[episode]}, loss: {loss}, '
+ f'num_no_capacity: {num_no_capacity[episode]}, accepted orders: {accepted_orders[episode]}')
print(f"Training time for {n_episode} episodes: {timer() - start_time}")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment