......@@ -156,7 +156,7 @@ plt.legend(handles=[rewards, avg_rewards], loc='best')
# NOTE(review): the line above looks like a unified-diff hunk header followed by
# its context text, and the lines below carry no +/- markers or indentation, so
# this appears to be a diff excerpt rather than runnable Python -- confirm
# against the real file before relying on it.
# Save the figure as 'training_total_rewards.png' inside OUT_PATH, passing
# dpi=1200, transparent=True and bbox_inches='tight' to plt.savefig.
plt.savefig(os.path.join(OUT_PATH, 'training_total_rewards.png'), dpi=1200, transparent=True, bbox_inches='tight')
# Plot epsilon
# Plot epsilon and tau
# NOTE(review): the two comments above are presumably the removed and the added
# version of the same line -- verify which one the current file contains.
# The title below is presumably set only when args.policy is 'epsilon_greedy';
# the indentation that would show the `if` body is not preserved here -- confirm.
if args.policy == 'epsilon_greedy':
plt.title('Epsilon over time')
......@@ -227,10 +227,14 @@ for ep in range(test_orders.shape[0]):
# NOTE(review): the line above looks like a unified-diff hunk header followed by
# its context text; the lines below carry no +/- markers or indentation, so old
# and new versions of a line may both be present -- confirm against the real file.
# Save the variables for evaluation
# Every save_list call below receives the same target: 'evaluation' under OUT_PATH.
EVA_FILE = os.path.join(OUT_PATH, 'evaluation')
# save_list is defined outside this excerpt; each call passes the data, EVA_FILE
# and a name string -- presumably the name identifies the stored entry; verify.
save_list(total_reward_episode, EVA_FILE, 'total_reward_episode_train')
save_list(total_reward_episode_eva, EVA_FILE, 'total_reward_episode_eva')
save_list(num_no_capacity_eva, EVA_FILE, 'num_no_capacity_eva')
save_list(accepted_orders_eva, EVA_FILE, 'accepted_orders_eva')
# NOTE(review): epsilon_value is saved here and again after the policy check
# below; presumably this line is the removed version and the checked one is its
# replacement -- confirm.
save_list(epsilon_value, EVA_FILE, 'epsilon_value')
if args.policy == 'epsilon_greedy':
save_list(epsilon_value, EVA_FILE, 'epsilon_value')
# NOTE(review): with indentation lost it is unclear whether the tau_value save
# belongs to the `if` above, to a branch omitted from this excerpt, or runs
# unconditionally -- confirm.
save_list(tau_value, EVA_FILE, 'tau_value')
# Load optimal solution
# `load` is not defined in this excerpt -- presumably numpy.load given the
# '.npy' path; confirm. The path is relative, so it depends on the working directory.
optimal_rewards = load('dp/results.npy')
