Commit d289fe1f authored by lli's avatar lli
Browse files

updated epsilon decay

parent d72dd903
......@@ -103,10 +103,7 @@ for episode in range(n_episode):
if args.policy == 'epsilon_greedy':
if epsilon_decay:
epsilon = epsilon_end + (epsilon - epsilon_end) * math.exp(-1. * steps_done / decay)
epsilon_value.append(epsilon)
steps_done += 1
else:
epsilon = stretched_exponential_decay(episode, args.n_episode, 0.1, 0.1, 0.1)
epsilon_value.append(epsilon)
while not is_done:
......@@ -218,6 +215,7 @@ EVA_FILE = os.path.join(OUT_PATH, 'evaluation')
save_list(total_reward_episode_eva, EVA_FILE, 'total_reward_episode_eva')
save_list(num_no_capacity_eva, EVA_FILE, 'num_no_capacity_eva')
save_list(accepted_orders_eva, EVA_FILE, 'accepted_orders_eva')
save_list(epsilon_value, EVA_FILE, 'epsilon_value')
# Load optimal solution
optimal_rewards = load('dp/results.npy')
......
......@@ -2,6 +2,7 @@ import errno
import os
import shutil
import sys
import math
import pickle
import numpy as np
......@@ -70,6 +71,21 @@ def modify_orders(orders):
return np.asarray(result, dtype=int)
def stretched_exponential_decay(episode, n_episode, a=0.2, b=0.1, c=0.1):
    """Compute the exploration rate (epsilon) via stretched exponential decay.

    Produces a smooth S-shaped schedule: epsilon stays high early in
    training (exploration) and decays toward a small value late in
    training (exploitation).

    :param episode: current episode index
    :param n_episode: total number of episodes
    :param a: shifts the transition point — below .5 the agent spends more
        time exploiting, above .5 more time exploring
    :param b: slope of the transition region between exploration and
        exploitation
    :param c: steepness of the left and right tails of the curve
    :return: epsilon value for the given episode
    """
    # Center the schedule at a*n_episode and scale its width by b*n_episode.
    scaled_time = (episode - a * n_episode) / (b * n_episode)
    # cosh(exp(-t)) grows rapidly for negative t, so 1/cosh(...) is near 0
    # early on and approaches 1 once scaled_time becomes positive.
    damping = np.cosh(math.exp(-scaled_time))
    # The linear c-term gives the right tail a gentle downward slope.
    return 1.1 - (1.0 / damping + episode * c / n_episode)
class StdOut(object):
"""Redirect stdout to file, and print to console as well.
"""
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment