Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
lli
YM-Seminar
Commits
d289fe1f
Commit
d289fe1f
authored
Apr 20, 2021
by
lli
Browse files
updated epsilon decay
parent
d72dd903
Changes
2
Hide whitespace changes
Inline
Side-by-side
train_dqn.py
View file @
d289fe1f
...
...
@@ -103,10 +103,7 @@ for episode in range(n_episode):
if
args
.
policy
==
'epsilon_greedy'
:
if
epsilon_decay
:
epsilon
=
epsilon_end
+
(
epsilon
-
epsilon_end
)
*
math
.
exp
(
-
1.
*
steps_done
/
decay
)
epsilon_value
.
append
(
epsilon
)
steps_done
+=
1
else
:
epsilon
=
stretched_exponential_decay
(
episode
,
args
.
n_episode
,
0.1
,
0.1
,
0.1
)
epsilon_value
.
append
(
epsilon
)
while
not
is_done
:
...
...
@@ -218,6 +215,7 @@ EVA_FILE = os.path.join(OUT_PATH, 'evaluation')
# Persist every evaluation series under EVA_FILE, each stored under its own name.
for _series, _label in (
    (total_reward_episode_eva, 'total_reward_episode_eva'),
    (num_no_capacity_eva, 'num_no_capacity_eva'),
    (accepted_orders_eva, 'accepted_orders_eva'),
    (epsilon_value, 'epsilon_value'),
):
    save_list(_series, EVA_FILE, _label)

# Load optimal solution
optimal_rewards = load('dp/results.npy')
...
...
utils/utils.py
View file @
d289fe1f
...
...
@@ -2,6 +2,7 @@ import errno
import
os
import
shutil
import
sys
import
math
import
pickle
import
numpy
as
np
...
...
@@ -70,6 +71,21 @@ def modify_orders(orders):
return
np
.
asarray
(
result
,
dtype
=
int
)
def stretched_exponential_decay(episode, n_episode, a=0.2, b=0.1, c=0.1):
    """
    Stretched exponential decay for the epsilon-greedy algorithm.

    Evaluates ``1.1 - (1 / cosh(exp(-t)) + c * episode / n_episode)`` with
    ``t = (episode - a * n_episode) / (b * n_episode)``. The result is not
    clamped: it tends to 1.1 when ``t`` is very negative and ``episode`` is 0.

    :param episode: current episode
    :param n_episode: the number of episodes
    :param a: decides where the agent spends more time
        (< .5 more exploiting, > .5 more exploring)
    :param b: decides the slope of the transition region between exploration
        and exploitation
    :param c: controls the steepness of the left and right tails of the graph
    :return: the epsilon value for ``episode``
    :raises ZeroDivisionError: if ``b * n_episode`` is zero (plain Python numbers)
    """
    standardized_time = (episode - a * n_episode) / (b * n_episode)
    # np.exp saturates to inf where math.exp would raise OverflowError (steep
    # transitions, i.e. very negative standardized_time); cosh(inf) is inf, so
    # the 1 / cosh term below becomes 0 in that regime.
    with np.errstate(over='ignore'):
        cosh = np.cosh(np.exp(-standardized_time))
    epsilon = 1.1 - (1 / cosh + (episode * c / n_episode))
    return epsilon
class
StdOut
(
object
):
"""Redirect stdout to file, and print to console as well.
"""
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment