lli / YM-Seminar

Commit d12d5a56, authored Apr 25, 2021 by lli

update

parent 31cca5c3
Changes 5
algorithms/reinforce.py
import torch
import torch.nn as nn

USE_CUDA = torch.cuda.is_available()
device = torch.device('cuda' if USE_CUDA else 'cpu')
...
...
@@ -34,7 +33,8 @@ class PolicyNetwork(nn.Module):
        if self.lr_schedule:
            self.schedule_step = schedule_step
            self.schedule_rate = schedule_rate
            self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer,
                                                             step_size=self.schedule_step,
                                                             gamma=self.schedule_rate)

    def predict(self, state):
        # Compute the action probabilities of state s using the learned model
...
...
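For context: torch.optim.lr_scheduler.StepLR multiplies the optimizer's learning rate by gamma every step_size calls to scheduler.step(). A minimal standalone sketch of that behaviour (the Linear model, Adam optimizer, and step counts are illustrative only, not taken from this commit):

import torch
import torch.nn as nn

model = nn.Linear(4, 2)                      # toy model, just to own some parameters
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.9)

for episode in range(300):
    # ... one training episode would run here ...
    scheduler.step()                         # decay fires after steps 100, 200, 300

print(optimizer.param_groups[0]['lr'])       # 0.01 * 0.9**3 = 0.00729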
@@ -76,6 +76,7 @@ class ValueNetwork(nn.Module):
    '''
    Use a regression neural network to approximate state-values
    '''
    def __init__(self, n_state, n_hidden, lr=0.01):
        super(ValueNetwork, self).__init__()
        self.criterion = torch.nn.SmoothL1Loss()
...
...
@@ -101,4 +102,4 @@ class ValueNetwork(nn.Module):
    def predict(self, state):
        with torch.no_grad():
            return self.model(torch.tensor(state, dtype=torch.float32, device=device))
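The commit switches ValueNetwork to torch.nn.SmoothL1Loss as its regression criterion and evaluates predict under torch.no_grad(). A hedged sketch of how such a criterion is typically used to fit predicted state-values to return targets (the update function, the Adam optimizer, and the toy network below are assumptions for illustration, not code from this repository):

import torch
import torch.nn as nn

model = nn.Sequential(nn.Linear(4, 64), nn.ReLU(), nn.Linear(64, 1))  # stand-in for self.model
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = nn.SmoothL1Loss()                                          # as in the diff above

def update(states, returns):
    # states: (batch, n_state) float tensor, returns: (batch,) discounted returns
    values = model(states).squeeze(-1)
    loss = criterion(values, returns)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()

update(torch.randn(8, 4), torch.randn(8))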
reinforce_tune_lr.py
import os
from itertools import product

# Define different learning rates for learning rate tuning
parameters = dict(
    policy_lr=[0.01, 0.001, 0.0001, 0.00001],
    value_lr=[0.01, 0.001, 0.0001, 0.00001]
)

param_values = [v for v in parameters.values()]
...
...
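For reference, itertools.product over the two lists above enumerates the full 4 x 4 grid, so the loop below launches 16 training runs. A quick check, assuming the same parameters dict:

from itertools import product

policy_lrs = [0.01, 0.001, 0.0001, 0.00001]
value_lrs = [0.01, 0.001, 0.0001, 0.00001]

combos = list(product(policy_lrs, value_lrs))
print(len(combos))    # 16
print(combos[0])      # (0.01, 0.01)
print(combos[-1])     # (1e-05, 1e-05)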
@@ -12,11 +13,10 @@ print(param_values)
for policy_lr, value_lr in product(*param_values):
    print(policy_lr, value_lr)

# Generate different learning rate combinations
for run_id, (policy_lr, value_lr) in enumerate(product(*param_values)):
    print('Run id: ', run_id + 1)
    print('Policy learning rate: ', policy_lr)
    print('Value learning rate: ', value_lr)
    os.system(f"python train_reinforce.py --save_path {run_id + 1} --n_hidden 128 --lr_policy {policy_lr} --lr_value {value_lr} --n_episode 200 ")
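Note that os.system blocks until each training run exits and only reports a shell status code. A sketch of the same launch using subprocess.run, which raises if a run fails (the flags are copied from the command above; switching to subprocess is a suggestion, not part of this commit):

import subprocess

def launch_run(run_id, policy_lr, value_lr):
    cmd = [
        "python", "train_reinforce.py",
        "--save_path", str(run_id + 1),
        "--n_hidden", "128",
        "--lr_policy", str(policy_lr),
        "--lr_value", str(value_lr),
        "--n_episode", "200",
    ]
    subprocess.run(cmd, check=True)   # check=True raises CalledProcessError on failure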
train_dqn.py
...
...
@@ -70,6 +70,7 @@ print(dqn)
print()
print(f'Total parameters: {sum(p.numel() for p in dqn.parameters())}')
print(f'Trainable parameters: {sum(p.numel() for p in dqn.parameters() if p.requires_grad)}')
print(f'DQN is on GPU: {next(dqn.parameters()).is_cuda}')
print()

seed = args.seed
...
...
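The added prints count parameters with Tensor.numel(); the two sums differ only when some parameters have requires_grad=False. A standalone illustration (the small Sequential network is an example, not the DQN defined in this repository):

import torch.nn as nn

net = nn.Sequential(nn.Linear(4, 16), nn.ReLU(), nn.Linear(16, 2))
print(sum(p.numel() for p in net.parameters()))                     # 4*16 + 16 + 16*2 + 2 = 114
print(sum(p.numel() for p in net.parameters() if p.requires_grad))  # 114, nothing is frozen yet

net[0].weight.requires_grad = False                                 # freeze the first layer's weights
print(sum(p.numel() for p in net.parameters() if p.requires_grad))  # 114 - 64 = 50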
train_reinforce.py
...
...
@@ -73,6 +73,7 @@ print(policy_net)
print()
print(f'Total parameters: {sum(p.numel() for p in policy_net.parameters())}')
print(f'Trainable parameters: {sum(p.numel() for p in policy_net.parameters() if p.requires_grad)}')
print(f'Policy net is on GPU: {next(policy_net.parameters()).is_cuda}')
print()

print('######################Value net architecture#####################')
...
...
@@ -80,6 +81,7 @@ print(value_net)
print()
print(f'Total parameters: {sum(p.numel() for p in value_net.parameters())}')
print(f'Trainable parameters: {sum(p.numel() for p in value_net.parameters() if p.requires_grad)}')
print(f'Value net is on GPU: {next(value_net.parameters()).is_cuda}')
print()

seed = args.seed
...
...
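Both training scripts read seed = args.seed right after these prints. A common way such a seed is applied in PyTorch scripts is sketched below; whether train_reinforce.py seeds exactly these generators is an assumption:

import random
import numpy as np
import torch

def set_seed(seed):
    random.seed(seed)                     # Python's built-in RNG
    np.random.seed(seed)                  # numpy RNG (e.g. for the environment)
    torch.manual_seed(seed)               # CPU tensors
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)  # all GPU devices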
utils/utils.py
...
...
@@ -19,7 +19,6 @@ def sliding_window(data, N):
    :param N: The length of the sliding window.
    :return: A numpy array of length M containing the smoothed averages.
    """
    idx = 0
    window = np.zeros(N)
    smoothed = np.zeros(len(data))
...
...
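The hunk only shows the buffers that sliding_window allocates (a window of length N and a smoothed array of length len(data)); the loop that fills them is elided. A sketch of a moving-average smoother consistent with the docstring (an illustration of the technique, not the repository's implementation):

import numpy as np

def moving_average(data, N):
    # Average each element with the up-to-N values that precede it
    data = np.asarray(data, dtype=float)
    smoothed = np.zeros(len(data))
    for idx in range(len(data)):
        lo = max(0, idx - N + 1)
        smoothed[idx] = data[lo:idx + 1].mean()
    return smoothed

print(moving_average([1, 2, 3, 4, 5], N=3))   # [1.  1.5 2.  3.  4. ]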