Commit d12d5a56 authored by lli

update

parent 31cca5c3
import torch
import torch.nn as nn
USE_CUDA = torch.cuda.is_available()
device = torch.device('cuda' if USE_CUDA else 'cpu')
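As a side note, a minimal sketch of how this device object is typically used (the Linear model here is only a placeholder, not part of this repository):

model = nn.Linear(4, 2).to(device)        # move module parameters to the chosen device
x = torch.randn(1, 4, device=device)      # allocate the input on the same device
print(model(x).device)                    # cuda:0 when a GPU is available, otherwise cpu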
@@ -34,7 +33,8 @@ class PolicyNetwork(nn.Module):
        if self.lr_schedule:
            self.schedule_step = schedule_step
            self.schedule_rate = schedule_rate
-            self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=self.schedule_step, gamma=self.schedule_rate)
+            self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=self.schedule_step,
+                                                             gamma=self.schedule_rate)
    def predict(self, state):
        # Compute the action probabilities of state s using the learning model
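For reference, a minimal standalone sketch of how a StepLR scheduler configured like the one above is usually driven; the toy model, optimizer, and loss are placeholders, and the point at which this repository calls scheduler.step() is not shown in this hunk:

import torch
import torch.nn as nn

model = nn.Linear(4, 2)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.9)

for episode in range(300):
    loss = model(torch.randn(1, 4)).sum()   # stand-in for the REINFORCE policy loss
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    scheduler.step()                         # lr is multiplied by 0.9 every 100 steps

print(scheduler.get_last_lr())               # [0.01 * 0.9 ** 3] after 300 episodes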
@@ -76,6 +76,7 @@ class ValueNetwork(nn.Module):
    '''
    Use a regression neural network to approximate state-values
    '''
    def __init__(self, n_state, n_hidden, lr=0.01):
        super(ValueNetwork, self).__init__()
        self.criterion = torch.nn.SmoothL1Loss()
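For orientation, a minimal sketch of the kind of regression update such a value network performs; the layer layout, optimizer, and batch here are illustrative assumptions rather than the rest of this class:

import torch
import torch.nn as nn

n_state, n_hidden = 4, 64
model = nn.Sequential(nn.Linear(n_state, n_hidden), nn.ReLU(), nn.Linear(n_hidden, 1))
criterion = nn.SmoothL1Loss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

states = torch.randn(8, n_state)            # batch of observed states
returns = torch.randn(8, 1)                 # Monte Carlo return targets for those states
loss = criterion(model(states), returns)    # Huber loss between V(s) and the targets
optimizer.zero_grad()
loss.backward()
optimizer.step()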
@@ -101,4 +102,4 @@ class ValueNetwork(nn.Module):
    def predict(self, state):
        with torch.no_grad():
-            return self.model(torch.tensor(state, dtype=torch.float32, device=device))
\ No newline at end of file
+            return self.model(torch.tensor(state, dtype=torch.float32, device=device))
import os
from itertools import product
# Define different learning rates for learning rate tuning
parameters = dict(
-    policy_lr = [0.01, 0.001, 0.0001, 0.00001],
-    value_lr = [0.01, 0.001, 0.0001, 0.00001]
+    policy_lr=[0.01, 0.001, 0.0001, 0.00001],
+    value_lr=[0.01, 0.001, 0.0001, 0.00001]
)
param_values = [v for v in parameters.values()]
@@ -12,11 +13,10 @@ print(param_values)
for policy_lr, value_lr in product(*param_values):
    print(policy_lr, value_lr)
# Generate different learning rate combinations
for run_id, (policy_lr, value_lr) in enumerate(product(*param_values)):
    print('Run id: ', run_id + 1)
    print('Policy learning rate: ', policy_lr)
    print('Value learning rate: ', value_lr)
-    os.system(f"python train_reinforce.py --save_path {run_id + 1} --n_hidden 128 --lr_policy {policy_lr} --lr_value {value_lr} --n_episode 200 ")
+    os.system(
+        f"python train_reinforce.py --save_path {run_id + 1} --n_hidden 128 --lr_policy {policy_lr} --lr_value {value_lr} --n_episode 200 ")
@@ -70,6 +70,7 @@ print(dqn)
print()
print(f'Total parameters: {sum(p.numel() for p in dqn.parameters())}')
print(f'Trainable parameters: {sum(p.numel() for p in dqn.parameters() if p.requires_grad)}')
+print(f'DQN is on GPU: {next(dqn.parameters()).is_cuda}')
print()
seed = args.seed
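The two generator expressions above recur in several of these scripts; a hypothetical helper (count_parameters is not defined anywhere in this repository) could factor them out:

import torch.nn as nn

def count_parameters(model: nn.Module, trainable_only: bool = False) -> int:
    # Sum element counts over all (optionally only trainable) parameters.
    return sum(p.numel() for p in model.parameters()
               if not trainable_only or p.requires_grad)

# e.g. print(count_parameters(dqn), count_parameters(dqn, trainable_only=True))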
@@ -73,6 +73,7 @@ print(policy_net)
print()
print(f'Total parameters: {sum(p.numel() for p in policy_net.parameters())}')
print(f'Trainable parameters: {sum(p.numel() for p in policy_net.parameters() if p.requires_grad)}')
+print(f'Policy net is on GPU: {next(policy_net.parameters()).is_cuda}')
print()
print('######################Value net architecture#####################')
@@ -80,6 +81,7 @@ print(value_net)
print()
print(f'Total parameters: {sum(p.numel() for p in value_net.parameters())}')
print(f'Trainable parameters: {sum(p.numel() for p in value_net.parameters() if p.requires_grad)}')
+print(f'Value net is on GPU: {next(value_net.parameters()).is_cuda}')
print()
seed = args.seed
@@ -19,7 +19,6 @@ def sliding_window(data, N):
    :param N: The length of the sliding window.
    :return: A numpy array of length M containing the smoothed (moving-average) values.
    """
-    idx = 0
    window = np.zeros(N)
    smoothed = np.zeros(len(data))
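For context, a minimal sketch of one way a trailing moving average over a window of length N can be computed; this is an illustrative assumption, not the remainder of the function's actual body:

import numpy as np

def sliding_window_mean(data, N):
    # Element i is the mean of the last (up to) N values seen so far.
    data = np.asarray(data, dtype=float)
    smoothed = np.zeros(len(data))
    for i in range(len(data)):
        start = max(0, i - N + 1)
        smoothed[i] = data[start:i + 1].mean()
    return smoothed

# sliding_window_mean([0, 10, 20, 30], N=2) -> array([ 0.,  5., 15., 25.])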