Commit 10901380 authored by lli's avatar lli
Browse files

update

parent 0bbf1707
......@@ -23,6 +23,8 @@ class PolicyNetwork(nn.Module):
nn.ReLU(),
nn.Linear(n_hidden, n_hidden),
nn.ReLU(),
nn.Linear(n_hidden, n_hidden),
nn.ReLU(),
nn.Linear(n_hidden, n_action),
nn.Softmax(dim=-1),
)
......@@ -68,12 +70,14 @@ class ValueNetwork(nn.Module):
'''
def __init__(self, n_state, n_hidden, lr=0.01):
super(ValueNetwork, self).__init__()
self.criterion = torch.nn.MSELoss()
self.criterion = torch.nn.SmoothL1Loss()
self.model = torch.nn.Sequential(
nn.Linear(n_state, n_hidden),
nn.ReLU(),
nn.Linear(n_hidden, n_hidden),
nn.ReLU(),
nn.Linear(n_hidden, n_hidden),
nn.ReLU(),
nn.Linear(n_hidden, 1)
)
self.optimizer = torch.optim.Adam(self.model.parameters(), lr)
......
......@@ -4,7 +4,6 @@ import shutil
import sys
import math
import pickle
import torch
import numpy as np
......@@ -12,6 +11,32 @@ def to_np(var):
return var.detach().cpu().numpy()
def sliding_window(data, N):
    """
    Smooth ``data`` with a trailing moving average of length ``N``.

    For each index k, the output is the mean of data[max(0, k - N + 1)] through data[k]
    (inclusive). The first N - 1 outputs average only over the samples seen so far,
    so no padding values are mixed into the start of the result.
    :param data: A one-dimensional numpy array (or sequence of numbers), length M.
    :param N: The length of the sliding window; must be at least 1.
    :return: A float numpy array, length M, containing the smoothed values.
    :raises ValueError: If N is smaller than 1.
    """
    if N < 1:
        # A zero-length window has no defined mean; fail with a clear message
        # instead of an unrelated indexing error further down.
        raise ValueError("sliding window length N must be at least 1, got {}".format(N))
    # Convert once so plain sequences are accepted and every mean is taken over floats.
    values = np.asarray(data, dtype=float)
    smoothed = np.zeros(len(values))
    for end in range(1, len(values) + 1):
        # Slice the trailing window directly; max() clamps the start while
        # fewer than N samples are available.
        smoothed[end - 1] = values[max(0, end - N):end].mean()
    return smoothed
def create_folder(folder_path):
"""Create a folder if it does not exist.
"""
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment