Commit bb7d426f authored by lli's avatar lli
Browse files

Corrected the output neurons in REINFORCE

parent bcd483ec
......@@ -10,7 +10,6 @@ class PolicyNetwork(nn.Module):
def __init__(self, n_state, n_hidden, n_action, lr):
'''
Initialize the policy neural network:
Use one hidden layer
Input: a state, followed by a hidden layer
Output: the probability of taking possible individual actions
use softmax function as the activation for the output layer
......@@ -22,7 +21,7 @@ class PolicyNetwork(nn.Module):
nn.ReLU(),
nn.Linear(n_hidden, n_hidden),
nn.ReLU(),
nn.Linear(n_hidden, n_hidden),
nn.Linear(n_hidden, n_action),
nn.Softmax(dim=-1)
)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment