Commit b9ff612e authored by lli
Browse files

Change the default tau value of the Boltzmann policy from 1 to 0.5

parent d289fe1f
...@@ -55,7 +55,7 @@ class DQN(nn.Module): ...@@ -55,7 +55,7 @@ class DQN(nn.Module):
q_values = self.predict(state) q_values = self.predict(state)
return torch.argmax(q_values).item() return torch.argmax(q_values).item()
def boltzmann_policy(self, state, n_action, tau=1, clip=(-500., 500.)): def boltzmann_policy(self, state, n_action, tau=0.5, clip=(-500., 500.)):
""" """
Boltzmann policy builds a probability law on q values and returns an Boltzmann policy builds a probability law on q values and returns an
action selected randomly according to this law action selected randomly according to this law
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment