Commit bcd483ec authored by lli

Modified the Wendtris_Eva evaluation environment (added a penalty factor for infeasible acceptances) and disabled the SummaryWriter

parent b12712ca
@@ -171,11 +171,12 @@ class Wendtris(gym.Env):
 class Wendtris_Eva(gym.Env):
     # Evaluation environment
-    def __init__(self, orders, rewards, num_spacewidth=6, num_capacity=6):
+    def __init__(self, orders, rewards, num_spacewidth=6, num_capacity=6, PENALTY_FACTOR=0):
         self.orders = orders.astype('float64')
         self.rewards = rewards
         self.num_spacewidth = num_spacewidth
         self.num_capacity = num_capacity
+        self.penalty_factor = PENALTY_FACTOR
         self.action_space = spaces.Discrete(2)
         orders_offset_row = np.zeros(self.orders.shape[1], dtype=np.float64)
         rewards_offset_row = np.zeros(self.rewards.shape[1])
@@ -211,9 +212,9 @@ class Wendtris_Eva(gym.Env):
             # If the agent accepts the order
             no_capacity = ((self.tmp[0] - self.tmp[1]) < 0).any()
             if no_capacity:
-                # If there is no capacity, agent receives 0 reward
+                # If there is no capacity, agent receives negative current reward * penalty_factor
                 # Order position and remaining orders will not change
-                current_reward = 0
+                current_reward = - self.rewards[self.reward_pos][1] * self.penalty_factor
                 self.order_pos += 1
                 self.reward_pos += 1
                 self.num_remain_orders -= 1
......
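
In short, accepting an order without remaining capacity is no longer a free mistake: it now costs penalty_factor times the reward of that order, while the default PENALTY_FACTOR=0 keeps the old zero-reward behaviour. A minimal usage sketch, assuming the module is importable as wendtris and that orders/rewards are 2-D NumPy arrays as elsewhere in the file (shapes below are purely illustrative):

import numpy as np
from wendtris import Wendtris_Eva   # import path assumed, not shown in the diff

orders = np.random.randint(0, 3, size=(20, 6))    # hypothetical order matrix
rewards = np.random.randint(1, 10, size=(20, 2))  # hypothetical reward matrix

# PENALTY_FACTOR=0 (the default) reproduces the old behaviour (reward 0 on an
# infeasible acceptance); a positive factor makes such an acceptance cost
#   current_reward = -rewards[reward_pos][1] * penalty_factor
env = Wendtris_Eva(orders, rewards, num_spacewidth=6, num_capacity=6, PENALTY_FACTOR=0.5)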
@@ -32,8 +32,8 @@ USE_CUDA = torch.cuda.is_available()
 device = torch.device('cuda' if USE_CUDA else 'cpu')
 # Define tensorboard writer path
-comment = f' policy_lr={args.lr_policy} value_lr={args.lr_value} n_episode={args.n_episode}'
-writer = SummaryWriter('runs/reinforce', comment=comment, filename_suffix=str(args.lr_policy) + str(args.n_episode))
+# comment = f' policy_lr={args.lr_policy} value_lr={args.lr_value} n_episode={args.n_episode}'
+# writer = SummaryWriter('runs/reinforce', comment=comment, filename_suffix=str(args.lr_policy) + str(args.n_episode))
 OUT_PATH = os.path.join('results/reinforce', args.save_path)
 LOG_FILE = os.path.join(OUT_PATH, 'log.txt')
@@ -164,11 +164,11 @@ for episode in range(n_episode):
     p_running_losses.append(p_running_loss)
     # Update tensor board
-    writer.add_scalar('Policy Loss', p_running_loss, episode)
-    writer.add_scalar('Value loss', v_running_loss, episode)
-    for name, param in policy_net.named_parameters():
-        writer.add_histogram(name + '_grad', param.grad, episode)
-        writer.add_histogram(name + '_data', param, episode)
+    # writer.add_scalar('Policy Loss', p_running_loss, episode)
+    # writer.add_scalar('Value loss', v_running_loss, episode)
+    # for name, param in policy_net.named_parameters():
+    #     writer.add_histogram(name + '_grad', param.grad, episode)
+    #     writer.add_histogram(name + '_data', param, episode)
     print(f'Episode: {episode}, total reward: {total_reward_episode[episode]}, number of penalties: {num_no_capacity[episode]}, accepted orders: {accepted_orders[episode]}, policy loss: {p_loss.item()}, value loss: {v_loss.item()}')
@@ -176,7 +176,7 @@ for episode in range(n_episode):
         state = next_state
-writer.close()
+# writer.close()
 # save the model parameters
 torch.save(policy_net.state_dict(), os.path.join(OUT_PATH, 'policy_{}.pk1'.format(n_episode)))
......
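
Because every tensorboard call is commented out rather than removed, re-enabling logging means uncommenting the writer construction, the add_scalar/add_histogram calls, and writer.close() together. A minimal sketch of an alternative that keeps the calls in place behind a switch; the --tensorboard flag and the stand-in loop/losses are assumptions, not part of the original script:

import argparse
from torch.utils.tensorboard import SummaryWriter

parser = argparse.ArgumentParser()
parser.add_argument('--tensorboard', action='store_true')  # hypothetical flag
args = parser.parse_args()

# Create the writer only when logging is requested
writer = SummaryWriter('runs/reinforce') if args.tensorboard else None

for episode in range(3):                       # stand-in for the real training loop
    p_running_loss, v_running_loss = 0.1, 0.2  # stand-in for the real running losses
    if writer is not None:
        writer.add_scalar('Policy Loss', p_running_loss, episode)
        writer.add_scalar('Value loss', v_running_loss, episode)

if writer is not None:
    writer.close()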