# Update the policy network from this episode's advantages and log-probs.
policy_net.update(advantages, log_probs)

# Report per-episode progress. The base message is shared by both cases;
# the learning rate is appended only when a learning-rate schedule is in
# use, since `lr` is only indexed in that case.
episode_summary = (
    f'Episode: {episode}, total reward: {total_reward_episode[episode]}, '
    f'number of penalties: {num_no_capacity[episode]}, '
    f'accepted orders: {accepted_orders[episode]}'
)
if policy_lr_schedule:
    episode_summary += f', learning rate: {lr[episode]}'
print(episode_summary)