# Per-episode progress line: prints the episode index followed by that
# episode's entries from total_reward_episode, num_no_capacity (reported as
# "number of penalties"), accepted_orders and lr (reported as "learning rate").
print(
    'Episode: {}, total reward: {}, number of penalties: {}, accepted orders: {}, learning rate: {}'.format(
        episode,
        total_reward_episode[episode],
        num_no_capacity[episode],
        accepted_orders[episode],
        lr[episode],
    )
)
# Per-episode progress line including the loss terms. The adjacent f-string
# pieces are concatenated into one message, so the output is a single line.
# NOTE(review): p_loss / v_loss are presumably scalar tensors (``.item()`` is
# called on them) — confirm against where they are computed.
print(
    f'Episode: {episode}, '
    f'total reward: {total_reward_episode[episode]}, '
    f'number of penalties: {num_no_capacity[episode]}, '
    f'accepted orders: {accepted_orders[episode]}, '
    f'policy loss: {p_loss.item()}, '
    f'value loss: {v_loss.item()}, '
    f'policy learning rate: {lr[episode]}'
)
else:
# Shorter progress line: episode index, total reward, penalty count and
# accepted orders only (no loss or learning-rate fields).
print(
    f'Episode: {episode}, '
    f'total reward: {total_reward_episode[episode]}, '
    f'number of penalties: {num_no_capacity[episode]}, '
    f'accepted orders: {accepted_orders[episode]}'
)
f'Episode: {episode}, total reward: {total_reward_episode[episode]}, number of penalties: {num_no_capacity[episode]}, accepted orders: {accepted_orders[episode]}, policy loss: {p_loss.item()}, value loss: {v_loss.item()}')