diff --git a/PPO.py b/PPO.py
index fe60743..f644e25 100644
--- a/PPO.py
+++ b/PPO.py
@@ -106,14 +106,14 @@ class AgentPPO:
         self.get_reward_sum = None  # self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
         self.trajectory_list = None
 
-    def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, if_use_gae=False, gpu_id=0):
+    def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, if_use_gae=False, gpu_id=0, layer_norm=False):
         self.device = torch.device(f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu")
         self.trajectory_list = list()
         # choose whether to use gae or not
         self.get_reward_sum = self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
 
-        self.cri = self.ClassCri(net_dim, state_dim, action_dim).to(self.device)
-        self.act = self.ClassAct(net_dim, state_dim, action_dim).to(self.device) if self.ClassAct else self.cri
+        self.cri = self.ClassCri(net_dim, state_dim, action_dim, layer_norm).to(self.device)
+        self.act = self.ClassAct(net_dim, state_dim, action_dim, layer_norm).to(self.device) if self.ClassAct else self.cri
         self.cri_target = deepcopy(self.cri) if self.if_use_cri_target else self.cri
         self.act_target = deepcopy(self.act) if self.if_use_act_target else self.act
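
For context, a minimal sketch of a network class that consumes the `layer_norm` flag the way `ClassCri`/`ClassAct` now receive it from `init`. The class name `ActorPPO`, the layer sizes, and the activation are assumptions for illustration; the diff does not show the actual network definitions:

```python
import torch.nn as nn

class ActorPPO(nn.Module):
    # Hypothetical sketch: mirrors the (net_dim, state_dim, action_dim, layer_norm)
    # constructor signature that the updated init() now passes.
    def __init__(self, net_dim, state_dim, action_dim, layer_norm=False):
        super().__init__()

        def block(in_dim, out_dim):
            # Optionally insert LayerNorm after each linear layer when layer_norm=True.
            layers = [nn.Linear(in_dim, out_dim)]
            if layer_norm:
                layers.append(nn.LayerNorm(out_dim))
            layers.append(nn.ReLU())
            return layers

        self.net = nn.Sequential(
            *block(state_dim, net_dim),
            *block(net_dim, net_dim),
            nn.Linear(net_dim, action_dim),
        )

    def forward(self, state):
        return self.net(state)
```

Threading the flag through the constructors keeps `init` agnostic to how each network applies normalization, and since `self.act` falls back to `self.cri` when `ClassAct` is unset, both paths see the same flag.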