diff --git a/PPO.py b/PPO.py
index 9602130..fe60743 100644
--- a/PPO.py
+++ b/PPO.py
@@ -241,7 +241,7 @@ class Arguments:
         self.num_threads = 32  # cpu_num for evaluate model, torch.set_num_threads(self.num_threads)
 
         '''Arguments for training'''
-        self.num_episode = 2000  # to control the number of training episodes for PPO
+        self.num_episode = 1000  # to control the number of training episodes for PPO
         self.gamma = 0.995  # discount factor of future rewards
         self.learning_rate = 2 ** -14  # 2 ** -14 ~= 6e-5
         self.soft_update_tau = 2 ** -8  # 2 ** -8 ~= 5e-3
diff --git a/module.py b/module.py
index 4b4bde4..3cd7a28 100644
--- a/module.py
+++ b/module.py
@@ -126,7 +126,6 @@ class Wind:
                                   self.power_coefficient * self.generator_efficiency) / 1e3
         else:
             self.current_power = 0
-
         return self.current_power
 
     def gen_cost(self, current_power):
diff --git a/tools.py b/tools.py
index 6b2da0a..6edd607 100644
--- a/tools.py
+++ b/tools.py
@@ -129,7 +129,7 @@ class Arguments:
         self.num_threads = 32  # cpu_num for evaluate model, torch.set_num_threads(self.num_threads)
 
         '''Arguments for training'''
-        self.num_episode = 2000
+        self.num_episode = 1000
         self.gamma = 0.995  # discount factor of future rewards
         # self.reward_scale = 1  # an approximate target reward, usually close to 256
         self.learning_rate = 2 ** -14  # 2 ** -14 ~= 6e-5
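
For context on the module.py hunk: it only removes a blank line inside a Wind method whose visible tail computes current_power from power_coefficient and generator_efficiency, with / 1e3 converting W to kW. Below is a minimal sketch of such a method, assuming the standard turbine power equation P = 0.5 * rho * A * v^3 * Cp * eta; apart from power_coefficient, generator_efficiency, and current_power, every attribute name, value, and the method name gen_power are hypothetical placeholders, not taken from the repository.

# Minimal sketch, assuming P = 0.5 * rho * A * v^3 * Cp * eta.
# Only `power_coefficient`, `generator_efficiency`, `current_power`,
# and the `/ 1e3` kW conversion appear in the diff; everything else
# here is a hypothetical placeholder.
class Wind:
    def __init__(self):
        self.air_density = 1.225         # kg/m^3 (hypothetical)
        self.rotor_area = 1648.0         # m^2, swept area (hypothetical)
        self.power_coefficient = 0.45    # Cp, fraction of wind power captured
        self.generator_efficiency = 0.95
        self.cut_in_speed = 3.0          # m/s (hypothetical)
        self.cut_out_speed = 25.0        # m/s (hypothetical)
        self.current_power = 0.0         # kW

    def gen_power(self, wind_speed):     # method name assumed
        # Produce power only inside the operating wind-speed window.
        if self.cut_in_speed <= wind_speed <= self.cut_out_speed:
            self.current_power = (0.5 * self.air_density * self.rotor_area *
                                  wind_speed ** 3 *
                                  self.power_coefficient *
                                  self.generator_efficiency) / 1e3  # W -> kW
        else:
            self.current_power = 0
        return self.current_power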