diff --git a/DDPG.py b/DDPG.py
index a79258a..0a9b6dd 100644
--- a/DDPG.py
+++ b/DDPG.py
@@ -48,16 +48,14 @@ if __name__ == '__main__':
         target_step = args.target_step  # how manysteps of one episode should stop
         repeat_times = args.repeat_times  # how many times should update for one batch size data
         soft_update_tau = args.soft_update_tau
-        # get the first experience from
-        agent.state = env.reset()
-        '''collect data and train and update network'''
         num_episode = args.num_episode
+        agent.state = env.reset()
+        '''collect data, train and update network'''
         # args.train=False
         # args.save_network=False
         # args.test_network=False
         # args.save_test_data=False
         # args.compare_with_gurobi=False
-
         if args.train:
             collect_data = True
             while collect_data:
diff --git a/PPO.py b/PPO.py
index 0e02e36..5fa3e2d 100644
--- a/PPO.py
+++ b/PPO.py
@@ -249,7 +249,7 @@ class Arguments:
         self.repeat_times = 2 ** 3  # collect target_step, then update network
         self.target_step = 4096  # repeatedly update network to keep critic's loss small
         self.max_memo = self.target_step  # capacity of replay buffer
-        self.if_per_or_gae = False  # GAE for on-policy sparse reward: Generalized Advantage Estimation.
+        self.if_gae_or_raw = True  # GAE for on-policy sparse reward: Generalized Advantage Estimation.

         '''Arguments for evaluate'''
         self.random_seed = 0  # initialize random seed in self.init_before_training()
@@ -270,7 +270,7 @@ class Arguments:
         if if_main:
             import shutil  # remove history according to bool(if_remove)
             if self.if_remove is None:
-                self.if_remove = bool(input(f"| PRESS 'y' to REMOVE: {self.cwd}? ") == 'y')
+                self.if_remove = (input(f"| PRESS 'y' to REMOVE: {self.cwd}? ") == 'y')
             elif self.if_remove:
                 shutil.rmtree(self.cwd, ignore_errors=True)
                 print(f"| Remove cwd: {self.cwd}")
@@ -280,7 +280,6 @@ class Arguments:
         torch.manual_seed(self.random_seed)
         torch.set_num_threads(self.num_threads)
         torch.set_default_dtype(torch.float32)
-        os.environ['CUDA_VISIBLE_DEVICES'] = str(self.visible_gpu)

@@ -308,7 +307,6 @@ if __name__ == '__main__':
    for seed in args.random_seed_list:
        args.random_seed = seed
        args.agent = AgentPPO()
-        agent_name = f'{args.agent.__class__.__name__}'
        args.agent.cri_target = True
        args.env = ESSEnv()
@@ -317,19 +315,18 @@ if __name__ == '__main__':
        agent = args.agent
        env = args.env
        agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
-                   args.if_per_or_gae, layer_norm=True)
-
+                   args.if_gae_or_raw, layer_norm=True)
        cwd = args.cwd
        gamma = args.gamma
        batch_size = args.batch_size  # how much data should be used to update net
        target_step = args.target_step  # how manysteps of one episode should stop
        repeat_times = args.repeat_times  # how many times should update for one batch size data
        soft_update_tau = args.soft_update_tau
+        num_episode = args.num_episode
        agent.state = env.reset()
        '''init buffer'''
        buffer = list()
        '''init training parameters'''
-        num_episode = args.num_episode
        # args.train = False
        # args.save_network = False
        # args.test_network = False
@@ -388,7 +385,7 @@ if __name__ == '__main__':
            from plotDRL import PlotArgs, make_dir, plot_evaluation_information, plot_optimization_result

            plot_args = PlotArgs()
-            plot_args.feature_change = ''
+            plot_args.feature_change = 'gae'
            args.cwd = agent_name
            plot_dir = make_dir(args.cwd, plot_args.feature_change)
            plot_optimization_result(base_result, plot_dir)
diff --git a/PPO_llm.py b/PPO_llm.py
index 24d4dbf..a1d4da7 100644
--- a/PPO_llm.py
+++ b/PPO_llm.py
@@ -249,7 +249,7 @@ class Arguments:
         self.repeat_times = 2 ** 3  # collect target_step, then update network
         self.target_step = 4096  # repeatedly update network to keep critic's loss small
         self.max_memo = self.target_step  # capacity of replay buffer
-        self.if_per_or_gae = False  # GAE for on-policy sparse reward: Generalized Advantage Estimation.
+        self.if_gae_or_raw = True  # GAE for on-policy sparse reward: Generalized Advantage Estimation.

         '''Arguments for evaluate'''
         self.random_seed = 0  # initialize random seed in self.init_before_training()
@@ -317,7 +317,7 @@ if __name__ == '__main__':
        agent = args.agent
        env = args.env
        agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
-                   args.if_per_or_gae, layer_norm=True)
+                   args.if_gae_or_raw, layer_norm=True)

        cwd = args.cwd
        gamma = args.gamma
@@ -389,7 +389,7 @@ if __name__ == '__main__':
            from plotDRL import PlotArgs, make_dir, plot_evaluation_information, plot_optimization_result

            plot_args = PlotArgs()
-            plot_args.feature_change = 'llm_5_no_mean'
+            plot_args.feature_change = 'llm'
            args.cwd = agent_name
            plot_dir = make_dir(args.cwd, plot_args.feature_change)
            plot_optimization_result(base_result, plot_dir)
diff --git a/SAC.py b/SAC.py
index 950953e..bfe8f67 100644
--- a/SAC.py
+++ b/SAC.py
@@ -47,20 +47,15 @@ if __name__ == '__main__':
         target_step = args.target_step  # how manysteps of one episode should stop
         repeat_times = args.repeat_times  # how many times should update for one batch size data
         soft_update_tau = args.soft_update_tau
-
         agent.state = env.reset()
-
         '''collect data and train and update network'''
         num_episode = args.num_episode
         '''here record real unbalance'''
-
-        ##
-        args.train = False
-        args.save_network = False
+        # args.train = False
+        # args.save_network = False
         # args.test_network = False
         # args.save_test_data = False
         # args.compare_with_gurobi = False
-        #
         if args.train:
             collect_data = True
             while collect_data:
diff --git a/TD3.py b/TD3.py
index aa2f390..5decf00 100644
--- a/TD3.py
+++ b/TD3.py
@@ -47,11 +47,9 @@ if __name__ == '__main__':
         target_step = args.target_step  # how manysteps of one episode should stop
         repeat_times = args.repeat_times  # how many times should update for one batch size data
         soft_update_tau = args.soft_update_tau
-
         agent.state = env.reset()
         '''collect data and train and update network'''
         num_episode = args.num_episode
-
         # args.train=False
         # args.save_network=False
         # args.test_network=False
diff --git a/environment.py b/environment.py
index ec80116..9f90c72 100644
--- a/environment.py
+++ b/environment.py
@@ -90,7 +90,7 @@ class ESSEnv(gym.Env):
         self.dg1.step(action[1])
         self.dg2.step(action[2])
         self.dg3.step(action[3])
-        self.solar.step(action[4], temperature, irradiance)
+        self.solar.step(temperature, irradiance, action[4])
         self.wind.step(wind_speed)
         self.current_output = np.array((self.dg1.current_output, self.dg2.current_output, self.dg3.current_output,
                                         -self.battery.energy_change))
diff --git a/environment_llm.py b/environment_llm.py
index 1e1d5a8..f8ad984 100644
--- a/environment_llm.py
+++ b/environment_llm.py
@@ -93,7 +93,7 @@ class ESSEnv(gym.Env):
         self.dg1.step(action[1])
         self.dg2.step(action[2])
         self.dg3.step(action[3])
-        self.solar.step(action[4], temperature, irradiance)
+        self.solar.step(temperature, irradiance, action[4])
         self.wind.step(wind_speed)
         self.current_output = np.array((self.dg1.current_output, self.dg2.current_output, self.dg3.current_output,
                                         -self.battery.energy_change))
diff --git a/environment_primal_dual.py b/environment_primal_dual.py
index 4ca18ba..5d9ee05 100644
--- a/environment_primal_dual.py
+++ b/environment_primal_dual.py
@@ -85,7 +85,7 @@ class ESSEnv(gym.Env):
         self.dg1.step(action[1])
         self.dg2.step(action[2])
         self.dg3.step(action[3])
-        self.solar.step(action[4], temperature, irradiance)
+        self.solar.step(temperature, irradiance, action[4])
         self.wind.step(wind_speed)
         self.current_output = np.array((self.dg1.current_output, self.dg2.current_output, self.dg3.current_output,
                                         -self.battery.energy_change, self.solar.current_power, self.wind.current_power))
diff --git a/module.py b/module.py
index f47be9f..17372b9 100644
--- a/module.py
+++ b/module.py
@@ -81,13 +81,14 @@ class Solar:
         self.opex_cofficient = parameters['O_c']
         self.refer_irradiance = parameters['I_ref']
         self.refer_temperature = parameters['T_ref']
+        self.change_percent = parameters['C_per']

     def step(self, temperature, irradiance, action_voltage=0):
         I_sc = self.sc_current * (irradiance / self.refer_irradiance)
         V_oc = self.oc_voltage + self.temper_coefficient * (temperature - self.refer_temperature)
         current = I_sc - (V_oc / self.sh_resistance)
-        self.current_power = max((1 + action_voltage) * self.base_voltage * current, 0)
+        self.current_power = max((1 + self.change_percent * action_voltage) * self.base_voltage * current, 0)
         return self.current_power

     def get_cost(self, current_power):
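
Reviewer note on the environment_*.py and module.py hunks: the old call self.solar.step(action[4], temperature, irradiance) passed the agent's action where Solar.step expects a temperature, and the PV action previously swung the operating voltage by up to +/-100%. The sketch below is not repository code; it only illustrates, with hypothetical parameter values, how the corrected argument order and the new change_percent scaling behave together.

# Illustrative sketch only -- parameter names and values are placeholders, not repo values.
def solar_power(temperature, irradiance, action_voltage=0.0,
                sc_current=8.0, oc_voltage=36.0, base_voltage=30.0,
                sh_resistance=500.0, temper_coefficient=-0.12,
                refer_irradiance=1000.0, refer_temperature=25.0,
                change_percent=0.1):
    """Mirrors the patched Solar.step: the action shifts the operating voltage
    by at most +/- change_percent instead of +/- 100%."""
    I_sc = sc_current * (irradiance / refer_irradiance)
    V_oc = oc_voltage + temper_coefficient * (temperature - refer_temperature)
    current = I_sc - (V_oc / sh_resistance)
    return max((1 + change_percent * action_voltage) * base_voltage * current, 0)

# Patched env call order matches the signature step(temperature, irradiance, action_voltage);
# the old order step(action[4], temperature, irradiance) silently treated the action as a temperature.
print(solar_power(30.0, 850.0, action_voltage=1.0))   # full positive action: +change_percent on voltage
print(solar_power(30.0, 850.0, action_voltage=-1.0))  # full negative action: -change_percent on voltage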
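
On the if_per_or_gae -> if_gae_or_raw rename in PPO.py and PPO_llm.py: the Arguments comment says the flag toggles Generalized Advantage Estimation. The snippet below is a generic sketch of the two estimators such a flag usually selects between, not the repository's AgentPPO implementation; the function names, the mask convention (mask = 1 - done) and the lambda value are assumptions.

# Generic sketch only -- NOT the repo's AgentPPO code.
import torch

def reward_sum_raw(rewards, masks, values, gamma=0.99):
    """Advantage from raw discounted returns: A_t = G_t - V(s_t)."""
    returns = torch.empty_like(rewards)
    running = 0.0
    for t in range(rewards.shape[0] - 1, -1, -1):
        running = rewards[t] + gamma * masks[t] * running
        returns[t] = running
    return returns, returns - values

def reward_sum_gae(rewards, masks, values, gamma=0.99, lam=0.95):
    """GAE(lambda): delta_t = r_t + gamma*V(s_{t+1}) - V(s_t); A_t = delta_t + gamma*lambda*A_{t+1}."""
    advantages = torch.empty_like(rewards)
    next_value, running_adv = 0.0, 0.0
    for t in range(rewards.shape[0] - 1, -1, -1):
        delta = rewards[t] + gamma * masks[t] * next_value - values[t]
        running_adv = delta + gamma * lam * masks[t] * running_adv
        advantages[t] = running_adv
        next_value = values[t]
    return advantages + values, advantages  # critic targets, actor advantages

# Example: a 4-step trajectory, terminal at the last step (mask 0).
r = torch.tensor([1.0, 0.0, 0.0, 2.0])
m = torch.tensor([1.0, 1.0, 1.0, 0.0])
v = torch.tensor([0.5, 0.4, 0.6, 0.3])
print(reward_sum_raw(r, m, v)[1])
print(reward_sum_gae(r, m, v)[1])

Presumably if_gae_or_raw=True selects the GAE path and False falls back to raw returns, matching the flag's new name; the actual switch lives inside agent.init / the agent's buffer processing.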