update environment and agents for improved performance
Refactors the environment and agent modules for performance and readability:

- Standardizes the argument order of solar.step calls in the environment step functions (temperature and irradiance first, action voltage last).
- Introduces a percentage-based action-voltage adjustment (C_per / change_percent) in the solar module's step function, giving finer control over PV output.
- Updates PPO and the related scripts to use Generalized Advantage Estimation (GAE) by default (if_gae_or_raw = True), improving advantage estimates and training stability.
parent ed51c79123
commit 993e062068
DDPG.py | 6 changed lines
@@ -48,16 +48,14 @@ if __name__ == '__main__':
     target_step = args.target_step  # how manysteps of one episode should stop
     repeat_times = args.repeat_times  # how many times should update for one batch size data
     soft_update_tau = args.soft_update_tau
-    # get the first experience from
-    agent.state = env.reset()
-
-    '''collect data and train and update network'''
     num_episode = args.num_episode
+    agent.state = env.reset()
+    '''collect data, train and update network'''
     # args.train=False
     # args.save_network=False
     # args.test_network=False
     # args.save_test_data=False
     # args.compare_with_gurobi=False
     if args.train:
         collect_data = True
         while collect_data:
PPO.py | 13 changed lines
@@ -249,7 +249,7 @@ class Arguments:
         self.repeat_times = 2 ** 3  # collect target_step, then update network
         self.target_step = 4096  # repeatedly update network to keep critic's loss small
         self.max_memo = self.target_step  # capacity of replay buffer
-        self.if_per_or_gae = False  # GAE for on-policy sparse reward: Generalized Advantage Estimation.
+        self.if_gae_or_raw = True  # GAE for on-policy sparse reward: Generalized Advantage Estimation.

         '''Arguments for evaluate'''
         self.random_seed = 0  # initialize random seed in self.init_before_training()
@@ -270,7 +270,7 @@ class Arguments:
         if if_main:
             import shutil  # remove history according to bool(if_remove)
             if self.if_remove is None:
-                self.if_remove = bool(input(f"| PRESS 'y' to REMOVE: {self.cwd}? ") == 'y')
+                self.if_remove = (input(f"| PRESS 'y' to REMOVE: {self.cwd}? ") == 'y')
             elif self.if_remove:
                 shutil.rmtree(self.cwd, ignore_errors=True)
                 print(f"| Remove cwd: {self.cwd}")
@@ -280,7 +280,6 @@ class Arguments:
         torch.manual_seed(self.random_seed)
         torch.set_num_threads(self.num_threads)
         torch.set_default_dtype(torch.float32)

         os.environ['CUDA_VISIBLE_DEVICES'] = str(self.visible_gpu)
-

@@ -308,7 +307,6 @@ if __name__ == '__main__':
    for seed in args.random_seed_list:
        args.random_seed = seed
        args.agent = AgentPPO()
-
        agent_name = f'{args.agent.__class__.__name__}'
        args.agent.cri_target = True
        args.env = ESSEnv()
@@ -317,19 +315,18 @@ if __name__ == '__main__':
        agent = args.agent
        env = args.env
        agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
-                  args.if_per_or_gae, layer_norm=True)
+                  args.if_gae_or_raw, layer_norm=True)
        cwd = args.cwd
        gamma = args.gamma
        batch_size = args.batch_size  # how much data should be used to update net
        target_step = args.target_step  # how manysteps of one episode should stop
        repeat_times = args.repeat_times  # how many times should update for one batch size data
        soft_update_tau = args.soft_update_tau
+       num_episode = args.num_episode
        agent.state = env.reset()
        '''init buffer'''
        buffer = list()
        '''init training parameters'''
-       num_episode = args.num_episode
-
        # args.train = False
        # args.save_network = False
        # args.test_network = False
@@ -388,7 +385,7 @@ if __name__ == '__main__':
        from plotDRL import PlotArgs, make_dir, plot_evaluation_information, plot_optimization_result

        plot_args = PlotArgs()
-       plot_args.feature_change = ''
+       plot_args.feature_change = 'gae'
        args.cwd = agent_name
        plot_dir = make_dir(args.cwd, plot_args.feature_change)
        plot_optimization_result(base_result, plot_dir)
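For context, the if_gae_or_raw = True default means PPO's buffer computes advantages with Generalized Advantage Estimation instead of raw discounted returns. Below is a minimal, generic sketch of the GAE recursion, not the repository's buffer code; the function name, the gamma/lambda values, and the done-mask convention are illustrative only.

    import numpy as np

    def gae_advantages(rewards, values, dones, last_value, gamma=0.99, lam=0.95):
        """Backward recursion for Generalized Advantage Estimation (Schulman et al., 2016)."""
        T = len(rewards)
        advantages = np.zeros(T, dtype=np.float32)
        gae = 0.0
        next_value = last_value                     # critic estimate for the state after the final step
        for t in reversed(range(T)):
            mask = 1.0 - float(dones[t])            # stop bootstrapping across episode boundaries
            delta = rewards[t] + gamma * next_value * mask - values[t]  # one-step TD residual
            gae = delta + gamma * lam * gae * mask  # discounted, exponentially weighted sum of residuals
            advantages[t] = gae
            next_value = values[t]
        returns = advantages + np.asarray(values, dtype=np.float32)     # regression targets for the critic
        return advantages, returns

The lam parameter trades bias against variance: lam = 0 reduces to the one-step TD advantage, while lam = 1 recovers the plain discounted-return advantage that the old if_per_or_gae = False path roughly corresponds to.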
@@ -249,7 +249,7 @@ class Arguments:
         self.repeat_times = 2 ** 3  # collect target_step, then update network
         self.target_step = 4096  # repeatedly update network to keep critic's loss small
         self.max_memo = self.target_step  # capacity of replay buffer
-        self.if_per_or_gae = False  # GAE for on-policy sparse reward: Generalized Advantage Estimation.
+        self.if_gae_or_raw = True  # GAE for on-policy sparse reward: Generalized Advantage Estimation.

         '''Arguments for evaluate'''
         self.random_seed = 0  # initialize random seed in self.init_before_training()
@@ -317,7 +317,7 @@ if __name__ == '__main__':
        agent = args.agent
        env = args.env
        agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
-                  args.if_per_or_gae, layer_norm=True)
+                  args.if_gae_or_raw, layer_norm=True)

        cwd = args.cwd
        gamma = args.gamma
@@ -389,7 +389,7 @@ if __name__ == '__main__':
        from plotDRL import PlotArgs, make_dir, plot_evaluation_information, plot_optimization_result

        plot_args = PlotArgs()
-       plot_args.feature_change = 'llm_5_no_mean'
+       plot_args.feature_change = 'llm'
        args.cwd = agent_name
        plot_dir = make_dir(args.cwd, plot_args.feature_change)
        plot_optimization_result(base_result, plot_dir)
SAC.py | 9 changed lines
@@ -47,20 +47,15 @@ if __name__ == '__main__':
     target_step = args.target_step  # how manysteps of one episode should stop
     repeat_times = args.repeat_times  # how many times should update for one batch size data
     soft_update_tau = args.soft_update_tau
-
     agent.state = env.reset()
-
     '''collect data and train and update network'''
     num_episode = args.num_episode
     '''here record real unbalance'''
-    ##
-    args.train = False
-    args.save_network = False
+    # args.train = False
+    # args.save_network = False
     # args.test_network = False
     # args.save_test_data = False
     # args.compare_with_gurobi = False
-    #
-
     if args.train:
         collect_data = True
         while collect_data:
TD3.py | 2 changed lines
@@ -47,11 +47,9 @@ if __name__ == '__main__':
     target_step = args.target_step  # how manysteps of one episode should stop
     repeat_times = args.repeat_times  # how many times should update for one batch size data
     soft_update_tau = args.soft_update_tau
-
     agent.state = env.reset()
     '''collect data and train and update network'''
     num_episode = args.num_episode
-
     # args.train=False
     # args.save_network=False
     # args.test_network=False
@@ -90,7 +90,7 @@ class ESSEnv(gym.Env):
         self.dg1.step(action[1])
         self.dg2.step(action[2])
         self.dg3.step(action[3])
-        self.solar.step(action[4], temperature, irradiance)
+        self.solar.step(temperature, irradiance, action[4])
         self.wind.step(wind_speed)
         self.current_output = np.array((self.dg1.current_output, self.dg2.current_output, self.dg3.current_output,
                                         -self.battery.energy_change))
@@ -93,7 +93,7 @@ class ESSEnv(gym.Env):
         self.dg1.step(action[1])
         self.dg2.step(action[2])
         self.dg3.step(action[3])
-        self.solar.step(action[4], temperature, irradiance)
+        self.solar.step(temperature, irradiance, action[4])
         self.wind.step(wind_speed)
         self.current_output = np.array((self.dg1.current_output, self.dg2.current_output, self.dg3.current_output,
                                         -self.battery.energy_change))
@@ -85,7 +85,7 @@ class ESSEnv(gym.Env):
         self.dg1.step(action[1])
         self.dg2.step(action[2])
         self.dg3.step(action[3])
-        self.solar.step(action[4], temperature, irradiance)
+        self.solar.step(temperature, irradiance, action[4])
         self.wind.step(wind_speed)
         self.current_output = np.array((self.dg1.current_output, self.dg2.current_output, self.dg3.current_output,
                                         -self.battery.energy_change, self.solar.current_power, self.wind.current_power))
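All three environment step functions now pass the physical inputs first and the control action last. Given the unchanged def step(self, temperature, irradiance, action_voltage=0) signature in the Solar hunk below, the old call order bound action[4] to the temperature parameter, so this is a bug fix as much as a standardization. A self-contained sketch with a stub class (the stub and all values are illustrative, not repository code):

    # Minimal stand-in for the call site; Solar here only echoes its arguments.
    class Solar:
        def step(self, temperature, irradiance, action_voltage=0):
            return temperature, irradiance, action_voltage

    solar = Solar()
    action = [0.0, 0.2, -0.1, 0.3, 0.5]        # hypothetical agent action; index 4 controls the PV voltage
    temperature, irradiance = 25.0, 800.0      # hypothetical weather inputs

    # old call order: action[4] silently became the temperature argument
    print(solar.step(action[4], temperature, irradiance))   # -> (0.5, 25.0, 800.0)
    # new call order matches the signature
    print(solar.step(temperature, irradiance, action[4]))   # -> (25.0, 800.0, 0.5)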
@@ -81,13 +81,14 @@ class Solar:
         self.opex_cofficient = parameters['O_c']
         self.refer_irradiance = parameters['I_ref']
         self.refer_temperature = parameters['T_ref']
+        self.change_percent = parameters['C_per']

     def step(self, temperature, irradiance, action_voltage=0):
         I_sc = self.sc_current * (irradiance / self.refer_irradiance)
         V_oc = self.oc_voltage + self.temper_coefficient * (temperature - self.refer_temperature)

         current = I_sc - (V_oc / self.sh_resistance)
-        self.current_power = max((1 + action_voltage) * self.base_voltage * current, 0)
+        self.current_power = max((1 + self.change_percent * action_voltage) * self.base_voltage * current, 0)
         return self.current_power

     def get_cost(self, current_power):
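A worked example of what the percentage-based scaling changes. All numbers are illustrative assumptions: C_per, the base voltage, and the photocurrent are not given in this diff, and the [-1, 1] action range is the usual normalized RL convention, not something stated here.

    # Illustrative values only -- the real ones come from the parameters dict and the policy.
    change_percent = 0.1    # hypothetical C_per: at most a +/-10% voltage adjustment
    base_voltage = 30.0     # hypothetical PV operating voltage
    current = 8.0           # hypothetical result of I_sc - V_oc / R_sh

    for action_voltage in (-1.0, 0.0, 1.0):
        old_power = max((1 + action_voltage) * base_voltage * current, 0)                   # before this commit
        new_power = max((1 + change_percent * action_voltage) * base_voltage * current, 0)  # after this commit
        print(f"a={action_voltage:+.1f}  old={old_power:6.1f} W  new={new_power:6.1f} W")

    # old: 0.0 / 240.0 / 480.0 W -- one action could shut the PV output off or double it
    # new: 216.0 / 240.0 / 264.0 W -- the same action only nudges it by +/-10%

The scaling keeps a single action component from switching the PV output off or doubling it, while still letting the agent fine-tune the operating voltage.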