Compare commits

...

3 Commits

Author SHA1 Message Date
chenxiaodong 53d3ac9ca8 edit the layer normalization 2024-06-19 14:36:11 +08:00
chenxiaodong ceff6e0ffe test 2024-06-18 16:16:18 +08:00
chenxiaodong 25a5da0e00 test 2024-06-18 14:54:23 +08:00
13 changed files with 71 additions and 49 deletions

View File

@@ -2,7 +2,7 @@
<module type="PYTHON_MODULE" version="4"> <module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager"> <component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" /> <content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="rl-microgrid" jdkType="Python SDK" /> <orderEntry type="jdk" jdkName="Remote Python 3.9.18 (sftp://chenxd@124.16.151.196:22121/home/chenxd/miniconda3/envs/grid/bin/python3.9)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" /> <orderEntry type="sourceFolder" forTests="false" />
</component> </component>
<component name="PyDocumentationSettings"> <component name="PyDocumentationSettings">

22
.idea/deployment.xml Normal file
View File

@@ -0,0 +1,22 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="PublishConfigData" autoUpload="Always" serverName="chenxd@124.16.151.196:22121 password" remoteFilesAllowedToDisappearOnAutoupload="false">
<serverData>
<paths name="chenxd@124.16.151.196:22121 password">
<serverdata>
<mappings>
<mapping deploy="/tmp/pycharm_project_78" local="$PROJECT_DIR$" />
</mappings>
</serverdata>
</paths>
<paths name="root@124.16.151.196:10531 password">
<serverdata>
<mappings>
<mapping deploy="/mnt/chenxd/DRL-for-Energy-Systems" local="$PROJECT_DIR$" />
</mappings>
</serverdata>
</paths>
</serverData>
<option name="myAutoUpload" value="ALWAYS" />
</component>
</project>

View File

@@ -3,5 +3,5 @@
<component name="Black"> <component name="Black">
<option name="sdkName" value="rl-microgrid" /> <option name="sdkName" value="rl-microgrid" />
</component> </component>
<component name="ProjectRootManager" version="2" project-jdk-name="rl-microgrid" project-jdk-type="Python SDK" /> <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.9.18 (sftp://chenxd@124.16.151.196:22121/home/chenxd/miniconda3/envs/grid/bin/python3.9)" project-jdk-type="Python SDK" />
</project> </project>

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -105,7 +105,7 @@ if __name__ == '__main__':
record = test_one_episode(env, agent.act, agent.device) record = test_one_episode(env, agent.act, agent.device)
eval_data = pd.DataFrame(record['system_info']) eval_data = pd.DataFrame(record['system_info'])
eval_data.columns = ['time_step', 'price', 'netload', 'action', 'real_action', 'soc', 'battery', 'gen1', 'gen2', eval_data.columns = ['time_step', 'price', 'netload', 'action', 'real_action', 'soc', 'battery', 'gen1', 'gen2',
'gen3', 'unbalance', 'operation_cost'] 'gen3', 'temperature', 'irradiance', 'unbalance', 'operation_cost']
if args.save_test_data: if args.save_test_data:
test_data_save_path = f'{args.cwd}/test_data.pkl' test_data_save_path = f'{args.cwd}/test_data.pkl'
with open(test_data_save_path, 'wb') as tf: with open(test_data_save_path, 'wb') as tf:

66
PPO.py
View File

@@ -12,33 +12,34 @@ os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
script_name = os.path.basename(__file__) script_name = os.path.basename(__file__)
# after adding layer normalization, it doesn't work
class ActorPPO(nn.Module): class ActorPPO(nn.Module):
def __init__(self, mid_dim, state_dim, action_dim, layer_norm=False): def __init__(self, mid_dim, state_dim, action_dim, layer_norm=False):
super().__init__() super().__init__()
self.layer_norm = layer_norm
self.net = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(), self.net = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.ReLU(), nn.Linear(mid_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.Hardswish(), nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
nn.Linear(mid_dim, action_dim), ) nn.Linear(mid_dim, action_dim))
# the logarithm (log) of standard deviation (std) of action, it is a trainable parameter # the logarithm (log) of standard deviation (std) of action, it is a trainable parameter
self.a_logstd = nn.Parameter(torch.zeros((1, action_dim)) - 0.5, requires_grad=True) self.a_logstd = nn.Parameter(torch.zeros((1, action_dim)) - 0.5, requires_grad=True)
self.sqrt_2pi_log = np.log(np.sqrt(2 * np.pi)) self.sqrt_2pi_log = np.log(np.sqrt(2 * np.pi))
if layer_norm:
self.layer_norm(self.net)
@staticmethod if self.layer_norm:
def layer_norm(layer, std=1.0, bias_const=0.0): self.apply_layer_norm()
for i in layer:
if hasattr(i, 'weight'): def apply_layer_norm(self):
torch.nn.init.orthogonal_(i.weight, std) def init_weights(layer):
torch.nn.init.constant_(i.bias, bias_const) if isinstance(layer, nn.Linear):
nn.init.orthogonal_(layer.weight, 1.0)
nn.init.constant_(layer.bias, 0.0)
self.net.apply(init_weights)
def forward(self, state): def forward(self, state):
return self.net(state).tanh() # action.tanh() # in this way limit the data output of action return self.net(state).tanh() # action.tanh() limit the data output of action
def get_action(self, state): def get_action(self, state):
a_avg = self.net(state) # too big for the action a_avg = self.forward(state) # too big for the action
a_std = self.a_logstd.exp() a_std = self.a_logstd.exp()
noise = torch.randn_like(a_avg) noise = torch.randn_like(a_avg)
@@ -46,7 +47,7 @@ class ActorPPO(nn.Module):
return action, noise return action, noise
def get_logprob_entropy(self, state, action): def get_logprob_entropy(self, state, action):
a_avg = self.net(state) a_avg = self.forward(state)
a_std = self.a_logstd.exp() a_std = self.a_logstd.exp()
delta = ((a_avg - action) / a_std).pow(2) * 0.5 delta = ((a_avg - action) / a_std).pow(2) * 0.5
@@ -63,19 +64,21 @@ class ActorPPO(nn.Module):
class CriticAdv(nn.Module): class CriticAdv(nn.Module):
def __init__(self, mid_dim, state_dim, _action_dim, layer_norm=False): def __init__(self, mid_dim, state_dim, _action_dim, layer_norm=False):
super().__init__() super().__init__()
self.layer_norm = layer_norm
self.net = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(), self.net = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.ReLU(), nn.Linear(mid_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.Hardswish(), nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
nn.Linear(mid_dim, 1)) nn.Linear(mid_dim, 1))
if layer_norm: if self.layer_norm:
self.layer_norm(self.net, std=1.0) self.apply_layer_norm()
@staticmethod def apply_layer_norm(self):
def layer_norm(layer, std=1.0, bias_const=0.0): def init_weights(layer):
for i in layer: if isinstance(layer, nn.Linear):
if hasattr(i, 'weight'): nn.init.orthogonal_(layer.weight, 1.0)
torch.nn.init.orthogonal_(i.weight, std) nn.init.constant_(layer.bias, 0.0)
torch.nn.init.constant_(i.bias, bias_const)
self.net.apply(init_weights)
def forward(self, state): def forward(self, state):
return self.net(state) # Advantage value return self.net(state) # Advantage value
@@ -116,7 +119,6 @@ class AgentPPO:
self.cri_optim = torch.optim.Adam(self.cri.parameters(), learning_rate) self.cri_optim = torch.optim.Adam(self.cri.parameters(), learning_rate)
self.act_optim = torch.optim.Adam(self.act.parameters(), learning_rate) if self.ClassAct else self.cri self.act_optim = torch.optim.Adam(self.act.parameters(), learning_rate) if self.ClassAct else self.cri
del self.ClassCri, self.ClassAct # why del self.ClassCri and self.ClassAct here, to save memory?
def select_action(self, state): def select_action(self, state):
states = torch.as_tensor((state,), dtype=torch.float32, device=self.device) states = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
@@ -129,8 +131,8 @@ class AgentPPO:
last_done = 0 last_done = 0
for i in range(target_step): for i in range(target_step):
action, noise = self.select_action(state) action, noise = self.select_action(state)
state, next_state, reward, done, = env.step( # the step of cut action is finally organized into the environment
np.tanh(action)) # here the step of cut action is finally organized into the environment. state, next_state, reward, done, = env.step(np.tanh(action))
trajectory_temp.append((state, reward, done, action, noise)) trajectory_temp.append((state, reward, done, action, noise))
if done: if done:
state = env.reset() state = env.reset()
@@ -140,8 +142,8 @@ class AgentPPO:
self.state = state self.state = state
'''splice list''' '''splice list'''
trajectory_list = self.trajectory_list + trajectory_temp[ # store 0 trajectory information to list
:last_done + 1] # store 0 trajectory information to the list trajectory_list = self.trajectory_list + trajectory_temp[:last_done + 1]
self.trajectory_list = trajectory_temp[last_done:] self.trajectory_list = trajectory_temp[last_done:]
return trajectory_list return trajectory_list
@@ -149,12 +151,12 @@ class AgentPPO:
"""put data extract and update network together""" """put data extract and update network together"""
with torch.no_grad(): with torch.no_grad():
buf_len = buffer[0].shape[0] buf_len = buffer[0].shape[0]
buf_state, buf_action, buf_noise, buf_reward, buf_mask = [ten.to(self.device) for ten in # decompose buffer data
buffer] # decompose buffer data buf_state, buf_action, buf_noise, buf_reward, buf_mask = [ten.to(self.device) for ten in buffer]
'''get buf_r_sum, buf_logprob''' '''get buf_r_sum, buf_logprob'''
bs = 4096 # set a smaller 'BatchSize' when out of GPU memory: 1024, could change to 4096 bs = 4096 # set a smaller 'BatchSize' when out of GPU memory: 1024, could change to 4096
buf_value = [self.cri_target(buf_state[i:i + bs]) for i in range(0, buf_len, bs)] # buf_value = [self.cri_target(buf_state[i:i + bs]) for i in range(0, buf_len, bs)]
buf_value = torch.cat(buf_value, dim=0) buf_value = torch.cat(buf_value, dim=0)
buf_logprob = self.act.get_old_logprob(buf_action, buf_noise) buf_logprob = self.act.get_old_logprob(buf_action, buf_noise)
@@ -317,7 +319,7 @@ if __name__ == '__main__':
agent = args.agent agent = args.agent
env = args.env env = args.env
agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate, agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
args.if_per_or_gae) args.if_per_or_gae, layer_norm=True)
cwd = args.cwd cwd = args.cwd
gamma = args.gamma gamma = args.gamma
@@ -371,7 +373,7 @@ if __name__ == '__main__':
record = test_one_episode(env, agent.act, agent.device) record = test_one_episode(env, agent.act, agent.device)
eval_data = pd.DataFrame(record['system_info']) eval_data = pd.DataFrame(record['system_info'])
eval_data.columns = ['time_step', 'price', 'netload', 'action', 'real_action', 'soc', 'battery', 'gen1', 'gen2', eval_data.columns = ['time_step', 'price', 'netload', 'action', 'real_action', 'soc', 'battery', 'gen1', 'gen2',
'gen3', 'unbalance', 'operation_cost'] 'gen3', 'temperature', 'irradiance', 'unbalance', 'operation_cost']
if args.save_test_data: if args.save_test_data:
test_data_save_path = f'{args.cwd}/test_data.pkl' test_data_save_path = f'{args.cwd}/test_data.pkl'
with open(test_data_save_path, 'wb') as tf: with open(test_data_save_path, 'wb') as tf:

2
SAC.py
View File

@@ -109,7 +109,7 @@ if __name__ == '__main__':
record = test_one_episode(env, agent.act, agent.device) record = test_one_episode(env, agent.act, agent.device)
eval_data = pd.DataFrame(record['system_info']) eval_data = pd.DataFrame(record['system_info'])
eval_data.columns = ['time_step', 'price', 'netload', 'action', 'real_action', 'soc', 'battery', 'gen1', 'gen2', eval_data.columns = ['time_step', 'price', 'netload', 'action', 'real_action', 'soc', 'battery', 'gen1', 'gen2',
'gen3', 'unbalance', 'operation_cost'] 'gen3', 'temperature', 'irradiance', 'unbalance', 'operation_cost']
if args.save_test_data: if args.save_test_data:
test_data_save_path = f'{args.cwd}/test_data.pkl' test_data_save_path = f'{args.cwd}/test_data.pkl'
with open(test_data_save_path, 'wb') as tf: with open(test_data_save_path, 'wb') as tf:

2
TD3.py
View File

@@ -105,7 +105,7 @@ if __name__ == '__main__':
record = test_one_episode(env, agent.act, agent.device) record = test_one_episode(env, agent.act, agent.device)
eval_data = pd.DataFrame(record['system_info']) eval_data = pd.DataFrame(record['system_info'])
eval_data.columns = ['time_step', 'price', 'netload', 'action', 'real_action', 'soc', 'battery', 'gen1', 'gen2', eval_data.columns = ['time_step', 'price', 'netload', 'action', 'real_action', 'soc', 'battery', 'gen1', 'gen2',
'gen3', 'unbalance', 'operation_cost'] 'gen3', 'temperature', 'irradiance', 'unbalance', 'operation_cost']
if args.save_test_data: if args.save_test_data:
test_data_save_path = f'{args.cwd}/test_data.pkl' test_data_save_path = f'{args.cwd}/test_data.pkl'
with open(test_data_save_path, 'wb') as tf: with open(test_data_save_path, 'wb') as tf:

View File

@@ -175,7 +175,7 @@ class ESSEnv(gym.Env):
# process_elements(pv, lambda x: x, self.data_manager.add_pv_element) # process_elements(pv, lambda x: x, self.data_manager.add_pv_element)
process_elements(price, lambda x: max(x / 10, 0.5), self.data_manager.add_price_element) process_elements(price, lambda x: max(x / 10, 0.5), self.data_manager.add_price_element)
process_elements(load, lambda x: x * 5, self.data_manager.add_load_element) process_elements(load, lambda x: x * 3, self.data_manager.add_load_element)
process_elements(irradiance, lambda x: x, self.data_manager.add_irradiance_element) process_elements(irradiance, lambda x: x, self.data_manager.add_irradiance_element)
process_elements(temperature, lambda x: x - 273.15, self.data_manager.add_temperature_element) process_elements(temperature, lambda x: x - 273.15, self.data_manager.add_temperature_element)
process_elements(wind, lambda x: x, self.data_manager.add_wind_element) process_elements(wind, lambda x: x, self.data_manager.add_wind_element)

View File

@@ -46,39 +46,37 @@ def optimization_base_result(env, month, day, initial_soc):
m = gp.Model("UC") m = gp.Model("UC")
# set system variables # 设置系统变量
on_off = m.addVars(NUM_GEN, period, vtype=GRB.BINARY, name='on_off') on_off = m.addVars(NUM_GEN, period, vtype=GRB.BINARY, name='on_off')
gen_output = m.addVars(NUM_GEN, period, vtype=GRB.CONTINUOUS, name='output') gen_output = m.addVars(NUM_GEN, period, vtype=GRB.CONTINUOUS, name='output')
# set constrains for charge/discharge # 设置充放电约束
battery_energy_change = m.addVars(period, vtype=GRB.CONTINUOUS, lb=env.battery.max_discharge, battery_energy_change = m.addVars(period, vtype=GRB.CONTINUOUS, lb=env.battery.max_discharge,
ub=env.battery.max_charge, name='battery_action') ub=env.battery.max_charge, name='battery_action')
# set constrains for exchange between external grid and distributed energy system # 设置外部电网与能源系统交换约束
grid_energy_import = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, ub=env.grid.exchange_ability, name='import') grid_energy_import = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, ub=env.grid.exchange_ability, name='import')
grid_energy_export = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, ub=env.grid.exchange_ability, name='export') grid_energy_export = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, ub=env.grid.exchange_ability, name='export')
soc = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0.2, ub=0.8, name='SOC') soc = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0.2, ub=0.8, name='SOC')
# 1. add balance constrain # 1. 添加平衡约束
m.addConstrs(((sum(gen_output[g, t] for g in range(NUM_GEN)) + pv[t] + wind[t] + grid_energy_import[t] >= load[t] + m.addConstrs(((sum(gen_output[g, t] for g in range(NUM_GEN)) + pv[t] + wind[t] + grid_energy_import[t] >= load[t] +
battery_energy_change[t] + grid_energy_export[t]) for t in range(period)), name='powerbalance') battery_energy_change[t] + grid_energy_export[t]) for t in range(period)), name='powerbalance')
# 2. add constrain for pmax pmin # 2. 添加发电机最大/最小功率约束
m.addConstrs((gen_output[g, t] <= on_off[g, t] * p_max[g] for g in range(NUM_GEN) for t in range(period)), m.addConstrs((gen_output[g, t] <= on_off[g, t] * p_max[g] for g in range(NUM_GEN) for t in range(period)),
'gen_output_max') 'gen_output_max')
m.addConstrs((gen_output[g, t] >= on_off[g, t] * p_min[g] for g in range(NUM_GEN) for t in range(period)), m.addConstrs((gen_output[g, t] >= on_off[g, t] * p_min[g] for g in range(NUM_GEN) for t in range(period)),
'gen_output_min') 'gen_output_min')
# 3. add constrain for ramping up ramping down # 3. 添加上升和下降约束
m.addConstrs((gen_output[g, t + 1] - gen_output[g, t] <= ramping_up[g] for g in range(NUM_GEN) m.addConstrs((gen_output[g, t + 1] - gen_output[g, t] <= ramping_up[g] for g in range(NUM_GEN)
for t in range(period - 1)), 'ramping_up') for t in range(period - 1)), 'ramping_up')
m.addConstrs((gen_output[g, t] - gen_output[g, t + 1] <= ramping_down[g] for g in range(NUM_GEN) m.addConstrs((gen_output[g, t] - gen_output[g, t + 1] <= ramping_down[g] for g in range(NUM_GEN)
for t in range(period - 1)), 'ramping_down') for t in range(period - 1)), 'ramping_down')
# 4. add constrains for SOC # 4. 添加电池容量约束
m.addConstr(battery_capacity * soc[0] == battery_capacity * initial_soc + m.addConstr(battery_capacity * soc[0] == battery_capacity * initial_soc +
(battery_energy_change[0] * battery_efficiency), name='soc0') (battery_energy_change[0] * battery_efficiency), name='soc0')
m.addConstrs((battery_capacity * soc[t] == battery_capacity * soc[t - 1] + m.addConstrs((battery_capacity * soc[t] == battery_capacity * soc[t - 1] +
(battery_energy_change[t] * battery_efficiency) for t in range(1, period)), name='soc update') (battery_energy_change[t] * battery_efficiency) for t in range(1, period)), name='soc update')
# 5. add constrain for pv output # 设置成本函数
m.addConstrs((pv[t] >= 0 for t in range(period)), name='pv_output_min') # 发电机成本
# set cost function
# 1 cost of generator
cost_gen = gp.quicksum( cost_gen = gp.quicksum(
(a_para[g] * gen_output[g, t] * gen_output[g, t] + b_para[g] * gen_output[g, t] + c_para[g] * on_off[g, t]) for (a_para[g] * gen_output[g, t] * gen_output[g, t] + b_para[g] * gen_output[g, t] + c_para[g] * on_off[g, t]) for
t in range(period) for g in range(NUM_GEN)) t in range(period) for g in range(NUM_GEN))