meeting

parent: 7c186de43d
commit: 94f447c711

PPO.py (43 lines changed)
@@ -31,6 +31,7 @@ class ActorPPO(nn.Module):
             if isinstance(layer, nn.Linear):
                 nn.init.orthogonal_(layer.weight, 1.0)
                 nn.init.constant_(layer.bias, 0.0)
 
         self.net.apply(init_weights)
 
     def forward(self, state):
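
Note on the hunk above: the initializer it touches follows the common PPO practice of orthogonal weights and zero biases for every Linear layer. A minimal, self-contained sketch of that pattern (the two-layer network here is illustrative only, not the repo's actual ActorPPO):

import torch.nn as nn

def init_weights(layer):
    # Orthogonal weight matrix and zero bias for each Linear layer, as in the hunk above.
    if isinstance(layer, nn.Linear):
        nn.init.orthogonal_(layer.weight, 1.0)
        nn.init.constant_(layer.bias, 0.0)

# Illustrative network only; ActorPPO builds its own self.net.
net = nn.Sequential(nn.Linear(8, 64), nn.ReLU(), nn.Linear(64, 2))
net.apply(init_weights)  # apply the initializer recursively to every submodule
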
@@ -111,7 +112,8 @@ class AgentPPO:
         self.get_reward_sum = self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
 
         self.cri = self.ClassCri(net_dim, state_dim, action_dim, layer_norm).to(self.device)
-        self.act = self.ClassAct(net_dim, state_dim, action_dim, layer_norm).to(self.device) if self.ClassAct else self.cri
+        self.act = self.ClassAct(net_dim, state_dim, action_dim, layer_norm).to(
+            self.device) if self.ClassAct else self.cri
         self.cri_target = deepcopy(self.cri) if self.if_use_cri_target else self.cri
         self.act_target = deepcopy(self.act) if self.if_use_act_target else self.act
@@ -203,19 +205,7 @@ class AgentPPO:
         buf_advantage = buf_r_sum - (buf_mask * buf_value[:, 0])
         return buf_r_sum, buf_advantage
 
-    # def get_reward_sum_gae(self, buf_len, ten_reward, ten_mask, ten_value) -> (torch.Tensor, torch.Tensor):
-    #     buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device)  # old policy value
-    #     buf_advantage = torch.empty(buf_len, dtype=torch.float32, device=self.device)  # advantage value
-    #
-    #     pre_r_sum = 0
-    #     pre_advantage = 0  # advantage value of previous step
-    #     for i in range(buf_len - 1, -1, -1):
-    #         buf_r_sum[i] = ten_reward[i] + ten_mask[i] * gamma * pre_r_sum
-    #         pre_r_sum = buf_r_sum[i]
-    #         delta = ten_reward[i] + ten_mask[i] * gamma * ten_value[i + 1] - ten_value[i]
-    #         buf_advantage[i] = delta + ten_mask[i] * gamma * self.lambda_gae_adv * pre_advantage
-    #         pre_advantage = buf_advantage[i]
-    #     return buf_r_sum, buf_advantage
+    import torch
 
     def get_reward_sum_gae(self, buf_len, ten_reward, ten_mask, ten_value) -> (torch.Tensor, torch.Tensor):
         buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device)  # old policy value
@@ -224,12 +214,33 @@ class AgentPPO:
         pre_r_sum = 0.0
         pre_advantage = 0.0  # advantage value of previous step
         for i in range(buf_len - 1, -1, -1):
+            # Calculate TD residual delta
+            if i == buf_len - 1:
+                next_value = 0.0
+            else:
+                next_value = ten_value[i + 1]
+            delta = ten_reward[i] + ten_mask[i] * gamma * next_value - ten_value[i]
+
             buf_r_sum[i] = ten_reward[i] + ten_mask[i] * pre_r_sum
             pre_r_sum = buf_r_sum[i]
-            buf_advantage[i] = ten_reward[i] + ten_mask[i] * (pre_advantage - ten_value[i])  # fix a bug here
-            pre_advantage = ten_value[i] + buf_advantage[i] * self.lambda_gae_adv
+            pre_advantage = delta + ten_mask[i] * gamma * self.lambda_gae_adv * pre_advantage
+            buf_advantage[i] = pre_advantage
 
         return buf_r_sum, buf_advantage
 
+    # def get_reward_sum_gae(self, buf_len, ten_reward, ten_mask, ten_value) -> (torch.Tensor, torch.Tensor):
+    #     buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device)  # old policy value
+    #     buf_advantage = torch.empty(buf_len, dtype=torch.float32, device=self.device)  # advantage value
+    #
+    #     pre_r_sum = 0.0
+    #     pre_advantage = 0.0  # advantage value of previous step
+    #     for i in range(buf_len - 1, -1, -1):
+    #         buf_r_sum[i] = ten_reward[i] + ten_mask[i] * pre_r_sum
+    #         pre_r_sum = buf_r_sum[i]
+    #         buf_advantage[i] = ten_reward[i] + ten_mask[i] * (pre_advantage - ten_value[i])  # fix a bug here
+    #         pre_advantage = ten_value[i] + buf_advantage[i] * self.lambda_gae_adv
+    #     return buf_r_sum, buf_advantage
+
     @staticmethod
     def optim_update(optimizer, objective):
         optimizer.zero_grad()
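
Note on the get_reward_sum_gae rewrite above: the new loop forms the TD residual delta = r_t + mask_t * gamma * V(s_{t+1}) - V(s_t) explicitly (bootstrapping with zero after the last buffered step) and accumulates the advantage recursively with the GAE factor lambda_gae_adv. For comparison, here is a minimal, self-contained GAE sketch with explicit gamma/lam arguments; the function name compute_gae and the (rewards, values, dones) layout are illustrative and not the repo's API, which folds the episode mask into ten_mask:

import torch

def compute_gae(rewards, values, dones, gamma=0.99, lam=0.95):
    # Backward pass over the buffer:
    #   delta_t = r_t + gamma * (1 - done_t) * V(s_{t+1}) - V(s_t)
    #   A_t     = delta_t + gamma * lam * (1 - done_t) * A_{t+1}
    #   R_t     = A_t + V(s_t)   (return target for the critic)
    buf_len = rewards.shape[0]
    advantages = torch.empty(buf_len, dtype=torch.float32)
    returns = torch.empty(buf_len, dtype=torch.float32)
    next_value = 0.0        # bootstrap value after the last buffered step
    next_advantage = 0.0
    for i in range(buf_len - 1, -1, -1):
        not_done = 1.0 - dones[i]
        delta = rewards[i] + gamma * not_done * next_value - values[i]
        next_advantage = delta + gamma * lam * not_done * next_advantage
        advantages[i] = next_advantage
        returns[i] = advantages[i] + values[i]
        next_value = values[i]
    return returns, advantages

# Tiny example: three steps, episode terminates on the last one.
r = torch.tensor([1.0, 0.5, 2.0])
v = torch.tensor([0.8, 0.6, 1.5])
d = torch.tensor([0.0, 0.0, 1.0])
ret, adv = compute_gae(r, v, d)
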

plotDRL.py (13 lines changed)

@@ -20,10 +20,9 @@ def plot_optimization_result(datasource, directory):  # data source is dataframe
 
     # plot the per-step cost in ax[0]
     axs[0, 0].cla()
-    axs[0, 0].set_ylabel('Costs')
+    axs[0, 0].set_ylabel('Cost')
     axs[0, 0].set_xlabel('Time (h)')
     axs[0, 0].bar(T, datasource['step_cost'])
-    # axs[0,0].set_xticks([i for i in range(24)],[i for i in range(1,25)])
 
     # plot SOC and price in ax[1]
     axs[0, 1].cla()
@@ -67,10 +66,9 @@ def plot_optimization_result(datasource, directory):  # data source is dataframe
     # plot the net load curve
     axs[1, 0].plot(T, datasource['netload'], label='Netload', drawstyle='steps-mid', alpha=0.7)
     axs[1, 0].legend(loc='upper right', bbox_to_anchor=(1.4, 1), fontsize=12, frameon=False, labelspacing=0.3)
-    # axs[1,0].set_xticks([i for i in range(24)],[i for i in range(1,25)])
 
     fig.savefig(f"{directory}/rl.svg", format='svg', dpi=600, bbox_inches='tight')
-    print('optimization results have been ploted and saved')
+    print('rl figure have been ploted and saved')
 
 
 def plot_evaluation_information(datasource, directory):
@@ -91,11 +89,10 @@ def plot_evaluation_information(datasource, directory):
     # plot the power imbalance in axs[0]
     axs[0, 0].cla()
     axs[0, 0].set_xlabel('Time (h)')
-    axs[0, 0].set_ylabel('Unbalance of Generation and Load')
+    axs[0, 0].set_ylabel('Power (kWh)')
     axs[0, 0].bar(eval_data['time_step'], eval_data['unbalance'], label='Exchange with Grid', width=0.4)
     axs[0, 0].bar(eval_data['time_step'] + 0.4, eval_data['netload'], label='Netload', width=0.4)
     axs[0, 0].legend(loc='upper right', bbox_to_anchor=(1.45, 1), fontsize=12, frameon=False, labelspacing=0.5)
-    # axs[0,0].set_xticks([i for i in range(24)],[i for i in range(1,25)])
 
     # plot energy charge/discharge vs. price in ax[1]
     axs[0, 1].cla()
@@ -140,10 +137,10 @@ def plot_evaluation_information(datasource, directory):
     # plot the reward in axs[3]
     axs[1, 1].cla()
     axs[1, 1].set_xlabel('Time (h)')
-    axs[1, 1].set_ylabel('Costs')
+    axs[1, 1].set_ylabel('Cost')
     axs[1, 1].bar(eval_data['time_step'], eval_data['operation_cost'])
     fig.savefig(f"{directory}/gurobi.svg", format='svg', dpi=600, bbox_inches='tight')
-    print('evaluation figure have been ploted and saved')
+    print('gurobi figure have been ploted and saved')
 
 
 def make_dir(directory, feature_change):
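
The evaluation plot in this file leans on two matplotlib idioms visible in the hunks above: shifting a second bar series by the bar width to get grouped bars, and placing the legend outside the axes with bbox_to_anchor. A minimal sketch with made-up data standing in for eval_data:

import numpy as np
import matplotlib.pyplot as plt

t = np.arange(24)                 # hourly time steps, illustrative only
unbalance = np.random.rand(24)
netload = np.random.rand(24)

fig, ax = plt.subplots()
ax.bar(t, unbalance, width=0.4, label='Exchange with Grid')
ax.bar(t + 0.4, netload, width=0.4, label='Netload')  # offset by the bar width -> grouped bars
ax.set_xlabel('Time (h)')
ax.set_ylabel('Power (kWh)')
ax.legend(loc='upper right', bbox_to_anchor=(1.45, 1), frameon=False)  # legend outside the axes
fig.savefig('grouped_bars.svg', format='svg', bbox_inches='tight')
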

tools.py (1 line changed)

@@ -94,6 +94,7 @@ def optimization_base_result(env, month, day, initial_soc):
     cost_wind = gp.quicksum(wind[t] * wind_cofficient for t in range(period))
 
     m.setObjective((cost_gen + cost_battery + cost_import - cost_export + cost_solar + cost_wind), GRB.MINIMIZE)
+    m.setParam(GRB.Param.FuncNonlinear, 1)
     m.optimize()
 
     # record the data for plotting
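
The single added line in tools.py sets the Gurobi parameter FuncNonlinear before m.optimize(). In Gurobi 11 and later, a value of 1 asks the solver to handle general function constraints (exp, log, pow, ...) as true nonlinear constraints rather than replacing them with static piecewise-linear approximations. A minimal sketch independent of the repo's microgrid model (assumes gurobipy >= 11 and a valid license):

import gurobipy as gp
from gurobipy import GRB

m = gp.Model('func_nonlinear_demo')
x = m.addVar(lb=0.0, ub=2.0, name='x')
y = m.addVar(lb=0.0, ub=10.0, name='y')
m.addGenConstrExp(x, y, name='y_is_exp_x')  # general function constraint: y = exp(x)
m.setObjective(y - 2 * x, GRB.MINIMIZE)

m.setParam(GRB.Param.FuncNonlinear, 1)      # treat exp() as a nonlinear constraint, not a PWL approximation
m.optimize()
print(x.X, y.X)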