meeting
This commit is contained in:
parent
23c1ad592d
commit
9056fbdc79
23
PPO.py
23
PPO.py
|
@ -100,6 +100,7 @@ class AgentPPO:
|
||||||
|
|
||||||
self.ratio_clip = 0.2 # ratio.clamp(1 - clip, 1 + clip)
|
self.ratio_clip = 0.2 # ratio.clamp(1 - clip, 1 + clip)
|
||||||
self.lambda_entropy = 0.02 # could be 0.01~0.05
|
self.lambda_entropy = 0.02 # could be 0.01~0.05
|
||||||
|
self.gamma = 0.99
|
||||||
self.lambda_gae_adv = 0.98 # could be 0.95~0.99, GAE (Generalized Advantage Estimation. ICLR.2016.)
|
self.lambda_gae_adv = 0.98 # could be 0.95~0.99, GAE (Generalized Advantage Estimation. ICLR.2016.)
|
||||||
self.get_reward_sum = None # self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
|
self.get_reward_sum = None # self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
|
||||||
self.trajectory_list = None
|
self.trajectory_list = None
|
||||||
|
@ -153,7 +154,7 @@ class AgentPPO:
|
||||||
buf_state, buf_action, buf_noise, buf_reward, buf_mask = [ten.to(self.device) for ten in buffer]
|
buf_state, buf_action, buf_noise, buf_reward, buf_mask = [ten.to(self.device) for ten in buffer]
|
||||||
|
|
||||||
'''get buf_r_sum, buf_logprob'''
|
'''get buf_r_sum, buf_logprob'''
|
||||||
bs = 4096 # set a smaller 'BatchSize' when out of GPU memory: 1024, could change to 4096
|
bs = 4096
|
||||||
buf_value = [self.cri_target(buf_state[i:i + bs]) for i in range(0, buf_len, bs)]
|
buf_value = [self.cri_target(buf_state[i:i + bs]) for i in range(0, buf_len, bs)]
|
||||||
buf_value = torch.cat(buf_value, dim=0)
|
buf_value = torch.cat(buf_value, dim=0)
|
||||||
buf_logprob = self.act.get_old_logprob(buf_action, buf_noise)
|
buf_logprob = self.act.get_old_logprob(buf_action, buf_noise)
|
||||||
|
@ -210,12 +211,26 @@ class AgentPPO:
|
||||||
pre_r_sum = 0
|
pre_r_sum = 0
|
||||||
pre_advantage = 0 # advantage value of previous step
|
pre_advantage = 0 # advantage value of previous step
|
||||||
for i in range(buf_len - 1, -1, -1):
|
for i in range(buf_len - 1, -1, -1):
|
||||||
buf_r_sum[i] = ten_reward[i] + ten_mask[i] * pre_r_sum
|
buf_r_sum[i] = ten_reward[i] + ten_mask[i] * self.gamma * pre_r_sum
|
||||||
pre_r_sum = buf_r_sum[i]
|
pre_r_sum = buf_r_sum[i]
|
||||||
buf_advantage[i] = ten_reward[i] + ten_mask[i] * (pre_advantage - ten_value[i]) # fix a bug here
|
delta = ten_reward[i] + ten_mask[i] * self.gamma * ten_value[i + 1] - ten_value[i]
|
||||||
pre_advantage = ten_value[i] + buf_advantage[i] * self.lambda_gae_adv
|
buf_advantage[i] = delta + ten_mask[i] * self.gamma * self.lambda_gae_adv * pre_advantage
|
||||||
|
pre_advantage = buf_advantage[i]
|
||||||
return buf_r_sum, buf_advantage
|
return buf_r_sum, buf_advantage
|
||||||
|
|
||||||
|
# def get_reward_sum_gae(self, buf_len, ten_reward, ten_mask, ten_value) -> (torch.Tensor, torch.Tensor):
|
||||||
|
# buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device) # old policy value
|
||||||
|
# buf_advantage = torch.empty(buf_len, dtype=torch.float32, device=self.device) # advantage value
|
||||||
|
#
|
||||||
|
# pre_r_sum = 0
|
||||||
|
# pre_advantage = 0 # advantage value of previous step
|
||||||
|
# for i in range(buf_len - 1, -1, -1):
|
||||||
|
# buf_r_sum[i] = ten_reward[i] + ten_mask[i] * pre_r_sum
|
||||||
|
# pre_r_sum = buf_r_sum[i]
|
||||||
|
# buf_advantage[i] = ten_reward[i] + ten_mask[i] * (pre_advantage - ten_value[i]) # fix a bug here
|
||||||
|
# pre_advantage = ten_value[i] + buf_advantage[i] * self.lambda_gae_adv
|
||||||
|
# return buf_r_sum, buf_advantage
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def optim_update(optimizer, objective):
|
def optim_update(optimizer, objective):
|
||||||
optimizer.zero_grad()
|
optimizer.zero_grad()
|
||||||
|
|
|
@ -106,7 +106,6 @@ class ESSEnv(gym.Env):
|
||||||
self.excess, self.shedding = 0, 0
|
self.excess, self.shedding = 0, 0
|
||||||
if unbalance >= 0: # 现在过剩
|
if unbalance >= 0: # 现在过剩
|
||||||
if unbalance <= self.grid.exchange_ability:
|
if unbalance <= self.grid.exchange_ability:
|
||||||
# sell money to grid is little [0.029,0.1]
|
|
||||||
sell_benefit = self.grid.get_cost(price, unbalance) * self.sell_coefficient
|
sell_benefit = self.grid.get_cost(price, unbalance) * self.sell_coefficient
|
||||||
else:
|
else:
|
||||||
sell_benefit = self.grid.get_cost(price, self.grid.exchange_ability) * self.sell_coefficient
|
sell_benefit = self.grid.get_cost(price, self.grid.exchange_ability) * self.sell_coefficient
|
||||||
|
|
|
@ -40,7 +40,7 @@ llm = ChatOpenAI(
|
||||||
openai_api_key="none",
|
openai_api_key="none",
|
||||||
# openai_api_base="http://0.0.0.0:5049/v1/models",
|
# openai_api_base="http://0.0.0.0:5049/v1/models",
|
||||||
openai_api_base="http://0.0.0.0:8501",
|
openai_api_base="http://0.0.0.0:8501",
|
||||||
model_name="Qwen1.5-32b-int4"
|
model_name="Qwen1.5-32b"
|
||||||
)
|
)
|
||||||
prompt = ChatPromptTemplate.from_messages(
|
prompt = ChatPromptTemplate.from_messages(
|
||||||
[
|
[
|
||||||
|
|
|
@ -43,7 +43,7 @@ def plot_optimization_result(datasource, directory): # data source is dataframe
|
||||||
|
|
||||||
# 绘制累计发电量和消耗量 in ax[2]
|
# 绘制累计发电量和消耗量 in ax[2]
|
||||||
axs[1, 0].cla()
|
axs[1, 0].cla()
|
||||||
axs[1, 0].set_ylabel('Outputs of Units and Netload (kWh)')
|
axs[1, 0].set_ylabel('Power (kWh)')
|
||||||
axs[1, 0].set_xlabel('Time (h)')
|
axs[1, 0].set_xlabel('Time (h)')
|
||||||
# 处理电池充放电数据
|
# 处理电池充放电数据
|
||||||
battery_positive = np.array(datasource['battery_energy_change'])
|
battery_positive = np.array(datasource['battery_energy_change'])
|
||||||
|
@ -69,7 +69,7 @@ def plot_optimization_result(datasource, directory): # data source is dataframe
|
||||||
axs[1, 0].legend(loc='upper right', bbox_to_anchor=(1.4, 1), fontsize=12, frameon=False, labelspacing=0.3)
|
axs[1, 0].legend(loc='upper right', bbox_to_anchor=(1.4, 1), fontsize=12, frameon=False, labelspacing=0.3)
|
||||||
# axs[1,0].set_xticks([i for i in range(24)],[i for i in range(1,25)])
|
# axs[1,0].set_xticks([i for i in range(24)],[i for i in range(1,25)])
|
||||||
|
|
||||||
fig.savefig(f"{directory}/optimization_information.svg", format='svg', dpi=600, bbox_inches='tight')
|
fig.savefig(f"{directory}/rl.svg", format='svg', dpi=600, bbox_inches='tight')
|
||||||
print('optimization results have been ploted and saved')
|
print('optimization results have been ploted and saved')
|
||||||
|
|
||||||
|
|
||||||
|
@ -114,7 +114,7 @@ def plot_evaluation_information(datasource, directory):
|
||||||
# 绘制发电量和负载量 in ax[2]
|
# 绘制发电量和负载量 in ax[2]
|
||||||
axs[1, 0].cla()
|
axs[1, 0].cla()
|
||||||
axs[1, 0].set_xlabel('Time (h)')
|
axs[1, 0].set_xlabel('Time (h)')
|
||||||
axs[1, 0].set_ylabel('Outputs of Units and Netload (kWh)')
|
axs[1, 0].set_ylabel('Power (kWh)')
|
||||||
# axs[1,0].set_xticks([i for i in range(24)], [i for i in range(1, 25)])
|
# axs[1,0].set_xticks([i for i in range(24)], [i for i in range(1, 25)])
|
||||||
battery_positive = np.array(eval_data['battery'])
|
battery_positive = np.array(eval_data['battery'])
|
||||||
battery_negative = np.array(eval_data['battery'])
|
battery_negative = np.array(eval_data['battery'])
|
||||||
|
@ -142,7 +142,7 @@ def plot_evaluation_information(datasource, directory):
|
||||||
axs[1, 1].set_xlabel('Time (h)')
|
axs[1, 1].set_xlabel('Time (h)')
|
||||||
axs[1, 1].set_ylabel('Costs')
|
axs[1, 1].set_ylabel('Costs')
|
||||||
axs[1, 1].bar(eval_data['time_step'], eval_data['operation_cost'])
|
axs[1, 1].bar(eval_data['time_step'], eval_data['operation_cost'])
|
||||||
fig.savefig(f"{directory}/evaluation_information.svg", format='svg', dpi=600, bbox_inches='tight')
|
fig.savefig(f"{directory}/gurobi.svg", format='svg', dpi=600, bbox_inches='tight')
|
||||||
print('evaluation figure have been ploted and saved')
|
print('evaluation figure have been ploted and saved')
|
||||||
|
|
||||||
|
|
||||||
|
|
2
tools.py
2
tools.py
|
@ -61,7 +61,7 @@ def optimization_base_result(env, month, day, initial_soc):
|
||||||
pv_voltage = m.addVars(period, vtype=GRB.CONTINUOUS, lb=-1, ub=1, name='pv_voltage')
|
pv_voltage = m.addVars(period, vtype=GRB.CONTINUOUS, lb=-1, ub=1, name='pv_voltage')
|
||||||
|
|
||||||
# 计算光伏和风力发电量
|
# 计算光伏和风力发电量
|
||||||
pv = [(0.2 * irradiance[t] + 0.05 * temperature[t] - 9.25) * (1 + pv_voltage[t]) for t in range(period)]
|
pv = [(0.25 * irradiance[t] + 0.05 * temperature[t] - 9.25) * (1 + pv_voltage[t]) for t in range(period)]
|
||||||
wind = [172.265625 * wind_speed[t] ** 3 / 1e3 if 3 <= wind_speed[t] < 8
|
wind = [172.265625 * wind_speed[t] ** 3 / 1e3 if 3 <= wind_speed[t] < 8
|
||||||
else (172.265625 * 8 ** 3 / 1e3 if 8 <= wind_speed[t] < 12 else 0) for t in range(period)]
|
else (172.265625 * 8 ** 3 / 1e3 if 8 <= wind_speed[t] < 12 else 0) for t in range(period)]
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue