train
parent f66de9fb54 → commit f26024c8d4
(Large image and binary file diffs not shown.)
PPO.py
@@ -31,7 +31,6 @@ class ActorPPO(nn.Module):
            if isinstance(layer, nn.Linear):
                nn.init.orthogonal_(layer.weight, 1.0)
                nn.init.constant_(layer.bias, 0.0)

        self.net.apply(init_weights)

    def forward(self, state):
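Note: the init_weights fragment above is the usual orthogonal-initialization idiom; as a self-contained sketch (assuming the same PyTorch imports this file already uses) it reads roughly:

    import torch.nn as nn

    def init_weights(layer):
        # orthogonal weights and zero bias for every fully connected layer,
        # applied once to the whole network via net.apply(init_weights)
        if isinstance(layer, nn.Linear):
            nn.init.orthogonal_(layer.weight, 1.0)
            nn.init.constant_(layer.bias, 0.0)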
@@ -331,8 +330,8 @@ if __name__ == '__main__':
    buffer = list()
    '''init training parameters'''
    num_episode = args.num_episode
-   # args.train = False
-   # args.save_network = False
+   args.train = False
+   args.save_network = False
    # args.test_network = False
    # args.save_test_data = False
    # args.compare_with_gurobi = False
@@ -77,7 +77,6 @@ class CriticAdv(nn.Module):
            if isinstance(layer, nn.Linear):
                nn.init.orthogonal_(layer.weight, 1.0)
                nn.init.constant_(layer.bias, 0.0)

        self.net.apply(init_weights)

    def forward(self, state):
@ -91,6 +90,7 @@ class AgentPPO:
|
|||
self.device = None
|
||||
self.action_dim = None
|
||||
self.get_obj_critic = None
|
||||
self.current_step = 0
|
||||
|
||||
self.criterion = torch.nn.SmoothL1Loss()
|
||||
self.cri = self.cri_target = self.if_use_cri_target = self.cri_optim = self.ClassCri = None
|
||||
|
@@ -105,11 +105,13 @@ class AgentPPO:
        self.lambda_gae_adv = 0.98  # could be 0.95~0.99, GAE (Generalized Advantage Estimation. ICLR.2016.)
        self.get_reward_sum = None  # self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
        self.trajectory_list = None
+       self.llm_actions = self.load_llm_actions()

    def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, if_use_gae=False, gpu_id=0, layer_norm=False):
        self.device = torch.device(f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu")
        self.action_dim = action_dim
        self.trajectory_list = list()
        # choose whether to use gae or not

        self.get_reward_sum = self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw

        self.cri = self.ClassCri(net_dim, state_dim, action_dim, layer_norm).to(self.device)
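For context, lambda_gae_adv and get_reward_sum_gae refer to Generalized Advantage Estimation. A generic sketch of the backward recursion such a function typically implements (the signature is illustrative, not necessarily this repo's exact one):

    import torch

    def get_reward_sum_gae(rewards, masks, values, lam=0.98):
        # rewards[t]: reward at step t; masks[t]: gamma * (1 - done_t); values[t]: critic estimate V(s_t)
        buf_len = len(rewards)
        r_sum = torch.empty(buf_len)   # discounted return, used as the critic target
        adv = torch.empty(buf_len)     # GAE advantage, used by the actor update
        next_r_sum, next_adv, next_value = 0.0, 0.0, 0.0
        for t in range(buf_len - 1, -1, -1):
            r_sum[t] = rewards[t] + masks[t] * next_r_sum
            delta = rewards[t] + masks[t] * next_value - values[t]
            adv[t] = delta + masks[t] * lam * next_adv
            next_r_sum, next_adv, next_value = r_sum[t], adv[t], values[t]
        return r_sum, adv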
@@ -121,32 +123,50 @@ class AgentPPO:
        self.cri_optim = torch.optim.Adam(self.cri.parameters(), learning_rate)
        self.act_optim = torch.optim.Adam(self.act.parameters(), learning_rate) if self.ClassAct else self.cri

    # def select_action(self, state):
    #     states = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
    #     actions, noises = self.act.get_action(states)
    #     return actions[0].detach().cpu().numpy(), noises[0].detach().cpu().numpy()

    def select_action(self, state):
        states = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
-       actions, noises = self.act.get_action(states)
-       return actions[0].detach().cpu().numpy(), noises[0].detach().cpu().numpy()
+       action_rl, noise = self.act.get_action(states[0])
+       action_rl = action_rl.detach().cpu().numpy().flatten()
+       noises = noise.detach().cpu().numpy().flatten()
+       # print(f"Action from RL model: {action_rl}")
+       # print(f"Noise: {noise}")
+       # print(f"Expected action dimension: {self.action_dim}")
+       index = self.current_step % len(self.llm_actions)
+       self.current_step += 1
+       action_llm = self.llm_actions[index]
+       action_llm = np.array(action_llm, dtype=np.float32)
+       # print(f"Action from LLM: {action_llm}")
+       action_combined = 0.5 * action_rl + 0.5 * action_llm
+       if action_combined.shape[0] != self.action_dim:
+           raise ValueError("Combined action dimension mismatch. Check the action generation process.")
+       return action_combined, noises

    @staticmethod
-   def get_llm_action(index):
+   def load_llm_actions():
        with open('data/llm_action.json', 'r') as file:
-           data = json.load(file)
-           data_tensor = torch.tensor(data, dtype=torch.float32)
-           normalized_index = index % len(data_tensor)
-           action = data_tensor[normalized_index].detach().cpu().numpy()
-           return action
+           llm_actions = json.load(file)
+           return llm_actions
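The core of the change above is blending the policy's action with a pre-computed LLM suggestion, cycling through data/llm_action.json step by step. A minimal standalone sketch of that idea (the file path and the equal 0.5/0.5 weighting come from the diff; function names and everything else are illustrative):

    import json
    import numpy as np

    def load_llm_actions(path='data/llm_action.json'):
        with open(path, 'r') as file:
            return json.load(file)  # list of action vectors, one per time step

    def combine_actions(action_rl, llm_actions, step, action_dim):
        # pick the LLM suggestion for this step, wrapping around at the end of the list
        action_llm = np.array(llm_actions[step % len(llm_actions)], dtype=np.float32)
        # equal-weight blend of the RL policy action and the LLM suggestion
        action_combined = 0.5 * np.asarray(action_rl, dtype=np.float32) + 0.5 * action_llm
        if action_combined.shape[0] != action_dim:
            raise ValueError("Combined action dimension mismatch.")
        return action_combined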

    def explore_env(self, env, target_step):
        state = self.state  # sent state to agent and then agent sent state to method
        trajectory_temp = list()
        last_done = 0
        for i in range(target_step):
            # action = self.get_llm_action(i)
            # noise = 0
            action, noise = self.select_action(state)
-           state, next_state, reward, done, = env.step(np.tanh(action))  # make action between -1 & 1
+           action = np.tanh(action)  # make action between -1 & 1
+           # print(f"Action at step {i}: {action}")
+           if len(action) < 2:
+               raise ValueError("Action dimension is less than expected. Check the action generation process.")
+           state, next_state, reward, done, = env.step(action)
            trajectory_temp.append((state, reward, done, action, noise))
            if done:
                state = env.reset()
+               self.current_step = 0
                last_done = i
            else:
                state = next_state
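The np.tanh call exists only to squash the (possibly unbounded) combined action into the open interval (-1, 1) before it reaches env.step. A quick illustration with made-up values:

    import numpy as np

    raw_action = np.array([-3.2, 0.0, 1.7])
    squashed = np.tanh(raw_action)
    print(squashed)               # approximately [-0.9967  0.      0.9354], all inside (-1, 1)
    print(np.abs(squashed) < 1)   # [ True  True  True]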
@@ -53,8 +53,9 @@ class Battery:

    def step(self, action_battery):
        energy = action_battery * self.max_charge
-       updated_capacity = max(self.min_soc,
-                              min(self.max_soc, (self.current_capacity * self.capacity + energy) / self.capacity))
+       updated_capacity = np.maximum(self.min_soc,
+                                     np.minimum(self.max_soc,
+                                                (self.current_capacity * self.capacity + energy) / self.capacity))
        # if charge, positive, if discharge, negative
        self.energy_change = (updated_capacity - self.current_capacity) * self.capacity
        self.current_capacity = updated_capacity  # update capacity to current condition
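The Battery.step change swaps Python's built-in max/min for np.maximum/np.minimum so the state-of-charge clamp also works element-wise on array inputs. A hedged sketch of the same update outside the class (parameter names mirror the diff; np.clip is used here as an equivalent shorthand):

    import numpy as np

    def battery_step(current_capacity, capacity, action_battery, max_charge, min_soc, max_soc):
        energy = action_battery * max_charge  # positive = charge, negative = discharge
        # clamp the resulting state of charge to the allowed band
        updated_capacity = np.clip((current_capacity * capacity + energy) / capacity, min_soc, max_soc)
        # actual energy moved after clamping
        energy_change = (updated_capacity - current_capacity) * capacity
        return updated_capacity, energy_change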
@@ -72,6 +73,7 @@ class Battery:

class Solar:
+   """simulate a solar panel"""

    def __init__(self, parameters):
        self.current_power = None
        self.base_voltage = parameters['V_b']
@@ -108,6 +110,7 @@ class Solar:

class Wind:
+   """simulate a wind turbine"""

    def __init__(self, parameters):
        self.current_power = None
        self.cutin_speed = parameters['cutin_speed']
@@ -140,6 +143,7 @@ class Wind:

class Grid:
+   """simulate a grid"""

    def __init__(self):
        self.on = True
        self.delta = 1
plotDRL.py
@@ -18,35 +18,39 @@ def plot_optimization_result(datasource, directory):  # data source is dataframe
    plt.autoscale(tight=True)
    T = np.array([i for i in range(24)])

    # plot step cost in ax[0]
    axs[0, 0].cla()
    axs[0, 0].set_ylabel('Costs')
    axs[0, 0].set_xlabel('Time(h)')
    axs[0, 0].bar(T, datasource['step_cost'])
    # axs[0,0].set_xticks([i for i in range(24)],[i for i in range(1,25)])

    # plot soc and price in ax[1]
    axs[0, 1].cla()
    # set up the first y-axis
    axs[0, 1].set_ylabel('Price')
    axs[0, 1].set_xlabel('Time(h)')
+   line1, = axs[0, 1].plot(T, datasource['price'], drawstyle='steps-mid', label='Price', color='pink')
    # create the second y-axis
+   ax2 = axs[0, 1].twinx()
+   ax2.set_ylabel('SOC')
+   line2, = ax2.plot(T, datasource['soc'], drawstyle='steps-mid', label='SOC', color='grey')
    # collect handles so one legend covers both axes
+   lines = [line1, line2]
+   labels = [line.get_label() for line in lines]
+   axs[0, 1].legend(lines, labels, loc='upper right', bbox_to_anchor=(1.4, 1),
+                    fontsize=12, frameon=False, labelspacing=0.3)
-   axs[0, 1].plot(T, datasource['price'], drawstyle='steps-mid', label='Price', color='pink')
-   axs[0, 1] = axs[0, 1].twinx()
-   axs[0, 1].set_ylabel('SOC')
-   axs[0, 1].plot(T, datasource['soc'], drawstyle='steps-mid', label='SOC', color='grey')
-   # axs[0,1].set_xticks([i for i in range(24)],[i for i in range(1,25)])
-   axs[0, 1].legend(loc='upper right', fontsize=12, frameon=False, labelspacing=0.3)

    # plot accumulated generation and consumption in ax[2]
    axs[1, 0].cla()
    axs[1, 0].set_ylabel('Outputs of DGs and Battery')
    axs[1, 0].set_xlabel('Time(h)')
    # handle battery charge/discharge data
    battery_positive = np.array(datasource['battery_energy_change'])
    battery_negative = np.array(datasource['battery_energy_change'])
    battery_positive = np.maximum(battery_positive, 0)  # charge
    battery_negative = np.minimum(battery_negative, 0)  # discharge
    # deal with power exchange within the figure
    imported_from_grid = np.array(datasource['grid_import'])
    exported_2_grid = np.array(datasource['grid_export'])
    axs[1, 0].bar(T, datasource['gen1'], label='Gen1')
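The rewritten SOC/price panel keeps the handles returned by plot so that a single legend can cover both y-axes; the earlier version overwrote axs[0, 1] with its own twinx and so lost the original handle. The pattern in isolation, with made-up data:

    import numpy as np
    import matplotlib.pyplot as plt

    T = np.arange(24)
    price = np.random.rand(24)
    soc = np.random.rand(24)

    fig, ax = plt.subplots()
    line1, = ax.plot(T, price, drawstyle='steps-mid', label='Price', color='pink')
    ax2 = ax.twinx()                 # second y-axis sharing the same x-axis
    ax2.set_ylabel('SOC')
    line2, = ax2.plot(T, soc, drawstyle='steps-mid', label='SOC', color='grey')
    lines = [line1, line2]           # collect handles from both axes
    ax.legend(lines, [l.get_label() for l in lines], loc='upper right', bbox_to_anchor=(1.4, 1))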
@@ -55,13 +59,14 @@ def plot_optimization_result(datasource, directory):  # data source is dataframe
    axs[1, 0].bar(T, -battery_positive, color='blue', hatch='/', label='ESS charge')
    axs[1, 0].bar(T, -battery_negative, hatch='/', label='ESS discharge',
                  bottom=datasource['gen3'] + datasource['gen2'] + datasource['gen1'])
    # import as generation
    axs[1, 0].bar(T, imported_from_grid, label='Grid import',
                  bottom=-battery_negative + datasource['gen3'] + datasource['gen2'] + datasource['gen1'])
    # export as load
    axs[1, 0].bar(T, -exported_2_grid, label='Grid export', bottom=-battery_positive)
    # plot the net load curve
    axs[1, 0].plot(T, datasource['netload'], label='Netload', drawstyle='steps-mid', alpha=0.7)
-   axs[1, 0].legend(loc='upper right', fontsize=12, frameon=False, labelspacing=0.3)
+   axs[1, 0].legend(loc='upper right', bbox_to_anchor=(1.4, 1), fontsize=12, frameon=False, labelspacing=0.3)
    # axs[1,0].set_xticks([i for i in range(24)],[i for i in range(1,25)])

    fig.savefig(f"{directory}/optimization_information.svg", format='svg', dpi=600, bbox_inches='tight')
@@ -72,38 +77,40 @@ def plot_evaluation_information(datasource, directory):
    sns.set_theme(style='whitegrid')
    with open(datasource, 'rb') as tf:
        test_data = pickle.load(tf)
    # plot unbalance and reward of each step as bar charts
    plt.rcParams["figure.figsize"] = (16, 9)
    fig, axs = plt.subplots(2, 2)
    plt.subplots_adjust(wspace=0.7, hspace=0.3)
    plt.autoscale(tight=True)

    # prepare data for evaluating the environment here
    eval_data = pd.DataFrame(test_data['system_info'])
    eval_data.columns = ['time_step', 'price', 'netload', 'action', 'real_action', 'soc', 'battery', 'gen1', 'gen2',
                         'gen3', 'temperature', 'irradiance', 'unbalance', 'operation_cost']

    # plot unbalance in axs[0]
    axs[0, 0].cla()
    axs[0, 0].set_ylabel('Unbalance of Generation and Load')
    axs[0, 0].bar(eval_data['time_step'], eval_data['unbalance'], label='Exchange with Grid', width=0.4)
    axs[0, 0].bar(eval_data['time_step'] + 0.4, eval_data['netload'], label='Netload', width=0.4)
-   axs[0, 0].legend(loc='upper right', fontsize=12, frameon=False, labelspacing=0.5)
+   axs[0, 0].legend(loc='upper right', bbox_to_anchor=(1.4, 1), fontsize=12, frameon=False, labelspacing=0.5)
    # axs[0,0].set_xticks([i for i in range(24)],[i for i in range(1,25)])

    # plot energy charge/discharge with price in ax[1]
    axs[0, 1].cla()
    axs[0, 1].set_ylabel('Price')
    axs[0, 1].set_xlabel('Time Steps')
+   line1, = axs[0, 1].plot(eval_data['time_step'], eval_data['price'], drawstyle='steps-mid', label='Price',
+                           color='pink')
+   ax2 = axs[0, 1].twinx()
+   ax2.set_ylabel('SOC')
+   line2, = ax2.plot(eval_data['time_step'], eval_data['soc'], drawstyle='steps-mid', label='SOC', color='grey')
+   lines = [line1, line2]
+   labels = [line.get_label() for line in lines]
+   axs[0, 1].legend(lines, labels, loc='upper right', bbox_to_anchor=(1.4, 1),
+                    fontsize=12, frameon=False, labelspacing=0.3)
-   axs[0, 1].plot(eval_data['time_step'], eval_data['price'], drawstyle='steps-mid', label='Price', color='pink')
-   axs[0, 1] = axs[0, 1].twinx()
-   axs[0, 1].set_ylabel('SOC')
-   # axs[0,1].set_xticks([i for i in range(24)], [i for i in range(1, 25)])
-   axs[0, 1].plot(eval_data['time_step'], eval_data['soc'], drawstyle='steps-mid', label='SOC', color='grey')
-   axs[0, 1].legend(loc='upper right', fontsize=12, frameon=False, labelspacing=0.3)

    # plot generation and netload in ax[2]
    axs[1, 0].cla()
    axs[1, 0].set_ylabel('Outputs of Units and Netload (kWh)')
    # axs[1,0].set_xticks([i for i in range(24)], [i for i in range(1, 25)])
@@ -112,7 +119,6 @@ def plot_evaluation_information(datasource, directory):
    battery_positive = np.maximum(battery_positive, 0)  # charge
    battery_negative = np.minimum(battery_negative, 0)  # discharge

    # deal with power exchange within the figure
    imported_from_grid = np.minimum(np.array(eval_data['unbalance']), 0)
    exported_2_grid = np.maximum(np.array(eval_data['unbalance']), 0)
    x = eval_data['time_step']
@@ -127,9 +133,9 @@ def plot_evaluation_information(datasource, directory):
    axs[1, 0].bar(x, -exported_2_grid, label='Grid export', bottom=-battery_positive)

    axs[1, 0].plot(x, eval_data['netload'], drawstyle='steps-mid', label='Netload')
-   axs[1, 0].legend(loc='upper right', fontsize=12, frameon=False, labelspacing=0.3)
+   axs[1, 0].legend(loc='upper right', bbox_to_anchor=(1.4, 1), fontsize=12, frameon=False, labelspacing=0.3)

    # plot reward in axs[3]
    axs[1, 1].cla()
    axs[1, 1].set_ylabel('Costs')
    axs[1, 1].bar(eval_data['time_step'], eval_data['operation_cost'])