train
parent f66de9fb54 → commit f26024c8d4
(Large image and binary file diffs not shown.)
PPO.py
@@ -31,7 +31,6 @@ class ActorPPO(nn.Module):
            if isinstance(layer, nn.Linear):
                nn.init.orthogonal_(layer.weight, 1.0)
                nn.init.constant_(layer.bias, 0.0)

        self.net.apply(init_weights)

    def forward(self, state):
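Note: the init_weights fragment above is the usual orthogonal-initialization idiom; as a self-contained sketch (assuming the same PyTorch imports this file already uses) it reads roughly:

    import torch.nn as nn

    def init_weights(layer):
        # orthogonal weights and zero bias for every fully connected layer,
        # applied once to the whole network via net.apply(init_weights)
        if isinstance(layer, nn.Linear):
            nn.init.orthogonal_(layer.weight, 1.0)
            nn.init.constant_(layer.bias, 0.0)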
@@ -331,8 +330,8 @@ if __name__ == '__main__':
    buffer = list()
    '''init training parameters'''
    num_episode = args.num_episode
-   # args.train = False
-   # args.save_network = False
+   args.train = False
+   args.save_network = False
    # args.test_network = False
    # args.save_test_data = False
    # args.compare_with_gurobi = False
@@ -77,7 +77,6 @@ class CriticAdv(nn.Module):
            if isinstance(layer, nn.Linear):
                nn.init.orthogonal_(layer.weight, 1.0)
                nn.init.constant_(layer.bias, 0.0)

        self.net.apply(init_weights)

    def forward(self, state):
@ -91,6 +90,7 @@ class AgentPPO:
|
|||
self.device = None
|
||||
self.action_dim = None
|
||||
self.get_obj_critic = None
|
||||
self.current_step = 0
|
||||
|
||||
self.criterion = torch.nn.SmoothL1Loss()
|
||||
self.cri = self.cri_target = self.if_use_cri_target = self.cri_optim = self.ClassCri = None
|
||||
|
@@ -105,11 +105,13 @@ class AgentPPO:
        self.lambda_gae_adv = 0.98  # could be 0.95~0.99, GAE (Generalized Advantage Estimation. ICLR.2016.)
        self.get_reward_sum = None  # self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
        self.trajectory_list = None
+       self.llm_actions = self.load_llm_actions()

    def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, if_use_gae=False, gpu_id=0, layer_norm=False):
        self.device = torch.device(f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu")
        self.action_dim = action_dim
        self.trajectory_list = list()
        # choose whether to use gae or not

        self.get_reward_sum = self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw

        self.cri = self.ClassCri(net_dim, state_dim, action_dim, layer_norm).to(self.device)
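For context, lambda_gae_adv and get_reward_sum_gae refer to Generalized Advantage Estimation. A generic sketch of the backward recursion such a function typically implements (the signature is illustrative, not necessarily this repo's exact one):

    import torch

    def get_reward_sum_gae(rewards, masks, values, lam=0.98):
        # rewards[t]: reward at step t; masks[t]: gamma * (1 - done_t); values[t]: critic estimate V(s_t)
        buf_len = len(rewards)
        r_sum = torch.empty(buf_len)   # discounted return, used as the critic target
        adv = torch.empty(buf_len)     # GAE advantage, used by the actor update
        next_r_sum, next_adv, next_value = 0.0, 0.0, 0.0
        for t in range(buf_len - 1, -1, -1):
            r_sum[t] = rewards[t] + masks[t] * next_r_sum
            delta = rewards[t] + masks[t] * next_value - values[t]
            adv[t] = delta + masks[t] * lam * next_adv
            next_r_sum, next_adv, next_value = r_sum[t], adv[t], values[t]
        return r_sum, adv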
@@ -121,32 +123,50 @@ class AgentPPO:
        self.cri_optim = torch.optim.Adam(self.cri.parameters(), learning_rate)
        self.act_optim = torch.optim.Adam(self.act.parameters(), learning_rate) if self.ClassAct else self.cri

    # def select_action(self, state):
    #     states = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
    #     actions, noises = self.act.get_action(states)
    #     return actions[0].detach().cpu().numpy(), noises[0].detach().cpu().numpy()

    def select_action(self, state):
        states = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
-       actions, noises = self.act.get_action(states)
-       return actions[0].detach().cpu().numpy(), noises[0].detach().cpu().numpy()
+       action_rl, noise = self.act.get_action(states[0])
+       action_rl = action_rl.detach().cpu().numpy().flatten()
+       noises = noise.detach().cpu().numpy().flatten()
+       # print(f"Action from RL model: {action_rl}")
+       # print(f"Noise: {noise}")
+       # print(f"Expected action dimension: {self.action_dim}")
+       index = self.current_step % len(self.llm_actions)
+       self.current_step += 1
+       action_llm = self.llm_actions[index]
+       action_llm = np.array(action_llm, dtype=np.float32)
+       # print(f"Action from LLM: {action_llm}")
+       action_combined = 0.5 * action_rl + 0.5 * action_llm
+       if action_combined.shape[0] != self.action_dim:
+           raise ValueError("Combined action dimension mismatch. Check the action generation process.")
+       return action_combined, noises

    @staticmethod
-   def get_llm_action(index):
+   def load_llm_actions():
        with open('data/llm_action.json', 'r') as file:
-           data = json.load(file)
-           data_tensor = torch.tensor(data, dtype=torch.float32)
-           normalized_index = index % len(data_tensor)
-           action = data_tensor[normalized_index].detach().cpu().numpy()
-           return action
+           llm_actions = json.load(file)
+           return llm_actions
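The core of the change above is blending the policy's action with a pre-computed LLM suggestion, cycling through data/llm_action.json step by step. A minimal standalone sketch of that idea (the file path and the equal 0.5/0.5 weighting come from the diff; function names and everything else are illustrative):

    import json
    import numpy as np

    def load_llm_actions(path='data/llm_action.json'):
        with open(path, 'r') as file:
            return json.load(file)  # list of action vectors, one per time step

    def combine_actions(action_rl, llm_actions, step, action_dim):
        # pick the LLM suggestion for this step, wrapping around at the end of the list
        action_llm = np.array(llm_actions[step % len(llm_actions)], dtype=np.float32)
        # equal-weight blend of the RL policy action and the LLM suggestion
        action_combined = 0.5 * np.asarray(action_rl, dtype=np.float32) + 0.5 * action_llm
        if action_combined.shape[0] != action_dim:
            raise ValueError("Combined action dimension mismatch.")
        return action_combined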

    def explore_env(self, env, target_step):
        state = self.state  # sent state to agent and then agent sent state to method
        trajectory_temp = list()
        last_done = 0
        for i in range(target_step):
            # action = self.get_llm_action(i)
            # noise = 0
            action, noise = self.select_action(state)
-           state, next_state, reward, done, = env.step(np.tanh(action))  # make action between -1 & 1
+           action = np.tanh(action)  # make action between -1 & 1
+           # print(f"Action at step {i}: {action}")
+           if len(action) < 2:
+               raise ValueError("Action dimension is less than expected. Check the action generation process.")
+           state, next_state, reward, done, = env.step(action)
            trajectory_temp.append((state, reward, done, action, noise))
            if done:
                state = env.reset()
+               self.current_step = 0
                last_done = i
            else:
                state = next_state
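The np.tanh call exists only to squash the (possibly unbounded) combined action into the open interval (-1, 1) before it reaches env.step. A quick illustration with made-up values:

    import numpy as np

    raw_action = np.array([-3.2, 0.0, 1.7])
    squashed = np.tanh(raw_action)
    print(squashed)               # approximately [-0.9967  0.      0.9354], all inside (-1, 1)
    print(np.abs(squashed) < 1)   # [ True  True  True]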
@@ -53,8 +53,9 @@ class Battery:

    def step(self, action_battery):
        energy = action_battery * self.max_charge
-       updated_capacity = max(self.min_soc,
-                              min(self.max_soc, (self.current_capacity * self.capacity + energy) / self.capacity))
+       updated_capacity = np.maximum(self.min_soc,
+                                     np.minimum(self.max_soc,
+                                                (self.current_capacity * self.capacity + energy) / self.capacity))
        # if charge, positive, if discharge, negative
        self.energy_change = (updated_capacity - self.current_capacity) * self.capacity
        self.current_capacity = updated_capacity  # update capacity to current condition
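The Battery.step change swaps Python's built-in max/min for np.maximum/np.minimum so the state-of-charge clamp also works element-wise on array inputs. A hedged sketch of the same update outside the class (parameter names mirror the diff; np.clip is used here as an equivalent shorthand):

    import numpy as np

    def battery_step(current_capacity, capacity, action_battery, max_charge, min_soc, max_soc):
        energy = action_battery * max_charge  # positive = charge, negative = discharge
        # clamp the resulting state of charge to the allowed band
        updated_capacity = np.clip((current_capacity * capacity + energy) / capacity, min_soc, max_soc)
        # actual energy moved after clamping
        energy_change = (updated_capacity - current_capacity) * capacity
        return updated_capacity, energy_change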
@@ -72,6 +73,7 @@ class Battery:

class Solar:
+   """simulate a solar panel"""

    def __init__(self, parameters):
        self.current_power = None
        self.base_voltage = parameters['V_b']
@@ -108,6 +110,7 @@ class Solar:

class Wind:
+   """simulate a wind turbine"""

    def __init__(self, parameters):
        self.current_power = None
        self.cutin_speed = parameters['cutin_speed']
@@ -140,6 +143,7 @@ class Wind:

class Grid:
+   """simulate a grid"""

    def __init__(self):
        self.on = True
        self.delta = 1
plotDRL.py
@@ -18,35 +18,39 @@ def plot_optimization_result(datasource, directory):  # data source is dataframe
    plt.autoscale(tight=True)
    T = np.array([i for i in range(24)])

    # plot step cost in ax[0]
    axs[0, 0].cla()
    axs[0, 0].set_ylabel('Costs')
    axs[0, 0].set_xlabel('Time(h)')
    axs[0, 0].bar(T, datasource['step_cost'])
    # axs[0,0].set_xticks([i for i in range(24)],[i for i in range(1,25)])

    # plot soc and price in ax[1]
    axs[0, 1].cla()
    # set up the first y-axis
    axs[0, 1].set_ylabel('Price')
    axs[0, 1].set_xlabel('Time(h)')
+   line1, = axs[0, 1].plot(T, datasource['price'], drawstyle='steps-mid', label='Price', color='pink')
    # create the second y-axis
+   ax2 = axs[0, 1].twinx()
+   ax2.set_ylabel('SOC')
+   line2, = ax2.plot(T, datasource['soc'], drawstyle='steps-mid', label='SOC', color='grey')
    # collect handles so one legend covers both axes
+   lines = [line1, line2]
+   labels = [line.get_label() for line in lines]
+   axs[0, 1].legend(lines, labels, loc='upper right', bbox_to_anchor=(1.4, 1),
+                    fontsize=12, frameon=False, labelspacing=0.3)
-   axs[0, 1].plot(T, datasource['price'], drawstyle='steps-mid', label='Price', color='pink')
-   axs[0, 1] = axs[0, 1].twinx()
-   axs[0, 1].set_ylabel('SOC')
-   axs[0, 1].plot(T, datasource['soc'], drawstyle='steps-mid', label='SOC', color='grey')
-   # axs[0,1].set_xticks([i for i in range(24)],[i for i in range(1,25)])
-   axs[0, 1].legend(loc='upper right', fontsize=12, frameon=False, labelspacing=0.3)

    # plot accumulated generation and consumption in ax[2]
    axs[1, 0].cla()
    axs[1, 0].set_ylabel('Outputs of DGs and Battery')
    axs[1, 0].set_xlabel('Time(h)')
    # handle battery charge/discharge data
    battery_positive = np.array(datasource['battery_energy_change'])
    battery_negative = np.array(datasource['battery_energy_change'])
    battery_positive = np.maximum(battery_positive, 0)  # charge
    battery_negative = np.minimum(battery_negative, 0)  # discharge
    # deal with power exchange within the figure
    imported_from_grid = np.array(datasource['grid_import'])
    exported_2_grid = np.array(datasource['grid_export'])
    axs[1, 0].bar(T, datasource['gen1'], label='Gen1')
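The rewritten SOC/price panel keeps the handles returned by plot so that a single legend can cover both y-axes; the earlier version overwrote axs[0, 1] with its own twinx and so lost the original handle. The pattern in isolation, with made-up data:

    import numpy as np
    import matplotlib.pyplot as plt

    T = np.arange(24)
    price = np.random.rand(24)
    soc = np.random.rand(24)

    fig, ax = plt.subplots()
    line1, = ax.plot(T, price, drawstyle='steps-mid', label='Price', color='pink')
    ax2 = ax.twinx()                 # second y-axis sharing the same x-axis
    ax2.set_ylabel('SOC')
    line2, = ax2.plot(T, soc, drawstyle='steps-mid', label='SOC', color='grey')
    lines = [line1, line2]           # collect handles from both axes
    ax.legend(lines, [l.get_label() for l in lines], loc='upper right', bbox_to_anchor=(1.4, 1))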
@@ -55,13 +59,14 @@ def plot_optimization_result(datasource, directory):  # data source is dataframe
    axs[1, 0].bar(T, -battery_positive, color='blue', hatch='/', label='ESS charge')
    axs[1, 0].bar(T, -battery_negative, hatch='/', label='ESS discharge',
                  bottom=datasource['gen3'] + datasource['gen2'] + datasource['gen1'])
    # import as generation
    axs[1, 0].bar(T, imported_from_grid, label='Grid import',
                  bottom=-battery_negative + datasource['gen3'] + datasource['gen2'] + datasource['gen1'])
    # export as load
    axs[1, 0].bar(T, -exported_2_grid, label='Grid export', bottom=-battery_positive)
    # plot the net load curve
    axs[1, 0].plot(T, datasource['netload'], label='Netload', drawstyle='steps-mid', alpha=0.7)
-   axs[1, 0].legend(loc='upper right', fontsize=12, frameon=False, labelspacing=0.3)
+   axs[1, 0].legend(loc='upper right', bbox_to_anchor=(1.4, 1), fontsize=12, frameon=False, labelspacing=0.3)
    # axs[1,0].set_xticks([i for i in range(24)],[i for i in range(1,25)])

    fig.savefig(f"{directory}/optimization_information.svg", format='svg', dpi=600, bbox_inches='tight')
@@ -72,38 +77,40 @@ def plot_evaluation_information(datasource, directory):
    sns.set_theme(style='whitegrid')
    with open(datasource, 'rb') as tf:
        test_data = pickle.load(tf)
    # plot unbalance and reward of each step as bar charts
    plt.rcParams["figure.figsize"] = (16, 9)
    fig, axs = plt.subplots(2, 2)
    plt.subplots_adjust(wspace=0.7, hspace=0.3)
    plt.autoscale(tight=True)

    # prepare data for evaluating the environment here
    eval_data = pd.DataFrame(test_data['system_info'])
    eval_data.columns = ['time_step', 'price', 'netload', 'action', 'real_action', 'soc', 'battery', 'gen1', 'gen2',
                         'gen3', 'temperature', 'irradiance', 'unbalance', 'operation_cost']

    # plot unbalance in axs[0]
    axs[0, 0].cla()
    axs[0, 0].set_ylabel('Unbalance of Generation and Load')
    axs[0, 0].bar(eval_data['time_step'], eval_data['unbalance'], label='Exchange with Grid', width=0.4)
    axs[0, 0].bar(eval_data['time_step'] + 0.4, eval_data['netload'], label='Netload', width=0.4)
-   axs[0, 0].legend(loc='upper right', fontsize=12, frameon=False, labelspacing=0.5)
+   axs[0, 0].legend(loc='upper right', bbox_to_anchor=(1.4, 1), fontsize=12, frameon=False, labelspacing=0.5)
    # axs[0,0].set_xticks([i for i in range(24)],[i for i in range(1,25)])

    # plot energy charge/discharge with price in ax[1]
    axs[0, 1].cla()
    axs[0, 1].set_ylabel('Price')
    axs[0, 1].set_xlabel('Time Steps')
+   line1, = axs[0, 1].plot(eval_data['time_step'], eval_data['price'], drawstyle='steps-mid', label='Price',
+                           color='pink')
+   ax2 = axs[0, 1].twinx()
+   ax2.set_ylabel('SOC')
+   line2, = ax2.plot(eval_data['time_step'], eval_data['soc'], drawstyle='steps-mid', label='SOC', color='grey')
+   lines = [line1, line2]
+   labels = [line.get_label() for line in lines]
+   axs[0, 1].legend(lines, labels, loc='upper right', bbox_to_anchor=(1.4, 1),
+                    fontsize=12, frameon=False, labelspacing=0.3)
-   axs[0, 1].plot(eval_data['time_step'], eval_data['price'], drawstyle='steps-mid', label='Price', color='pink')
-   axs[0, 1] = axs[0, 1].twinx()
-   axs[0, 1].set_ylabel('SOC')
-   # axs[0,1].set_xticks([i for i in range(24)], [i for i in range(1, 25)])
-   axs[0, 1].plot(eval_data['time_step'], eval_data['soc'], drawstyle='steps-mid', label='SOC', color='grey')
-   axs[0, 1].legend(loc='upper right', fontsize=12, frameon=False, labelspacing=0.3)

    # plot generation and netload in ax[2]
    axs[1, 0].cla()
    axs[1, 0].set_ylabel('Outputs of Units and Netload (kWh)')
    # axs[1,0].set_xticks([i for i in range(24)], [i for i in range(1, 25)])
@@ -112,7 +119,6 @@ def plot_evaluation_information(datasource, directory):
    battery_positive = np.maximum(battery_positive, 0)  # charge
    battery_negative = np.minimum(battery_negative, 0)  # discharge

    # deal with power exchange within the figure
    imported_from_grid = np.minimum(np.array(eval_data['unbalance']), 0)
    exported_2_grid = np.maximum(np.array(eval_data['unbalance']), 0)
    x = eval_data['time_step']
@@ -127,9 +133,9 @@ def plot_evaluation_information(datasource, directory):
    axs[1, 0].bar(x, -exported_2_grid, label='Grid export', bottom=-battery_positive)

    axs[1, 0].plot(x, eval_data['netload'], drawstyle='steps-mid', label='Netload')
-   axs[1, 0].legend(loc='upper right', fontsize=12, frameon=False, labelspacing=0.3)
+   axs[1, 0].legend(loc='upper right', bbox_to_anchor=(1.4, 1), fontsize=12, frameon=False, labelspacing=0.3)

    # plot reward in axs[3]
    axs[1, 1].cla()
    axs[1, 1].set_ylabel('Costs')
    axs[1, 1].bar(eval_data['time_step'], eval_data['operation_cost'])