Commit 0fd80456ac ("nothing"), parent 9d0b220b54.

Two large image files were updated in this commit (diffs suppressed by the viewer): one 142 KiB -> 141 KiB, one 127 KiB -> 125 KiB.
agent.py (13 changed lines)
@@ -19,7 +19,7 @@ class AgentBase:
         self.act = self.act_target = self.if_use_act_target = self.act_optim = self.ClassAct = None

     def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, _if_per_or_gae=False, gpu_id=0):
-        # explict call self.init() for multiprocessing
+        # 显式调用self.init()进行多进程
         self.device = torch.device(f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu")
         self.action_dim = action_dim
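A minimal usage sketch of the explicit init() call referred to in the comment above (the concrete dimensions are placeholders, not values from the repo): the agent object is constructed first, and init() is called afterwards in the process that will own the networks, so they land on that worker's device.

agent = AgentDDPG()                      # any agent class derived from AgentBase
agent.init(net_dim=256, state_dim=10, action_dim=4, learning_rate=1e-4, gpu_id=0)
print(agent.device)                      # cuda:0 if a GPU is visible, else cpu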
@@ -81,7 +81,7 @@ class AgentBase:
 class AgentDDPG(AgentBase):
     def __init__(self):
         super().__init__()
-        self.explore_noise = 0.1  # explore noise of action
+        self.explore_noise = 0.1
         self.if_use_cri_target = self.if_use_act_target = True
         self.ClassCri = Critic
         self.ClassAct = Actor
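For context, explore_noise above is the standard deviation of Gaussian exploration noise. A hedged sketch of how such noise is typically applied to a deterministic actor's output (the helper name and the clamping range are assumptions, not part of this diff):

import torch

def select_action_with_noise(act_net, state, explore_noise=0.1, device="cpu"):
    # act_net is assumed to map a batch of states to actions in [-1, 1]
    state = torch.as_tensor((state,), dtype=torch.float32, device=device)
    action = act_net(state)[0]
    noise = torch.randn_like(action) * explore_noise   # Gaussian exploration noise
    return (action + noise).clamp(-1.0, 1.0).detach().cpu().numpy()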
@@ -163,7 +163,7 @@ class AgentSAC(AgentBase):
         super().init(net_dim, state_dim, action_dim, learning_rate, _if_use_per, gpu_id)

         self.alpha_log = torch.tensor((-np.log(action_dim) * np.e,), dtype=torch.float32,
-                                      requires_grad=True, device=self.device)  # trainable parameter
+                                      requires_grad=True, device=self.device)
         self.alpha_optim = torch.optim.Adam((self.alpha_log,), lr=learning_rate)
         self.target_entropy = np.log(action_dim)
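As a hedged aside on the hunk above: alpha_log and target_entropy are the ingredients of SAC's automatic entropy tuning. The usual update (not shown in this diff) trains the log-temperature so the policy entropy tracks target_entropy; a sketch, assuming logprob holds log-probabilities of freshly sampled actions:

obj_alpha = (self.alpha_log * (logprob - self.target_entropy).detach()).mean()
self.alpha_optim.zero_grad()
obj_alpha.backward()
self.alpha_optim.step()
alpha = self.alpha_log.exp().detach()   # temperature weighting the entropy bonus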
@@ -250,10 +250,9 @@ class AgentPPO(AgentBase):
         with torch.no_grad():
             buf_len = buffer[0].shape[0]
             buf_state, buf_action, buf_noise, buf_reward, buf_mask = [ten.to(self.device) for ten in buffer]
-            # (ten_state, ten_action, ten_noise, ten_reward, ten_mask) = buffer

             '''get buf_r_sum, buf_logprob'''
-            bs = 2 ** 10  # set a smaller 'BatchSize' when out of GPU memory.
+            bs = 2 ** 10
             buf_value = [self.cri_target(buf_state[i:i + bs]) for i in range(0, buf_len, bs)]
             buf_value = torch.cat(buf_value, dim=0)
             buf_logprob = self.act.get_old_logprob(buf_action, buf_noise)
@@ -273,7 +272,7 @@ class AgentPPO(AgentBase):
             logprob = buf_logprob[indices]
             advantage = buf_advantage[indices]

-            new_logprob, obj_entropy = self.act.get_logprob_entropy(state, action)  # it is obj_actor
+            new_logprob, obj_entropy = self.act.get_logprob_entropy(state, action)  # it's obj_actor
             ratio = (new_logprob - logprob.detach()).exp()
             surrogate1 = advantage * ratio
             surrogate2 = advantage * ratio.clamp(1 - self.ratio_clip, 1 + self.ratio_clip)
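For reference (this step falls outside the hunk): surrogate1 and surrogate2 are normally combined with an element-wise minimum to form PPO's clipped objective, plus an entropy bonus. A hedged sketch using the names above and an assumed lambda_entropy coefficient:

obj_surrogate = -torch.min(surrogate1, surrogate2).mean()
obj_actor = obj_surrogate + obj_entropy * self.lambda_entropy   # lambda_entropy is assumed
self.act_optim.zero_grad()
obj_actor.backward()
self.act_optim.step()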
@@ -309,6 +308,6 @@ class AgentPPO(AgentBase):
         for i in range(buf_len - 1, -1, -1):
             buf_r_sum[i] = ten_reward[i] + ten_mask[i] * pre_r_sum
             pre_r_sum = buf_r_sum[i]
-            buf_advantage[i] = ten_reward[i] + ten_mask[i] * (pre_advantage - ten_value[i])  # fix a bug here
+            buf_advantage[i] = ten_reward[i] + ten_mask[i] * (pre_advantage - ten_value[i])
             pre_advantage = ten_value[i] + buf_advantage[i] * self.lambda_gae_adv
         return buf_r_sum, buf_advantage
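A self-contained restatement of the backward loop above, useful when reading the hunk in isolation (the meaning of ten_mask is an assumption spelled out in the comment; this is a sketch, not new repo code):

import torch

def reward_sum_and_gae(ten_reward, ten_mask, ten_value, lambda_gae_adv=0.98):
    # ten_mask is assumed to hold gamma * (1 - done), so it both discounts and
    # cuts the recursion at episode boundaries.
    buf_len = ten_reward.shape[0]
    buf_r_sum = torch.empty(buf_len, dtype=torch.float32)       # discounted return
    buf_advantage = torch.empty(buf_len, dtype=torch.float32)   # GAE-style advantage
    pre_r_sum = 0.0
    pre_advantage = 0.0
    for i in range(buf_len - 1, -1, -1):
        buf_r_sum[i] = ten_reward[i] + ten_mask[i] * pre_r_sum
        pre_r_sum = buf_r_sum[i]
        buf_advantage[i] = ten_reward[i] + ten_mask[i] * (pre_advantage - ten_value[i])
        pre_advantage = ten_value[i] + buf_advantage[i] * lambda_gae_adv
    return buf_r_sum, buf_advantage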
@@ -28,8 +28,8 @@ class ESSEnv(gym.Env):
         self.dg_parameters = kwargs.get('dg_parameters', dg_parameters)
         self.solar_parameters = kwargs.get('solar_parameters', solar_parameters)
         self.wind_parameters = kwargs.get('wind_parameters', wind_parameters)
-        self.penalty_coefficient = 50  # control soft penalty constrain
-        self.sell_coefficient = 0.5  # control sell benefits
+        self.penalty_coefficient = 50  # 约束惩罚系数
+        self.sell_coefficient = 0.5  # 售出利润系数

         self.grid = Grid()
         self.battery = Battery(self.battery_parameters)
@@ -81,12 +81,12 @@ class ESSEnv(gym.Env):
         return obs

     def step(self, action):  # state transition: current_obs->take_action->get_reward->get_finish->next_obs
-        # put action into each component
+        # 在每个组件中添加动作
        current_obs = self._build_state()
        temperature = current_obs[7]
        irradiance = current_obs[8]
        wind_speed = current_obs[9]
-        self.battery.step(action[0])  # execute the state-transition part, battery.current_capacity also changed
+        self.battery.step(action[0])  # 执行状态转换,电池当前容量也改变
        self.dg1.step(action[1])
        self.dg2.step(action[2])
        self.dg3.step(action[3])
@@ -106,16 +106,16 @@ class ESSEnv(gym.Env):
         buy_cost = 0
         self.excess = 0
         self.shedding = 0
-        if unbalance >= 0:  # now in excess condition
+        if unbalance >= 0:  # 现在过剩
             if unbalance <= self.grid.exchange_ability:
                 # sell money to grid is little [0.029,0.1]
                 sell_benefit = self.grid.get_cost(price, unbalance) * self.sell_coefficient
             else:
                 sell_benefit = self.grid.get_cost(price, self.grid.exchange_ability) * self.sell_coefficient
-                # real unbalance that grid could not meet
+                # real unbalance:电网也无法满足
                 self.excess = unbalance - self.grid.exchange_ability
                 excess_penalty = self.excess * self.penalty_coefficient
-        else:  # unbalance <0, its load shedding model, deficient penalty is used
+        else:  # unbalance <0, 采用缺少惩罚
             if abs(unbalance) <= self.grid.exchange_ability:
                 buy_cost = self.grid.get_cost(price, abs(unbalance))
             else:
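A small worked example of the surplus branch above. The numbers are purely illustrative and grid.get_cost is assumed to behave like price * energy for this illustration only:

price, unbalance = 0.05, 130.0                      # 130 kWh of surplus this step
exchange_ability, sell_coefficient, penalty_coefficient = 100.0, 0.5, 50.0

sell_benefit = price * exchange_ability * sell_coefficient   # only 100 kWh can be sold: 2.5
excess = unbalance - exchange_ability                        # 30 kWh the grid cannot absorb
excess_penalty = excess * penalty_coefficient                # 1500.0, dominating the reward
print(sell_benefit, excess, excess_penalty)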
@@ -164,7 +164,7 @@ class ESSEnv(gym.Env):
         temperature = temperature_df['t2m'].to_numpy(dtype=float)
         wind = wind_df['wind_speed'].to_numpy(dtype=float)

-        '''redesign the magnitude for price and amount of generation as well as demand'''
+        '''重新设计价格和发电量以及需求的大小'''
         def process_elements(elements, transform_function, add_function):
             for element in elements:
                 transformed_element = transform_function(element)
module.py (19 changed lines)
@@ -2,7 +2,7 @@ import numpy as np


 class DG:
-    """simulate a simple diesel generator"""
+    """simulate a diesel generator"""

     def __init__(self, parameters):
         self.current_output = None
@@ -37,7 +37,7 @@ class DG:


 class Battery:
-    """simulate a simple battery"""
+    """simulate a battery"""

     def __init__(self, parameters):
         self.current_capacity = None
@@ -71,6 +71,7 @@ class Battery:


 class Solar:
+    """simulate a solar panel"""
     def __init__(self, parameters):
         self.current_power = None
         self.base_voltage = parameters['V_b']
@@ -106,6 +107,7 @@ class Solar:


 class Wind:
+    """simulate a wind turbine"""
     def __init__(self, parameters):
         self.current_power = None
         self.cutin_speed = parameters['cutin_speed']
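Only cutin_speed is visible in this hunk; as a hedged illustration of why a wind model carries such a parameter, here is a generic piecewise power curve (rated_speed, cutoff_speed and rated_power are assumptions, not names from module.py):

def wind_power(wind_speed, cutin_speed=3.0, rated_speed=12.0, cutoff_speed=25.0, rated_power=200.0):
    if wind_speed < cutin_speed or wind_speed >= cutoff_speed:
        return 0.0                      # below cut-in or above cut-off: no output
    if wind_speed < rated_speed:
        # output rises roughly with the cube of wind speed up to rated speed
        return rated_power * (wind_speed ** 3 - cutin_speed ** 3) / (rated_speed ** 3 - cutin_speed ** 3)
    return rated_power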
@@ -137,6 +139,7 @@ class Wind:


 class Grid:
+    """simulate a grid"""
     def __init__(self):
         self.on = True
         self.delta = 1
@@ -158,15 +161,3 @@ class Grid:
         # current_day_price = self.price[24 * self.day:24 * self.day + self.time]
         # result.extend(current_day_price)
         return result
-
-    # def retrive_past_price(self):
-    #     result = []
-    #     if self.day < 1:
-    #         past_price = self.past_price
-    #     else:
-    #         past_price = self.price[24 * (self.day - 1):24 * self.day]
-    #     for item in past_price[(self.time - 24)::]:
-    #         result.append(item)
-    #     for item in self.price[24 * self.day:(24 * self.day + self.time)]:
-    #         result.append(item)
-    #     return result
@@ -17,6 +17,7 @@ def plot_optimization_result(datasource, directory):  # data source is dataframe
     plt.subplots_adjust(wspace=0.7, hspace=0.3)
     plt.autoscale(tight=True)
     T = np.array([i for i in range(24)])

     # plot step cost in ax[0]
     axs[0, 0].cla()
     axs[0, 0].set_ylabel('Costs')
tools.py (10 changed lines)
@@ -116,13 +116,11 @@ class Arguments:
     """revise here for our own purpose"""

     def __init__(self, agent=None, env=None):
-        self.agent = agent  # Deep Reinforcement Learning algorithm
-        self.env = env  # the environment for training
-        # self.plot_shadow_on = False  # control do we need to plot all shadow figures
-        self.cwd = None  # current work directory. None means set automatically
+        self.agent = agent
+        self.env = env
+        self.cwd = None  # current work directory, None means set automatically
         self.if_remove = False  # remove the cwd folder? (True, False, None:ask me)
         self.visible_gpu = '0,1,2,3'  # for example: os.environ['CUDA_VISIBLE_DEVICES'] = '0, 2,'
-        # self.worker_num = 2  # rollout workers number pre GPU (adjust it to get high GPU usage)
         self.num_threads = 32  # cpu_num for evaluate model, torch.set_num_threads(self.num_threads)

         '''Arguments for training'''
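A minimal usage sketch of the Arguments container above; AgentPPO and ESSEnv appear elsewhere in this commit, only fields visible in this hunk are set, and constructing ESSEnv() with its default parameters is an assumption:

args = Arguments(agent=AgentPPO(), env=ESSEnv())
args.visible_gpu = '0'        # expose a single GPU to this run
args.num_threads = 8          # later passed to torch.set_num_threads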
@@ -171,7 +169,7 @@ class Arguments:
         torch.set_num_threads(self.num_threads)
         torch.set_default_dtype(torch.float32)

-        os.environ['CUDA_VISIBLE_DEVICES'] = str(self.visible_gpu)  # control how many GPU is used
+        os.environ['CUDA_VISIBLE_DEVICES'] = str(self.visible_gpu)


 def test_one_episode(env, act, device):