From 53d3ac9ca828eca346ce980fedd887acb3007219 Mon Sep 17 00:00:00 2001
From: chenxiaodong
Date: Wed, 19 Jun 2024 09:56:11 +0800
Subject: [PATCH] edit the layer normalization

---
 .idea/DRL-for-Energy-Systems.iml |  2 +-
 .idea/deployment.xml             | 22 +++++++++++
 .idea/misc.xml                   |  2 +-
 PPO.py                           | 64 ++++++++++++++++----------------
 tools.py                         |  1 -
 5 files changed, 57 insertions(+), 34 deletions(-)
 create mode 100644 .idea/deployment.xml

diff --git a/.idea/DRL-for-Energy-Systems.iml b/.idea/DRL-for-Energy-Systems.iml
index 0a5fd4b..ab54ddc 100644
--- a/.idea/DRL-for-Energy-Systems.iml
+++ b/.idea/DRL-for-Energy-Systems.iml
@@ -2,7 +2,7 @@
 
 
 
-
+
 
 
 
diff --git a/.idea/deployment.xml b/.idea/deployment.xml
new file mode 100644
index 0000000..9af79fb
--- /dev/null
+++ b/.idea/deployment.xml
@@ -0,0 +1,22 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 94f4964..9aaae3a 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,5 +3,5 @@
 
 
 
 
-
+
\ No newline at end of file
diff --git a/PPO.py b/PPO.py
index 35d8484..842b9d6 100644
--- a/PPO.py
+++ b/PPO.py
@@ -12,33 +12,34 @@
 os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
 script_name = os.path.basename(__file__)
 
-# after adding layer normalization, it doesn't work
 class ActorPPO(nn.Module):
     def __init__(self, mid_dim, state_dim, action_dim, layer_norm=False):
         super().__init__()
+        self.layer_norm = layer_norm
         self.net = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(),
                                  nn.Linear(mid_dim, mid_dim), nn.ReLU(),
                                  nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
-                                 nn.Linear(mid_dim, action_dim), )
-
+                                 nn.Linear(mid_dim, action_dim))
         # the logarithm (log) of standard deviation (std) of action, it is a trainable parameter
         self.a_logstd = nn.Parameter(torch.zeros((1, action_dim)) - 0.5, requires_grad=True)
         self.sqrt_2pi_log = np.log(np.sqrt(2 * np.pi))
-        if layer_norm:
-            self.layer_norm(self.net)
 
-    @staticmethod
-    def layer_norm(layer, std=1.0, bias_const=0.0):
-        for i in layer:
-            if hasattr(i, 'weight'):
-                torch.nn.init.orthogonal_(i.weight, std)
-                torch.nn.init.constant_(i.bias, bias_const)
+        if self.layer_norm:
+            self.apply_layer_norm()
+
+    def apply_layer_norm(self):
+        def init_weights(layer):
+            if isinstance(layer, nn.Linear):
+                nn.init.orthogonal_(layer.weight, 1.0)
+                nn.init.constant_(layer.bias, 0.0)
+
+        self.net.apply(init_weights)
 
     def forward(self, state):
-        return self.net(state).tanh()  # action.tanh()  # in this way limit the data output of action
+        return self.net(state).tanh()  # action.tanh() limit the data output of action
 
     def get_action(self, state):
-        a_avg = self.net(state)  # too big for the action
+        a_avg = self.forward(state)  # too big for the action
         a_std = self.a_logstd.exp()
 
         noise = torch.randn_like(a_avg)
@@ -46,7 +47,7 @@ class ActorPPO(nn.Module):
         return action, noise
 
     def get_logprob_entropy(self, state, action):
-        a_avg = self.net(state)
+        a_avg = self.forward(state)
         a_std = self.a_logstd.exp()
 
         delta = ((a_avg - action) / a_std).pow(2) * 0.5
@@ -63,19 +64,21 @@ class ActorPPO(nn.Module):
 class CriticAdv(nn.Module):
     def __init__(self, mid_dim, state_dim, _action_dim, layer_norm=False):
         super().__init__()
+        self.layer_norm = layer_norm
         self.net = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(),
                                  nn.Linear(mid_dim, mid_dim), nn.ReLU(),
                                  nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
                                  nn.Linear(mid_dim, 1))
-        if layer_norm:
-            self.layer_norm(self.net, std=1.0)
+        if self.layer_norm:
+            self.apply_layer_norm()
 
-    @staticmethod
-    def layer_norm(layer, std=1.0, bias_const=0.0):
-        for i in layer:
-            if hasattr(i, 'weight'):
-                torch.nn.init.orthogonal_(i.weight, std)
-                torch.nn.init.constant_(i.bias, bias_const)
+    def apply_layer_norm(self):
+        def init_weights(layer):
+            if isinstance(layer, nn.Linear):
+                nn.init.orthogonal_(layer.weight, 1.0)
+                nn.init.constant_(layer.bias, 0.0)
+
+        self.net.apply(init_weights)
 
     def forward(self, state):
         return self.net(state)  # Advantage value
@@ -116,7 +119,6 @@ class AgentPPO:
 
         self.cri_optim = torch.optim.Adam(self.cri.parameters(), learning_rate)
         self.act_optim = torch.optim.Adam(self.act.parameters(), learning_rate) if self.ClassAct else self.cri
-        del self.ClassCri, self.ClassAct  # why del self.ClassCri and self.ClassAct here, to save memory?
 
     def select_action(self, state):
         states = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
@@ -129,8 +131,8 @@ class AgentPPO:
         last_done = 0
         for i in range(target_step):
             action, noise = self.select_action(state)
-            state, next_state, reward, done, = env.step(
-                np.tanh(action))  # the step of cut action is finally organized into the environment.
+            # the step of cut action is finally organized into the environment
+            state, next_state, reward, done, = env.step(np.tanh(action))
             trajectory_temp.append((state, reward, done, action, noise))
             if done:
                 state = env.reset()
@@ -140,8 +142,8 @@ class AgentPPO:
         self.state = state
 
         '''splice list'''
-        trajectory_list = self.trajectory_list + trajectory_temp[
-                                                 :last_done + 1]  # store 0 trajectory information to the list
+        # store 0 trajectory information to list
+        trajectory_list = self.trajectory_list + trajectory_temp[:last_done + 1]
         self.trajectory_list = trajectory_temp[last_done:]
         return trajectory_list
 
@@ -149,12 +151,12 @@ class AgentPPO:
         """put data extract and update network together"""
         with torch.no_grad():
             buf_len = buffer[0].shape[0]
-            buf_state, buf_action, buf_noise, buf_reward, buf_mask = [ten.to(self.device) for ten in
-                                                                      buffer]  # decompose buffer data
+            # decompose buffer data
+            buf_state, buf_action, buf_noise, buf_reward, buf_mask = [ten.to(self.device) for ten in buffer]
 
             '''get buf_r_sum, buf_logprob'''
             bs = 4096  # set a smaller 'BatchSize' when out of GPU memory: 1024, could change to 4096
-            buf_value = [self.cri_target(buf_state[i:i + bs]) for i in range(0, buf_len, bs)]  #
+            buf_value = [self.cri_target(buf_state[i:i + bs]) for i in range(0, buf_len, bs)]
             buf_value = torch.cat(buf_value, dim=0)
             buf_logprob = self.act.get_old_logprob(buf_action, buf_noise)
 
@@ -317,7 +319,7 @@ if __name__ == '__main__':
     agent = args.agent
     env = args.env
     agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
               args.if_per_or_gae)
-               args.if_per_or_gae)
+               args.if_per_or_gae, layer_norm=True)
 
     cwd = args.cwd
     gamma = args.gamma
diff --git a/tools.py b/tools.py
index 824e9c7..c3c303f 100644
--- a/tools.py
+++ b/tools.py
@@ -49,7 +49,6 @@ def optimization_base_result(env, month, day, initial_soc):
     # set system variables
     on_off = m.addVars(NUM_GEN, period, vtype=GRB.BINARY, name='on_off')
     gen_output = m.addVars(NUM_GEN, period, vtype=GRB.CONTINUOUS, name='output')
-    pv = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, name='pv')
     # set charging/discharging constraints
    battery_energy_change = m.addVars(period, vtype=GRB.CONTINUOUS, lb=env.battery.max_discharge,
                                       ub=env.battery.max_charge, name='battery_action')
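
For reference, a minimal, self-contained sketch of the initialization pattern the patch switches to: orthogonal weight initialization pushed onto every nn.Linear layer through nn.Module.apply(). The layer sizes, variable names, and the final check below are illustrative assumptions, not code taken from the repository.

import torch
import torch.nn as nn


def init_weights(layer, std=1.0, bias_const=0.0):
    # nn.Module.apply() calls this on every submodule; only Linear layers are initialized
    if isinstance(layer, nn.Linear):
        nn.init.orthogonal_(layer.weight, std)
        nn.init.constant_(layer.bias, bias_const)


# placeholder actor-style MLP (dimensions chosen only for illustration)
net = nn.Sequential(nn.Linear(8, 64), nn.ReLU(),
                    nn.Linear(64, 64), nn.ReLU(),
                    nn.Linear(64, 2))
net.apply(init_weights)  # recursively visits the Sequential and each child module

# a square orthogonally-initialized weight satisfies W @ W.T ~= I
print(torch.allclose(net[2].weight @ net[2].weight.T, torch.eye(64), atol=1e-5))

Compared with the removed static helper, apply() avoids iterating over the Sequential by hand, and the isinstance check skips the activation modules instead of relying on hasattr(i, 'weight').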