From 88bbddbb7f3e950b4e137e8adbcf2f290eb32618 Mon Sep 17 00:00:00 2001
From: chenxiaodong
Date: Wed, 19 Jun 2024 15:55:41 +0800
Subject: [PATCH] nothing

---
 DDPG.py        | 16 +---------------
 SAC.py         | 13 -------------
 TD3.py         | 13 -------------
 environment.py |  4 ++--
 tools.py       | 13 +++++++++++++
 5 files changed, 16 insertions(+), 43 deletions(-)

diff --git a/DDPG.py b/DDPG.py
index 5057afe..03e7509 100644
--- a/DDPG.py
+++ b/DDPG.py
@@ -7,22 +7,9 @@ from environment import ESSEnv
 from tools import *
 
 
-def update_buffer(_trajectory):
-    ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
-    ary_other = torch.as_tensor([item[1] for item in _trajectory])
-    ary_other[:, 0] = ary_other[:, 0]  # ten_reward
-    ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma  # ten_mask = (1.0 - ary_done) * gamma
-
-    buffer.extend_buffer(ten_state, ary_other)
-
-    _steps = ten_state.shape[0]
-    _r_exp = ary_other[:, 0].mean()  # other = (reward, mask, action)
-    return _steps, _r_exp
-
-
 if __name__ == '__main__':
     args = Arguments()
-    '''here record real unbalance'''
+    '''record real unbalance'''
     reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
     loss_record = {'episode': [], 'steps': [], 'critic_loss': [], 'actor_loss': [], 'entropy_loss': []}
     args.visible_gpu = '0'
@@ -32,7 +19,6 @@ if __name__ == '__main__':
     agent_name = f'{args.agent.__class__.__name__}'
     args.agent.cri_target = True
     args.env = ESSEnv()
-    # creat lists of lists/or creat a long list?
     args.init_before_training(if_main=True)
     '''init agent and environment'''
     agent = args.agent
diff --git a/SAC.py b/SAC.py
index 07eb753..6bfb1e2 100644
--- a/SAC.py
+++ b/SAC.py
@@ -7,19 +7,6 @@ from environment import ESSEnv
 from tools import *
 
 
-def update_buffer(_trajectory):
-    ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
-    ary_other = torch.as_tensor([item[1] for item in _trajectory])
-    ary_other[:, 0] = ary_other[:, 0]  # ten_reward
-    ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma  # ten_mask = (1.0 - ary_done) * gamma
-
-    buffer.extend_buffer(ten_state, ary_other)
-
-    _steps = ten_state.shape[0]
-    _r_exp = ary_other[:, 0].mean()  # other = (reward, mask, action)
-    return _steps, _r_exp
-
-
 if __name__ == '__main__':
     args = Arguments()
     reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
diff --git a/TD3.py b/TD3.py
index 050a2af..8c8baa0 100644
--- a/TD3.py
+++ b/TD3.py
@@ -7,19 +7,6 @@ from environment import ESSEnv
 from tools import *
 
 
-def update_buffer(_trajectory):
-    ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
-    ary_other = torch.as_tensor([item[1] for item in _trajectory])
-    ary_other[:, 0] = ary_other[:, 0]  # ten_reward
-    ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma  # ten_mask = (1.0 - ary_done) * gamma
-
-    buffer.extend_buffer(ten_state, ary_other)
-
-    _steps = ten_state.shape[0]
-    _r_exp = ary_other[:, 0].mean()  # other = (reward, mask, action)
-    return _steps, _r_exp
-
-
 if __name__ == '__main__':
     args = Arguments()
     reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
diff --git a/environment.py b/environment.py
index 2d8a001..8e302c2 100644
--- a/environment.py
+++ b/environment.py
@@ -11,8 +11,8 @@ class ESSEnv(gym.Env):
     def __init__(self, **kwargs):
         super(ESSEnv, self).__init__()
         self.excess = None
-        self.unbalance = None
         self.shedding = None
+        self.unbalance = None
         self.real_unbalance = None
         self.operation_cost = None
         self.current_output = None
@@ -113,7 +113,7 @@ class ESSEnv(gym.Env):
                 sell_benefit = self.grid.get_cost(price, unbalance) * self.sell_coefficient
             else:
                 sell_benefit = self.grid.get_cost(price, self.grid.exchange_ability) * self.sell_coefficient
-                # real unbalance that even grid could not meet
+                # real unbalance that grid could not meet
                 self.excess = unbalance - self.grid.exchange_ability
                 excess_penalty = self.excess * self.penalty_coefficient
         else:  # unbalance <0, its load shedding model, deficient penalty is used
diff --git a/tools.py b/tools.py
index c3c303f..ac1cb7b 100644
--- a/tools.py
+++ b/tools.py
@@ -233,6 +233,19 @@ def get_episode_return(env, act, device):
     return episode_return, episode_unbalance
 
 
+def update_buffer(_trajectory):
+    ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
+    ary_other = torch.as_tensor([item[1] for item in _trajectory])
+    ary_other[:, 0] = ary_other[:, 0]  # ten_reward
+    ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma  # ten_mask = (1.0 - ary_done) * gamma
+
+    buffer.extend_buffer(ten_state, ary_other)
+
+    _steps = ten_state.shape[0]
+    _r_exp = ary_other[:, 0].mean()  # other = (reward, mask, action)
+    return _steps, _r_exp
+
+
 class ReplayBuffer:
     def __init__(self, max_len, state_dim, action_dim, gpu_id=0):
         self.now_len = 0
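Note: the relocated update_buffer still reads gamma and buffer as free (module-level) names, exactly as it did when it was defined inside each training script, so after this move those names must exist in tools' namespace (or be supplied by the caller) for the shared helper to work. The snippet below is a minimal, self-contained sketch of what the helper computes; the gamma value, the example trajectory, and the DummyBuffer stand-in (used here in place of the repo's ReplayBuffer) are illustrative assumptions, not part of this patch.

import torch

gamma = 0.995                      # assumed discount factor; the repo sets this via Arguments


class DummyBuffer:                 # stand-in for tools.ReplayBuffer, illustration only
    def __init__(self):
        self.states, self.others = [], []

    def extend_buffer(self, ten_state, ary_other):
        self.states.append(ten_state)
        self.others.append(ary_other)


buffer = DummyBuffer()


def update_buffer(_trajectory):
    # same logic as the relocated helper: column 0 keeps the reward,
    # column 1 turns the done flag into the discount mask (1 - done) * gamma
    ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
    ary_other = torch.as_tensor([item[1] for item in _trajectory])
    ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma
    buffer.extend_buffer(ten_state, ary_other)
    return ten_state.shape[0], ary_other[:, 0].mean()


# one fake two-step trajectory: (state, (reward, done, action))
trajectory = [([0.1, 0.2], (1.0, 0.0, 0.3)),
              ([0.4, 0.5], (0.5, 1.0, -0.2))]
steps, r_exp = update_buffer(trajectory)
print(steps, r_exp)                # 2 tensor(0.7500)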