From 88bbddbb7f3e950b4e137e8adbcf2f290eb32618 Mon Sep 17 00:00:00 2001
From: chenxiaodong
Date: Wed, 19 Jun 2024 15:55:41 +0800
Subject: [PATCH] nothing

---
 DDPG.py        | 16 +---------------
 SAC.py         | 13 -------------
 TD3.py         | 13 -------------
 environment.py |  4 ++--
 tools.py       | 13 +++++++++++++
 5 files changed, 16 insertions(+), 43 deletions(-)

diff --git a/DDPG.py b/DDPG.py
index 5057afe..03e7509 100644
--- a/DDPG.py
+++ b/DDPG.py
@@ -7,22 +7,9 @@ from environment import ESSEnv
 from tools import *
 
 
-def update_buffer(_trajectory):
-    ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
-    ary_other = torch.as_tensor([item[1] for item in _trajectory])
-    ary_other[:, 0] = ary_other[:, 0]  # ten_reward
-    ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma  # ten_mask = (1.0 - ary_done) * gamma
-
-    buffer.extend_buffer(ten_state, ary_other)
-
-    _steps = ten_state.shape[0]
-    _r_exp = ary_other[:, 0].mean()  # other = (reward, mask, action)
-    return _steps, _r_exp
-
-
 if __name__ == '__main__':
     args = Arguments()
-    '''here record real unbalance'''
+    '''record real unbalance'''
     reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
     loss_record = {'episode': [], 'steps': [], 'critic_loss': [], 'actor_loss': [], 'entropy_loss': []}
     args.visible_gpu = '0'
@@ -32,7 +19,6 @@ if __name__ == '__main__':
     agent_name = f'{args.agent.__class__.__name__}'
     args.agent.cri_target = True
     args.env = ESSEnv()
-    # creat lists of lists/or creat a long list?
     args.init_before_training(if_main=True)
     '''init agent and environment'''
     agent = args.agent
diff --git a/SAC.py b/SAC.py
index 07eb753..6bfb1e2 100644
--- a/SAC.py
+++ b/SAC.py
@@ -7,19 +7,6 @@ from environment import ESSEnv
 from tools import *
 
 
-def update_buffer(_trajectory):
-    ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
-    ary_other = torch.as_tensor([item[1] for item in _trajectory])
-    ary_other[:, 0] = ary_other[:, 0]  # ten_reward
-    ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma  # ten_mask = (1.0 - ary_done) * gamma
-
-    buffer.extend_buffer(ten_state, ary_other)
-
-    _steps = ten_state.shape[0]
-    _r_exp = ary_other[:, 0].mean()  # other = (reward, mask, action)
-    return _steps, _r_exp
-
-
 if __name__ == '__main__':
     args = Arguments()
     reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
diff --git a/TD3.py b/TD3.py
index 050a2af..8c8baa0 100644
--- a/TD3.py
+++ b/TD3.py
@@ -7,19 +7,6 @@ from environment import ESSEnv
 from tools import *
 
 
-def update_buffer(_trajectory):
-    ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
-    ary_other = torch.as_tensor([item[1] for item in _trajectory])
-    ary_other[:, 0] = ary_other[:, 0]  # ten_reward
-    ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma  # ten_mask = (1.0 - ary_done) * gamma
-
-    buffer.extend_buffer(ten_state, ary_other)
-
-    _steps = ten_state.shape[0]
-    _r_exp = ary_other[:, 0].mean()  # other = (reward, mask, action)
-    return _steps, _r_exp
-
-
 if __name__ == '__main__':
     args = Arguments()
     reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
diff --git a/environment.py b/environment.py
index 2d8a001..8e302c2 100644
--- a/environment.py
+++ b/environment.py
@@ -11,8 +11,8 @@ class ESSEnv(gym.Env):
     def __init__(self, **kwargs):
         super(ESSEnv, self).__init__()
         self.excess = None
-        self.unbalance = None
         self.shedding = None
+        self.unbalance = None
         self.real_unbalance = None
         self.operation_cost = None
         self.current_output = None
@@ -113,7 +113,7 @@ class ESSEnv(gym.Env):
                 sell_benefit = self.grid.get_cost(price, unbalance) * self.sell_coefficient
             else:
                 sell_benefit = self.grid.get_cost(price, self.grid.exchange_ability) * self.sell_coefficient
-                # real unbalance that even grid could not meet
+                # real unbalance that grid could not meet
                 self.excess = unbalance - self.grid.exchange_ability
                 excess_penalty = self.excess * self.penalty_coefficient
         else:  # unbalance <0, its load shedding model, deficient penalty is used
diff --git a/tools.py b/tools.py
index c3c303f..ac1cb7b 100644
--- a/tools.py
+++ b/tools.py
@@ -233,6 +233,19 @@ def get_episode_return(env, act, device):
     return episode_return, episode_unbalance
 
 
+def update_buffer(_trajectory):
+    ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
+    ary_other = torch.as_tensor([item[1] for item in _trajectory])
+    ary_other[:, 0] = ary_other[:, 0]  # ten_reward
+    ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma  # ten_mask = (1.0 - ary_done) * gamma
+
+    buffer.extend_buffer(ten_state, ary_other)
+
+    _steps = ten_state.shape[0]
+    _r_exp = ary_other[:, 0].mean()  # other = (reward, mask, action)
+    return _steps, _r_exp
+
+
 class ReplayBuffer:
     def __init__(self, max_len, state_dim, action_dim, gpu_id=0):
         self.now_len = 0
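Note: the relocated update_buffer still reads gamma and buffer as free (module-level) names, exactly as it did when it was defined inside each training script, so after this move those names must exist in tools' namespace (or be supplied by the caller) for the shared helper to work. The snippet below is a minimal, self-contained sketch of what the helper computes; the gamma value, the example trajectory, and the DummyBuffer stand-in (used here in place of the repo's ReplayBuffer) are illustrative assumptions, not part of this patch.

import torch

gamma = 0.995                      # assumed discount factor; the repo sets this via Arguments


class DummyBuffer:                 # stand-in for tools.ReplayBuffer, illustration only
    def __init__(self):
        self.states, self.others = [], []

    def extend_buffer(self, ten_state, ary_other):
        self.states.append(ten_state)
        self.others.append(ary_other)


buffer = DummyBuffer()


def update_buffer(_trajectory):
    # same logic as the relocated helper: column 0 keeps the reward,
    # column 1 turns the done flag into the discount mask (1 - done) * gamma
    ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
    ary_other = torch.as_tensor([item[1] for item in _trajectory])
    ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma
    buffer.extend_buffer(ten_state, ary_other)
    return ten_state.shape[0], ary_other[:, 0].mean()


# one fake two-step trajectory: (state, (reward, done, action))
trajectory = [([0.1, 0.2], (1.0, 0.0, 0.3)),
              ([0.4, 0.5], (0.5, 1.0, -0.2))]
steps, r_exp = update_buffer(trajectory)
print(steps, r_exp)                # 2 tensor(0.7500)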