chenxiaodong 2024-06-19 15:55:41 +08:00
parent 035d5fd534
commit 88bbddbb7f
5 changed files with 16 additions and 43 deletions

DDPG.py (16 changed lines)

@@ -7,22 +7,9 @@ from environment import ESSEnv
 from tools import *
-def update_buffer(_trajectory):
-    ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
-    ary_other = torch.as_tensor([item[1] for item in _trajectory])
-    ary_other[:, 0] = ary_other[:, 0]  # ten_reward
-    ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma  # ten_mask = (1.0 - ary_done) * gamma
-    buffer.extend_buffer(ten_state, ary_other)
-    _steps = ten_state.shape[0]
-    _r_exp = ary_other[:, 0].mean()  # other = (reward, mask, action)
-    return _steps, _r_exp
 if __name__ == '__main__':
     args = Arguments()
-    '''here record real unbalance'''
+    '''record real unbalance'''
     reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
     loss_record = {'episode': [], 'steps': [], 'critic_loss': [], 'actor_loss': [], 'entropy_loss': []}
     args.visible_gpu = '0'
@@ -32,7 +19,6 @@ if __name__ == '__main__':
     agent_name = f'{args.agent.__class__.__name__}'
     args.agent.cri_target = True
     args.env = ESSEnv()
-    # creat lists of lists/or creat a long list?
     args.init_before_training(if_main=True)
     '''init agent and environment'''
     agent = args.agent
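The update_buffer helper removed above packs each transition's extra fields as (reward, mask, action), with mask = (1.0 - done) * gamma. A minimal sketch of why that packing is convenient for the critic update that follows; the tensors and the next_q value below are illustrative stand-ins, not values from this repository:

import torch

gamma = 0.99
reward = torch.tensor([1.5, 0.3])     # rewards for two sampled transitions
done = torch.tensor([0.0, 1.0])       # 1.0 marks the final step of an episode
next_q = torch.tensor([10.0, 10.0])   # stand-in for a target-critic estimate

mask = (1.0 - done) * gamma           # exactly the column 1 that update_buffer stores
q_target = reward + mask * next_q     # terminal rows collapse to the bare reward
print(q_target)                       # tensor([11.4000, 0.3000])

Folding the discount into the stored mask keeps the Bellman target a single fused expression, with no branching on done at training time.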

SAC.py (13 changed lines)

@@ -7,19 +7,6 @@ from environment import ESSEnv
 from tools import *
-def update_buffer(_trajectory):
-    ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
-    ary_other = torch.as_tensor([item[1] for item in _trajectory])
-    ary_other[:, 0] = ary_other[:, 0]  # ten_reward
-    ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma  # ten_mask = (1.0 - ary_done) * gamma
-    buffer.extend_buffer(ten_state, ary_other)
-    _steps = ten_state.shape[0]
-    _r_exp = ary_other[:, 0].mean()  # other = (reward, mask, action)
-    return _steps, _r_exp
 if __name__ == '__main__':
     args = Arguments()
     reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}

TD3.py (13 changed lines)

@@ -7,19 +7,6 @@ from environment import ESSEnv
 from tools import *
-def update_buffer(_trajectory):
-    ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
-    ary_other = torch.as_tensor([item[1] for item in _trajectory])
-    ary_other[:, 0] = ary_other[:, 0]  # ten_reward
-    ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma  # ten_mask = (1.0 - ary_done) * gamma
-    buffer.extend_buffer(ten_state, ary_other)
-    _steps = ten_state.shape[0]
-    _r_exp = ary_other[:, 0].mean()  # other = (reward, mask, action)
-    return _steps, _r_exp
 if __name__ == '__main__':
     args = Arguments()
     reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}

environment.py

@@ -11,8 +11,8 @@ class ESSEnv(gym.Env):
     def __init__(self, **kwargs):
         super(ESSEnv, self).__init__()
         self.excess = None
-        self.unbalance = None
         self.shedding = None
+        self.unbalance = None
         self.real_unbalance = None
         self.operation_cost = None
         self.current_output = None
@@ -113,7 +113,7 @@
                 sell_benefit = self.grid.get_cost(price, unbalance) * self.sell_coefficient
             else:
                 sell_benefit = self.grid.get_cost(price, self.grid.exchange_ability) * self.sell_coefficient
-                # real unbalance that even grid could not meet
+                # real unbalance that grid could not meet
                 self.excess = unbalance - self.grid.exchange_ability
                 excess_penalty = self.excess * self.penalty_coefficient
         else:  # unbalance <0, its load shedding model, deficient penalty is used
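A standalone sketch of the surplus (unbalance >= 0) settlement shown above may help; get_cost, exchange_ability, sell_coefficient and penalty_coefficient play the same roles as in ESSEnv, but the function name and the numbers in the example call are illustrative only:

def settle_surplus(price, unbalance, exchange_ability,
                   sell_coefficient, penalty_coefficient, get_cost):
    # sell what the grid can absorb; penalise the remainder it cannot take
    if unbalance <= exchange_ability:
        sell_benefit = get_cost(price, unbalance) * sell_coefficient
        excess_penalty = 0.0
    else:
        sell_benefit = get_cost(price, exchange_ability) * sell_coefficient
        excess = unbalance - exchange_ability   # real unbalance the grid could not meet
        excess_penalty = excess * penalty_coefficient
    return sell_benefit, excess_penalty

# illustrative call, with get_cost taken as plain price * quantity
print(settle_surplus(0.5, 120.0, 100.0, 0.5, 1.0, lambda p, q: p * q))  # (25.0, 20.0)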

tools.py

@@ -233,6 +233,19 @@ def get_episode_return(env, act, device):
     return episode_return, episode_unbalance
+def update_buffer(_trajectory):
+    ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
+    ary_other = torch.as_tensor([item[1] for item in _trajectory])
+    ary_other[:, 0] = ary_other[:, 0]  # ten_reward
+    ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma  # ten_mask = (1.0 - ary_done) * gamma
+    buffer.extend_buffer(ten_state, ary_other)
+    _steps = ten_state.shape[0]
+    _r_exp = ary_other[:, 0].mean()  # other = (reward, mask, action)
+    return _steps, _r_exp
 class ReplayBuffer:
     def __init__(self, max_len, state_dim, action_dim, gpu_id=0):
         self.now_len = 0
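One detail worth noting about the consolidated helper: because update_buffer is now defined in tools.py, the names buffer and gamma inside it resolve against tools.py's module globals, not against the script that pulls it in with from tools import *. A minimal sketch of a variant that makes those dependencies explicit instead; the function name and signature are illustrative, not the repository's API:

import torch

def update_buffer_explicit(_trajectory, buffer, gamma):
    # same packing as update_buffer, but the replay buffer and discount are passed in
    ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
    ary_other = torch.as_tensor([item[1] for item in _trajectory])
    ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma   # mask = (1.0 - done) * gamma
    buffer.extend_buffer(ten_state, ary_other)
    _steps = ten_state.shape[0]
    _r_exp = ary_other[:, 0].mean()                     # mean reward of the batch
    return _steps, _r_exp

A call such as steps, r_exp = update_buffer_explicit(trajectory, buffer, gamma) then makes each training script's dependencies visible at a glance.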