nothing
parent 035d5fd534
commit 88bbddbb7f

DDPG.py (16 changed lines)

@@ -7,22 +7,9 @@ from environment import ESSEnv
 from tools import *
 
 
-def update_buffer(_trajectory):
-    ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
-    ary_other = torch.as_tensor([item[1] for item in _trajectory])
-    ary_other[:, 0] = ary_other[:, 0]  # ten_reward
-    ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma  # ten_mask = (1.0 - ary_done) * gamma
-
-    buffer.extend_buffer(ten_state, ary_other)
-
-    _steps = ten_state.shape[0]
-    _r_exp = ary_other[:, 0].mean()  # other = (reward, mask, action)
-    return _steps, _r_exp
-
-
 if __name__ == '__main__':
     args = Arguments()
-    '''here record real unbalance'''
+    '''record real unbalance'''
     reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
     loss_record = {'episode': [], 'steps': [], 'critic_loss': [], 'actor_loss': [], 'entropy_loss': []}
     args.visible_gpu = '0'

@@ -32,7 +19,6 @@ if __name__ == '__main__':
     agent_name = f'{args.agent.__class__.__name__}'
     args.agent.cri_target = True
     args.env = ESSEnv()
-    # creat lists of lists/or creat a long list?
     args.init_before_training(if_main=True)
     '''init agent and environment'''
     agent = args.agent
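
The identical update_buffer block is deleted from SAC.py and TD3.py below; all three training scripts keep their "from tools import *" line and now pick the helper up from the copy added to tools.py at the end of this commit. For reference, the helper converts a rollout into the (state, other) tensors the ReplayBuffer stores, where other packs (reward, done, *action) and the done flag is turned into a discount mask. A minimal, self-contained illustration of that transformation (the numbers and the gamma value are made up, not taken from the repository):

import torch

gamma = 0.995                                    # assumed discount factor, not from this commit
trajectory = [([0.1, 0.2], (1.0, 0.0, 0.3)),     # (state, (reward, done, action)), made-up step
              ([0.3, 0.4], (2.0, 1.0, -0.7))]    # made-up terminal step (done = 1.0)

ten_state = torch.as_tensor([item[0] for item in trajectory], dtype=torch.float32)
ary_other = torch.as_tensor([item[1] for item in trajectory])
ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma   # done -> mask: 0.995 for step 1, 0.0 for step 2

print(ten_state.shape[0], float(ary_other[:, 0].mean()))   # 2 steps, mean reward 1.5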

SAC.py (13 changed lines)

@@ -7,19 +7,6 @@ from environment import ESSEnv
 from tools import *
 
 
-def update_buffer(_trajectory):
-    ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
-    ary_other = torch.as_tensor([item[1] for item in _trajectory])
-    ary_other[:, 0] = ary_other[:, 0]  # ten_reward
-    ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma  # ten_mask = (1.0 - ary_done) * gamma
-
-    buffer.extend_buffer(ten_state, ary_other)
-
-    _steps = ten_state.shape[0]
-    _r_exp = ary_other[:, 0].mean()  # other = (reward, mask, action)
-    return _steps, _r_exp
-
-
 if __name__ == '__main__':
     args = Arguments()
     reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}

TD3.py (13 changed lines)

@@ -7,19 +7,6 @@ from environment import ESSEnv
 from tools import *
 
 
-def update_buffer(_trajectory):
-    ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
-    ary_other = torch.as_tensor([item[1] for item in _trajectory])
-    ary_other[:, 0] = ary_other[:, 0]  # ten_reward
-    ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma  # ten_mask = (1.0 - ary_done) * gamma
-
-    buffer.extend_buffer(ten_state, ary_other)
-
-    _steps = ten_state.shape[0]
-    _r_exp = ary_other[:, 0].mean()  # other = (reward, mask, action)
-    return _steps, _r_exp
-
-
 if __name__ == '__main__':
     args = Arguments()
     reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
environment.py

@@ -11,8 +11,8 @@ class ESSEnv(gym.Env):
     def __init__(self, **kwargs):
         super(ESSEnv, self).__init__()
         self.excess = None
-        self.unbalance = None
+        self.shedding = None
         self.unbalance = None
         self.real_unbalance = None
         self.operation_cost = None
         self.current_output = None

@@ -113,7 +113,7 @@ class ESSEnv(gym.Env):
                 sell_benefit = self.grid.get_cost(price, unbalance) * self.sell_coefficient
             else:
                 sell_benefit = self.grid.get_cost(price, self.grid.exchange_ability) * self.sell_coefficient
-                # real unbalance that even grid could not meet
+                # real unbalance that grid could not meet
                 self.excess = unbalance - self.grid.exchange_ability
                 excess_penalty = self.excess * self.penalty_coefficient
         else:  # unbalance <0, its load shedding model, deficient penalty is used
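
The second hunk only rewords a comment, but it sits in the branch that prices energy the grid cannot absorb: when the surplus (unbalance) exceeds grid.exchange_ability, the grid buys only exchange_ability worth of energy and the remainder becomes self.excess, charged at penalty_coefficient. A toy calculation with made-up numbers (none of these values or units come from the repository):

excess = 120.0 - 100.0          # unbalance 120, exchange_ability 100 -> excess of 20
excess_penalty = 20.0 * 50.0    # penalty_coefficient assumed 50 -> penalty of 1000
# sell_benefit is computed from exchange_ability (the 100 actually sold), not from the full 120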

tools.py (13 changed lines)

@@ -233,6 +233,19 @@ def get_episode_return(env, act, device):
     return episode_return, episode_unbalance
 
 
+def update_buffer(_trajectory):
+    ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
+    ary_other = torch.as_tensor([item[1] for item in _trajectory])
+    ary_other[:, 0] = ary_other[:, 0]  # ten_reward
+    ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma  # ten_mask = (1.0 - ary_done) * gamma
+
+    buffer.extend_buffer(ten_state, ary_other)
+
+    _steps = ten_state.shape[0]
+    _r_exp = ary_other[:, 0].mean()  # other = (reward, mask, action)
+    return _steps, _r_exp
+
+
 class ReplayBuffer:
     def __init__(self, max_len, state_dim, action_dim, gpu_id=0):
         self.now_len = 0
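
Note that the relocated helper still refers to buffer and gamma as free variables. In the training scripts those were module-level names created in the __main__ block; after the move, Python resolves them in tools.py's namespace, so they must exist there (or be assigned from outside) before the function is called, otherwise it raises NameError. One possible way around this, sketched purely as an illustration and assuming ReplayBuffer.extend_buffer keeps the signature used above, is to pass both in explicitly:

import torch

def update_buffer(_trajectory, buffer, gamma):
    # Same conversion as the relocated helper, with the replay buffer and discount passed in.
    ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
    ary_other = torch.as_tensor([item[1] for item in _trajectory])
    ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma  # done flag -> discount mask
    buffer.extend_buffer(ten_state, ary_other)
    return ten_state.shape[0], ary_other[:, 0].mean()  # steps collected, mean reward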