nothing

parent b5a1842147
commit 69fe33deec
DDPG.py | 13 +++++++++++++

@@ -7,6 +7,19 @@ from environment import ESSEnv
 from tools import *
+
+
+def update_buffer(_trajectory):
+    ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
+    ary_other = torch.as_tensor([item[1] for item in _trajectory])
+    ary_other[:, 0] = ary_other[:, 0]  # ten_reward
+    ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma  # ten_mask = (1.0 - ary_done) * gamma
+
+    buffer.extend_buffer(ten_state, ary_other)
+
+    _steps = ten_state.shape[0]
+    _r_exp = ary_other[:, 0].mean()  # other = (reward, mask, action)
+    return _steps, _r_exp
 
 
 if __name__ == '__main__':
     args = Arguments()
     '''record real unbalance'''
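The helper added to each training script converts a rollout trajectory into two tensors and rewrites the done flag in column 1 into a discount mask before pushing the batch into the replay buffer. Folding gamma into the mask lets a later TD-target computation drop the bootstrap term automatically at episode ends. A minimal, self-contained illustration of that mask arithmetic (all numbers below are made up for illustration, not taken from the repository):

    import torch

    gamma = 0.995                              # discount factor (value assumed for illustration)
    reward = torch.tensor([1.0, 2.0, 3.0])
    done   = torch.tensor([0.0, 0.0, 1.0])     # last transition terminates the episode
    next_q = torch.tensor([10.0, 10.0, 10.0])  # critic's value estimate for the next state

    mask = (1.0 - done) * gamma                # -> [0.995, 0.995, 0.000]
    q_target = reward + mask * next_q          # -> [10.95, 11.95, 3.00]; no bootstrap after done
    print(q_target)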
SAC.py | 13 +++++++++++++

@@ -7,6 +7,19 @@ from environment import ESSEnv
 from tools import *
+
+
+def update_buffer(_trajectory):
+    ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
+    ary_other = torch.as_tensor([item[1] for item in _trajectory])
+    ary_other[:, 0] = ary_other[:, 0]  # ten_reward
+    ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma  # ten_mask = (1.0 - ary_done) * gamma
+
+    buffer.extend_buffer(ten_state, ary_other)
+
+    _steps = ten_state.shape[0]
+    _r_exp = ary_other[:, 0].mean()  # other = (reward, mask, action)
+    return _steps, _r_exp
 
 
 if __name__ == '__main__':
     args = Arguments()
     reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
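The reward_record dictionary visible in the context lines collects per-episode statistics. A hedged sketch of how such a record is typically filled and inspected (the numeric values and the pandas conversion are illustrative assumptions, not shown in the hunk):

    import pandas as pd

    reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}

    # after each training episode, append one value per key so the lists stay aligned
    episode, steps, r_exp, unbalance = 0, 168, -3.2, 0.45   # made-up numbers
    reward_record['episode'].append(episode)
    reward_record['steps'].append(steps)
    reward_record['mean_episode_reward'].append(r_exp)
    reward_record['unbalance'].append(unbalance)

    print(pd.DataFrame(reward_record))                      # one row per recorded episode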
TD3.py | 13 +++++++++++++

@@ -7,6 +7,19 @@ from environment import ESSEnv
 from tools import *
+
+
+def update_buffer(_trajectory):
+    ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
+    ary_other = torch.as_tensor([item[1] for item in _trajectory])
+    ary_other[:, 0] = ary_other[:, 0]  # ten_reward
+    ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma  # ten_mask = (1.0 - ary_done) * gamma
+
+    buffer.extend_buffer(ten_state, ary_other)
+
+    _steps = ten_state.shape[0]
+    _r_exp = ary_other[:, 0].mean()  # other = (reward, mask, action)
+    return _steps, _r_exp
 
 
 if __name__ == '__main__':
     args = Arguments()
     reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
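All three scripts receive the same copy of the helper, so the trajectories they pass in share one layout: a list of (state, other) pairs where, judging from the in-code comments, other packs (reward, done, *action). A small sketch of that packing, with every dimension and value chosen purely for illustration:

    import numpy as np

    state  = np.zeros(7, dtype=np.float32)                # placeholder state vector
    action = np.array([0.2, -0.1, 0.05], dtype=np.float32)
    reward, done = 1.3, 0.0

    other = np.hstack((reward, done, action)).astype(np.float32)  # column 0: reward, column 1: done flag
    trajectory = [(state, other)]                                  # the shape update_buffer expects
    print(other.shape)                                             # (2 + action_dim,) -> (5,)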
tools.py | 13 -------------

@@ -233,19 +233,6 @@ def get_episode_return(env, act, device):
     return episode_return, episode_unbalance
-
-
-def update_buffer(_trajectory):
-    ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
-    ary_other = torch.as_tensor([item[1] for item in _trajectory])
-    ary_other[:, 0] = ary_other[:, 0]  # ten_reward
-    ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma  # ten_mask = (1.0 - ary_done) * gamma
-
-    buffer.extend_buffer(ten_state, ary_other)
-
-    _steps = ten_state.shape[0]
-    _r_exp = ary_other[:, 0].mean()  # other = (reward, mask, action)
-    return _steps, _r_exp
 
 
 class ReplayBuffer:
     def __init__(self, max_len, state_dim, action_dim, gpu_id=0):
         self.now_len = 0
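The removed helper's only dependency on tools.py is the buffer.extend_buffer call, and the trailing context shows just the opening of the ReplayBuffer class (whose real __init__ takes action_dim rather than a combined other_dim). As a minimal ring-buffer sketch consistent with that call signature, under the assumption that rows are stored in two preallocated tensors; the actual class in tools.py is not fully visible here and may differ:

    import torch

    class MiniReplayBuffer:
        """Toy ring buffer: stores (state, other) rows and wraps around when full."""

        def __init__(self, max_len, state_dim, other_dim, gpu_id=0):
            self.device = torch.device(f'cuda:{gpu_id}' if torch.cuda.is_available() else 'cpu')
            self.max_len = max_len
            self.now_len = 0          # number of valid rows currently stored
            self.next_idx = 0         # write cursor
            self.buf_state = torch.empty((max_len, state_dim), dtype=torch.float32, device=self.device)
            self.buf_other = torch.empty((max_len, other_dim), dtype=torch.float32, device=self.device)

        def extend_buffer(self, ten_state, ten_other):
            size = ten_state.shape[0]
            idx = (torch.arange(size, device=self.device) + self.next_idx) % self.max_len
            self.buf_state[idx] = ten_state.to(dtype=torch.float32, device=self.device)
            self.buf_other[idx] = ten_other.to(dtype=torch.float32, device=self.device)
            self.next_idx = int((self.next_idx + size) % self.max_len)
            self.now_len = min(self.now_len + size, self.max_len)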