import torch


def test_one_episode(env, act, device):
    """Collect evaluation information; record the unbalance remaining after each action."""
    record_state = []
    record_action = []
    record_reward = []
    record_unbalance = []
    record_system_info = []  # [time, price, netload, action, real action, soc, output*4, unbalance (exchange + penalty), cost]
    record_init_info = []  # includes month, day, time, initial soc
    env.TRAIN = False
    state = env.reset()
    record_init_info.append([env.month, env.day, env.current_time, env.battery.current_soc])
    print(f'current testing month is {env.month}, day is {env.day}, initial_soc is {env.battery.current_soc}')
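    # One evaluation episode spans 24 environment steps (one decision per hour of the
    # selected test day, judging from env.month / env.day / env.current_time above).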
    for i in range(24):
        s_tensor = torch.as_tensor((state,), device=device)
        a_tensor = act(s_tensor)
        action = a_tensor.detach().cpu().numpy()[0]  # detach() is redundant here because the caller wraps this in torch.no_grad()
        real_action = action
        state, next_state, reward, done = env.step(action)
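        # Note: unlike the standard Gym API, this env's step() returns both the pre-step
        # state and the next state; the pre-step state is what gets logged below.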
        record_system_info.append([state[0], state[1], state[3] + env.wind.current_power, action, real_action,
                                   env.battery.SOC(), env.battery.energy_change, next_state[4], next_state[5],
                                   next_state[6], env.solar.current_power, env.wind.current_power, env.unbalance,
                                   env.operation_cost, reward])
        record_state.append(state)
        record_action.append(real_action)
        record_reward.append(reward)
        record_unbalance.append(env.unbalance)
        state = next_state
    # overwrite the last step's entries with the final outputs: dg1, dg2, dg3, soc, temperature, irradiance
    record_system_info[-1][7:12] = [env.final_step_outputs[0], env.final_step_outputs[1], env.final_step_outputs[2],
                                    env.final_step_outputs[4], env.final_step_outputs[5]]
    record_system_info[-1][5] = env.final_step_outputs[3]
    record = {'init_info': record_init_info, 'system_info': record_system_info, 'state': record_state,
              'action': record_action, 'reward': record_reward, 'unbalance': record_unbalance}
    return record


def get_episode_return(env, act, device):
    """Run one episode and return its cumulative reward and cumulative unbalance."""
    episode_reward = 0.0  # sum of rewards in an episode
    episode_unbalance = 0.0
    state = env.reset()
    for i in range(24):
        s_tensor = torch.as_tensor((state,), device=device)
        a_tensor = act(s_tensor)
        action = a_tensor.detach().cpu().numpy()[0]  # detach() is redundant here because the caller wraps this in torch.no_grad()
        state, next_state, reward, done = env.step(action)
        state = next_state
        episode_reward += reward
        episode_unbalance += env.real_unbalance
        if done:
            break
    return episode_reward, episode_unbalance
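

# Usage sketch (illustrative, not part of the original file): the comments above assume the
# actor is evaluated under torch.no_grad(); a hypothetical wrapper could look like this.
def evaluate_policy(env, act, device):
    """Hypothetical helper: run one no-grad evaluation episode and return its summary."""
    with torch.no_grad():
        episode_reward, episode_unbalance = get_episode_return(env, act, device)
    return episode_reward, episode_unbalance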