import torch


def test_one_episode(env, act, device):
    """Run one evaluation episode and record the system state, including the
    power unbalance, after each action."""
    record_system_info = []  # same fields as the observation
    record_init_info = []    # month, day, time at the start of the episode
    env.TRAIN = False
    state = env.reset()
    record_init_info.append([env.month, env.day, env.current_time])
    print(f'current testing month is {env.month}, day is {env.day}')
    for i in range(24):
        # float32 to match the network weights; as_tensor avoids an extra copy
        s_tensor = torch.as_tensor((state,), dtype=torch.float32, device=device)
        a_tensor = act(s_tensor)
        action = a_tensor.detach().cpu().numpy()[0]
        state, next_state, reward, done = env.step(action)
        record_system_info.append([state[1], state[2],
                                   env.HST.current_soc(), env.HST.get_power(),
                                   env.EC.current_power, action, reward])
        state = next_state
    # overwrite the last step with the true final HST SOC, HST power and EC power
    record_system_info[-1][2:5] = [env.final_step_outputs[0],
                                   env.final_step_outputs[1],
                                   env.final_step_outputs[2]]
    record = {'init_info': record_init_info, 'system_info': record_system_info}
    return record


def get_episode_return(env, act, device):
    """Accumulate the total reward and total power unbalance over one episode."""
    episode_reward = 0.0     # sum of rewards in an episode
    episode_unbalance = 0.0  # sum of power unbalance in an episode
    state = env.reset()
    for i in range(24):
        s_tensor = torch.as_tensor((state,), dtype=torch.float32, device=device)
        a_tensor = act(s_tensor)
        # detach() is optional when the caller wraps this in torch.no_grad()
        action = a_tensor.detach().cpu().numpy()[0]
        state, next_state, reward, done = env.step(action)
        state = next_state
        episode_reward += reward
        episode_unbalance += env.unbalance
        if done:
            break
    return episode_reward, episode_unbalance
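
# --- Usage sketch (not part of the original module) ---
# A minimal evaluation driver built on the two functions above, assuming an
# environment instance `env` and a trained actor network `actor` exist
# elsewhere in the project; both names are placeholders. Running under
# torch.no_grad() disables autograd during evaluation, which is why detach()
# inside get_episode_return() is strictly optional.
def evaluate(env, actor, device, n_episodes=10):
    """Average episode reward and unbalance over several evaluation episodes."""
    rewards, unbalances = [], []
    with torch.no_grad():
        for _ in range(n_episodes):
            r, u = get_episode_return(env, actor, device)
            rewards.append(r)
            unbalances.append(u)
    return sum(rewards) / n_episodes, sum(unbalances) / n_episodes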