diff --git a/inference.py b/inference.py
index aaa5ed0..6f1b95a 100644
--- a/inference.py
+++ b/inference.py
@@ -99,8 +99,8 @@ def main():
 
     if args.service:
         args.cwd = agent_name
-        service_act_save_path = f'{args.cwd}/actor.pth'
-        agent.act.load_state_dict(torch.load(service_act_save_path))
+        act_save_path = f'{args.cwd}/actor.pth'
+        agent.act.load_state_dict(torch.load(act_save_path))
 
         # Create a queue for inter-thread communication
         data_queue = queue.Queue()
diff --git a/models/env.py b/models/env.py
index 0258a3f..d600c9f 100644
--- a/models/env.py
+++ b/models/env.py
@@ -124,7 +124,7 @@ class WgzGym(gym.Env):
 
         reward = - self.a * demand_cost - self.b * economic_cost + self.c * eco_benifit
         self.unbalance = power_gap + heat_gap
-        final_step_outputs = [self.HST.current_soc, self.EC.current_power, self.grid.current_power]
+        final_step_outputs = [self.HST.current_soc, self.HST.get_power(), self.EC.current_power, self.grid.current_power]
         self.current_time += 1
         finish = (self.current_time == self.episode_length)
         if finish:
diff --git a/models/tools.py b/models/tools.py
index 16253b3..9a5acfd 100644
--- a/models/tools.py
+++ b/models/tools.py
@@ -14,17 +14,13 @@ def test_one_episode(env, act, device):
         a_tensor = act(s_tensor)
         action = a_tensor.detach().cpu().numpy()[0]
         state, next_state, reward, done = env.step(action)
-        obs = np.concatenate((np.float32(time_step), np.float32(price), np.float32(temper),
-                              np.float32(solar), np.float32(load), np.float32(heat),
-                              np.float32(people), np.float32(ec_out), np.float32(hst_soc), np.float32(wind)), axis=None)
-        record_system_info.append([state[0], state[1], state[2], action, EC.current_power(),
-                                   env.HST.current_soc(), env.HST.get_power(), next_state[4], next_state[5],
-                                   next_state[6], env.solar.current_power, env.power_demand, env.heat_demand, reward])
+        record_system_info.append([state[1], state[2], env.HST.current_soc(), env.HST.get_power(),
+                                   env.EC.current_power, env.grid.current_power, action, reward])
         state = next_state
     # add information of last step EC, HST.current_soc, HST.power, grid
-    record_system_info[-1][7:12] = [env.final_step_outputs[0], env.final_step_outputs[1], env.final_step_outputs[2],
-                                    env.final_step_outputs[4], env.final_step_outputs[5]]
-    record_system_info[-1][5] = env.final_step_outputs[3]
+    # env.final_step_outputs = [HST.current_soc, HST.get_power(), EC.current_power, grid.current_power]
+    record_system_info[-1][2:6] = [env.final_step_outputs[0], env.final_step_outputs[1], env.final_step_outputs[2],
+                                   env.final_step_outputs[3]]
 
     record = {'init_info': record_init_info, 'system_info': record_system_info}
     return record
@@ -40,7 +36,7 @@ def get_episode_return(env, act, device):
         state, next_state, reward, done, = env.step(action)
         state = next_state
         episode_reward += reward
-        episode_unbalance += env.real_unbalance
+        episode_unbalance += env.unbalance
         if done:
             break
     return episode_reward, episode_unbalance
diff --git a/train.py b/train.py
index 23fa770..e10d732 100644
--- a/train.py
+++ b/train.py
@@ -236,8 +236,6 @@ if __name__ == '__main__':
     '''init training params'''
     # args.train = False
     # args.save_network = False
-    # args.test_network = False
-    # args.save_test_data = False
     if args.train:
         for i_episode in range(num_episode):
             with torch.no_grad():
@@ -266,14 +264,3 @@ if __name__ == '__main__':
                 pickle.dump(reward_record, tf)
             torch.save(agent.act.state_dict(), act_save_path)
             print('actor params have been saved')
-
-    if args.test_network:
-        args.cwd = agent_name
-        agent.act.load_state_dict(torch.load(act_save_path))
-        record = test_one_episode(env, agent.act, agent.device)
-
-        if args.save_test_data:
-            test_data_save_path = f'{args.cwd}/test.pkl'
-            with open(test_data_save_path, 'wb') as tf:
-                pickle.dump(record, tf)
-            print('test data have been saved')