diff --git a/models/env.py b/models/env.py index c33707b..ac3c554 100644 --- a/models/env.py +++ b/models/env.py @@ -25,6 +25,8 @@ class WgzGym(gym.Env): self.a = 0.5 self.b = 0.3 self.c = 0.2 + self.heat_a = 0.6 + self.power_a = 0.4 self.EC_parameters = kwargs.get('EC_parameters', EC_parameters) # 电解水制氢器 self.HST_parameters = kwargs.get('dg_parameters', dg_parameters) # 储氢罐 @@ -53,6 +55,7 @@ class WgzGym(gym.Env): def _build_state(self): hst_soc = self.HST.current_soc ec_out = self.EC.get_hydrogen() + grid_ex = self.grid time_step = self.current_time price = self.data_manager.get_price_data(self.month, self.day, self.current_time) @@ -64,13 +67,13 @@ class WgzGym(gym.Env): obs = np.concatenate((np.float32(time_step), np.float32(price), np.float32(temper), np.float32(solar), np.float32(load), np.float32(heat), - np.float32(people), np.float32(ec_out), np.float32(hst_soc), np.float32(wind)), axis=None) + np.float32(people), np.float32(ec_out), np.float32(hst_soc), np.float32(grid_ex)), axis=None) return obs def step(self, action): - # 在每个组件中添加动作 + # 每个组件执行动作 one step current_obs = self._build_state() - self.EC.step(action[0]) # 执行状态转换,电池当前容量也改变 + self.EC.step(action[0]) self.HST.step(action[1]) self.grid.step(action[2]) price = current_obs[1] @@ -78,26 +81,27 @@ class WgzGym(gym.Env): solar = current_obs[3] load = current_obs[4] heat = current_obs[5] + gym_to_grid = solar + self.HST.get_power() - self.EC.current_power - load + heat_penalty = 0 # reward = 0.0 sell_benefit, buy_cost = 0, 0 - excess_penalty, deficient_penalty = 0, 0 + # power_penalty = 0 if gym_to_grid >= 0: # 过剩 sell_benefit = self.grid.get_cost(price, gym_to_grid) * self.sell_coefficient - excess_penalty = gym_to_grid * self.penalty_coefficient else: # 缺少 buy_cost = self.grid.get_cost(price, abs(gym_to_grid)) - deficient_penalty = abs(gym_to_grid) * self.penalty_coefficient + power_penalty = abs(gym_to_grid) * self.penalty_coefficient hst_cost = self.HST.get_cost() ec_cost = self.EC.get_cost(price) solar_cost = solar # 待补充 economic_cost = hst_cost + ec_cost + solar_cost - sell_benefit + buy_cost - demand_cost = excess_penalty + deficient_penalty + demand_cost = self.heat_a * heat_penalty + self.power_a * power_penalty eco_benifit = 0 - reward = - self.a * economic_cost - self.b * demand_cost + self.c * eco_benifit + reward = - self.a * demand_cost - self.b * economic_cost + self.c * eco_benifit self.unbalance = gym_to_grid final_step_outputs = [self.HST.current_soc, self.EC.current_power, self.grid.current_power] diff --git a/models/module.py b/models/module.py index fdf284d..e3293f2 100644 --- a/models/module.py +++ b/models/module.py @@ -22,6 +22,9 @@ class EC: def get_hydrogen(self): return self.current_power * self.electricity_efficiency * self.hydrogen_produce + def get_heat(self): + return self.current_power * (1 - self.electricity_efficiency) + def get_carbon(self): return self.current_power * self.carbon_reduce @@ -51,6 +54,7 @@ class HST: 储氢罐的放气速率 = 供电 (电价低时多电解,电价高时释放) ''' + def step(self, action_hst): energy = action_hst * self.ramp updated_soc = max(self.min_soc, min(self.max_soc, (self.current_soc * self.capacity + energy) / self.capacity)) @@ -59,7 +63,13 @@ class HST: def get_power(self): if self.hydrogen_charge > 0: - return self.hydrogen_charge * self.lower_heating_value * self.charge_efficiency * self.generate_efficiency + return self.hydrogen_charge * self.charge_efficiency * self.lower_heating_value * self.generate_efficiency + else: + return 0 + + def get_heat(self): + if self.hydrogen_charge < 0: + return self.hydrogen_charge * self.charge_efficiency * (1 - self.generate_efficiency) else: return 0 diff --git a/models/tools.py b/models/tools.py index 7440af6..16253b3 100644 --- a/models/tools.py +++ b/models/tools.py @@ -2,39 +2,30 @@ import torch def test_one_episode(env, act, device): - """to get evaluate information, here record the unbalance of after taking action""" - record_state = [] - record_action = [] - record_reward = [] - record_unbalance = [] - record_system_info = [] # [time,price,netload,action,real action,soc,output*4,unbalance(exchange+penalty),cost] - record_init_info = [] # include month,day,time,intial soc + """get evaluate information, record the unbalance of after taking action""" + record_system_info = [] # same as observation + record_init_info = [] # include month,day,time env.TRAIN = False state = env.reset() - record_init_info.append([env.month, env.day, env.current_time, env.battery.current_soc]) - print(f'current testing month is {env.month}, day is {env.day},initial_soc is {env.battery.current_soc}') + record_init_info.append([env.month, env.day, env.current_time]) + print(f'current testing month is {env.month}, day is {env.day}') for i in range(24): s_tensor = torch.as_tensor((state,), device=device) a_tensor = act(s_tensor) - action = a_tensor.detach().cpu().numpy()[0] # not need detach(), because with torch.no_grad() outside - real_action = action + action = a_tensor.detach().cpu().numpy()[0] state, next_state, reward, done = env.step(action) - - record_system_info.append([state[0], state[1], state[3] + env.wind.current_power, action, real_action, - env.battery.SOC(), env.battery.energy_change, next_state[4], next_state[5], - next_state[6], env.solar.current_power, env.wind.current_power, env.unbalance, - env.operation_cost, reward]) - record_state.append(state) - record_action.append(real_action) - record_reward.append(reward) - record_unbalance.append(env.unbalance) + obs = np.concatenate((np.float32(time_step), np.float32(price), np.float32(temper), + np.float32(solar), np.float32(load), np.float32(heat), + np.float32(people), np.float32(ec_out), np.float32(hst_soc), np.float32(wind)), axis=None) + record_system_info.append([state[0], state[1], state[2], action, EC.current_power(), + env.HST.current_soc(), env.HST.get_power(), next_state[4], next_state[5], + next_state[6], env.solar.current_power, env.power_demand, env.heat_demand, reward]) state = next_state - # add information of last step dg1, dh2, dg3, soc, tem, irr + # add information of last step EC, HST.current_soc, HST.power, grid record_system_info[-1][7:12] = [env.final_step_outputs[0], env.final_step_outputs[1], env.final_step_outputs[2], env.final_step_outputs[4], env.final_step_outputs[5]] record_system_info[-1][5] = env.final_step_outputs[3] - record = {'init_info': record_init_info, 'system_info': record_system_info, 'state': record_state, - 'action': record_action, 'reward': record_reward, 'unbalance': record_unbalance} + record = {'init_info': record_init_info, 'system_info': record_system_info} return record diff --git a/train.py b/train.py index c1403df..23fa770 100644 --- a/train.py +++ b/train.py @@ -270,12 +270,10 @@ if __name__ == '__main__': if args.test_network: args.cwd = agent_name agent.act.load_state_dict(torch.load(act_save_path)) - print('params have been reload and test') record = test_one_episode(env, agent.act, agent.device) - eval_data = pd.DataFrame(record['system_info']) - eval_data.columns = ['time_step', 'price', 'load', 'action', 'real_action', 'soc', 'battery', - 'gen1', 'gen2', 'gen3', 'pv', 'wind', 'unbalance', 'operation_cost', 'reward'] + if args.save_test_data: test_data_save_path = f'{args.cwd}/test.pkl' with open(test_data_save_path, 'wb') as tf: pickle.dump(record, tf) + print('test data have been saved')