diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..a93d29e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,9 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
+/.idea/
diff --git a/data/actor.pth b/data/actor.pth
new file mode 100644
index 0000000..c58c212
Binary files /dev/null and b/data/actor.pth differ
diff --git a/data/loss.pkl b/data/loss.pkl
new file mode 100644
index 0000000..3723816
Binary files /dev/null and b/data/loss.pkl differ
diff --git a/data/reward.pkl b/data/reward.pkl
new file mode 100644
index 0000000..b65f4d9
Binary files /dev/null and b/data/reward.pkl differ
diff --git a/data/service_actions.csv b/data/service_actions.csv
new file mode 100644
index 0000000..f6fd2a1
--- /dev/null
+++ b/data/service_actions.csv
@@ -0,0 +1,5 @@
+time,action
+1,[-0.85844654 -0.913628 ]
+1,[-0.97137856 -0.9997079 ]
+1,[-0.97137856 -0.9997079 ]
+1,[-0.97137856 -0.9997079 ]
diff --git a/data/service_result.csv b/data/service_result.csv
new file mode 100644
index 0000000..2659b21
--- /dev/null
+++ b/data/service_result.csv
@@ -0,0 +1,4 @@
+reward,unbalance
+-0.09503999999999999,0.03
+-0.09503999999999999,0.03
+-0.09503999999999999,0.03
diff --git a/inference.py b/inference.py
index e69de29..f14afcd 100644
--- a/inference.py
+++ b/inference.py
@@ -0,0 +1,119 @@
+import queue
+import threading
+import time
+import torch
+
+from train import *
+
+
+def test_one_step(env, act, device, data, action_path):
+    env.rec_data = data
+    state = env.reset()
+    s_tensor = torch.as_tensor((state,), device=device)
+    a_tensor = act(s_tensor)
+    action = a_tensor.detach().cpu().numpy()[0]
+    state, next_state, reward, done = env.step(action)
+    print(f'The action of {env.current_time} is {action}')
+
+    with open(action_path, 'a') as af:
+        af.write(f'{env.current_time},{action}\n')
+    return reward, env.unbalance
+
+
+def run_service_test(env, agent, data):
+    service_result_path = 'data/service_result.csv'
+    action_path = 'data/service_actions.csv'
+
+    if not os.path.exists(service_result_path):
+        with open(service_result_path, 'w') as f:
+            f.write('reward,unbalance\n')
+
+    if not os.path.exists(action_path):
+        with open(action_path, 'w') as af:
+            af.write('time,action\n')
+
+    service_rewards = []
+    service_unbalances = []
+
+    service_reward, service_unbalance = test_one_step(env, agent.act, agent.device, data, action_path)
+    service_rewards.append(service_reward)
+    service_unbalances.append(service_unbalance)
+
+    if service_rewards:
+        avg_reward = sum(service_rewards) / len(service_rewards)
+        avg_unbalance = sum(service_unbalances) / len(service_unbalances)
+
+        with open(service_result_path, 'a') as f:
+            f.write(f'{avg_reward},{avg_unbalance}\n')
+
+
+# listener side
+def listener_thread(env, agent, data_queue):
+    while True:
+        time.sleep(0.1)  # wait
+        if not data_queue.empty():
+            new_data = data_queue.get()
+            print(f"Data received: {new_data}")
+            run_service_test(env, agent, new_data)
+            data_queue.task_done()
+
+
+# sender side
+def sender_thread(data_queue):
+    while True:
+        try:
+            time.sleep(0.5)
+            user_input = input("Enter the current price, temper, solar, load, heat, people (comma separated): \n")
+
+            # split the input string and convert it into a list of floats
+            input_data = list(map(float, user_input.split(',')))
+
+            # check that exactly six values were given
+            if len(input_data) != 6:
+                print("Invalid input, please enter six numeric values.")
+                continue
+
+            # put the data into the queue
+            print(f"Sending data: {input_data}")
+            data_queue.put(input_data)
+
+        except ValueError:
+            print("Invalid input, please enter numeric values.")
+
+
+def main():
+    args = Arguments()
+    args.visible_gpu = '0'
+    for seed in args.random_seed_list:
+        args.random_seed = seed
+        args.agent = AgentPPO()
+        args.agent.cri_target = True
+        args.env = WgzGym()
+        args.init_before_training()
+
+        agent = args.agent
+        env = args.env
+        env.TRAIN = False
+        agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate)
+
+        act_save_path = './data/actor.pth'
+        agent.act.load_state_dict(torch.load(act_save_path))
+
+        # create a queue for inter-thread communication
+        data_queue = queue.Queue()
+
+        listener = threading.Thread(target=listener_thread, args=(env, agent, data_queue))
+        listener.daemon = True
+        listener.start()
+
+        sender = threading.Thread(target=sender_thread, args=(data_queue,))
+        sender.daemon = True
+        sender.start()
+
+        # keep the main thread running and wait for incoming data
+        while True:
+            time.sleep(10)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/models/env.py b/models/env.py
index ebb826c..c00789b 100644
--- a/models/env.py
+++ b/models/env.py
@@ -1,19 +1,18 @@
 import gym
+import numpy as np
 import pandas as pd
-from data_manager import *
-from module import *
-from parameters import *
+from models.data_manager import *
+from models.module import *
+from models.parameters import *
 
 
 class WgzGym(gym.Env):
     def __init__(self, **kwargs):
         super(WgzGym, self).__init__()
-        self.excess = None
-        self.shedding = None
+        self.rec_data = None
         self.unbalance = None
-        self.real_unbalance = None
-        self.operation_cost = None
+        self.reward = None
         self.current_output = None
         self.final_step_outputs = None
         self.data_manager = DataManager()
@@ -23,21 +22,26 @@ class WgzGym(gym.Env):
         self.TRAIN = True
         self.current_time = None
         self.episode_length = 24
-        self.penalty_coefficient = 50  # constraint violation penalty coefficient
-        self.sell_coefficient = 0.1  # profit coefficient for selling energy
+        self.penalty_coefficient = 10  # constraint violation penalty coefficient
+        self.sell_coefficient = 0.5  # profit coefficient for selling energy
+        self.a = 0.5
+        self.b = 0.3
+        self.c = 0.2
+        self.heat_a = 0.6
+        self.power_a = 0.4
         self.EC_parameters = kwargs.get('EC_parameters', EC_parameters)  # electrolyzer (hydrogen production)
-        self.HST_parameters = kwargs.get('dg_parameters', dg_parameters)  # hydrogen storage tank
+        self.HST_parameters = kwargs.get('HST_parameters', HST_parameters)  # hydrogen storage tank
         self.grid = Grid()
         self.EC = EC(self.EC_parameters)
         self.HST = HST(self.HST_parameters)
-        self.action_space = gym.spaces.Box(low=-1, high=1, shape=(3,), dtype=np.float32)
+        self.action_space = gym.spaces.Box(low=-1, high=1, shape=(2,), dtype=np.float32)
         '''
         time  PV  temperature (humidity not yet considered)  electricity demand  heat demand (converted to hot-water wattage)  people count  price -> 7
-        electrolyzer power  grid power  hydrogen tank SOC -> 3
+        electrolyzer power  hydrogen tank SOC -> 2
+        grid power (mind the normalization) -> dropped (driven by the supply/demand balance)
         '''
-        self.state_space = gym.spaces.Box(low=0, high=1, shape=(10,), dtype=np.float32)
+        self.state_space = gym.spaces.Box(low=0, high=1, shape=(9,), dtype=np.float32)
 
     def reset(self, *args):
         self.month = np.random.randint(1, 13)  # choose 12 month
@@ -51,74 +55,76 @@ class WgzGym(gym.Env):
         return self._build_state()
 
     def _build_state(self):
-        soc = self.HST.SOC()
-        ec_output = self.EC.current_output
+        hst_soc = self.HST.current_soc
+        ec_out = self.EC.get_hydrogen()
+        # grid_ex = self.grid.trade_energy
         time_step = self.current_time
-        price = self.data_manager.get_price_data(self.month, self.day, self.current_time)
-        temper = self.data_manager.get_temperature_data(self.month, self.day, self.current_time)
-        solar = self.data_manager.get_solar_data(self.month, self.day, self.current_time)
-        load = self.data_manager.get_load_data(self.month, self.day, self.current_time)
-        heat = self.data_manager.get_heat_data(self.month, self.day, self.current_time)
-        people = self.data_manager.get_people_data(self.month, self.day, self.current_time)
+        if self.TRAIN:
+            price = self.data_manager.get_price_data(self.month, self.day, self.current_time)
+            temper = self.data_manager.get_temper_data(self.month, self.day, self.current_time)
+            solar = self.data_manager.get_solar_data(self.month, self.day, self.current_time)
+            load = self.data_manager.get_load_data(self.month, self.day, self.current_time)
+            heat = self.data_manager.get_heat_data(self.month, self.day, self.current_time)
+            people = self.data_manager.get_people_data(self.month, self.day, self.current_time)
+        else:
+            price = self.rec_data[0]
+            temper = self.rec_data[1]
+            solar = self.rec_data[2]
+            load = self.rec_data[3]
+            heat = self.rec_data[4]
+            people = self.rec_data[5]
 
-        obs = np.concatenate((np.float32(time_step), np.float32(soc), np.float32(price), np.float32(netload),
-                              np.float32(dg1_output), np.float32(dg2_output), np.float32(dg3_output),
-                              np.float32(temperature), np.float32(irradiance), np.float32(windspeed)), axis=None)
+        obs = np.concatenate((np.float32(time_step), np.float32(price), np.float32(temper),
+                              np.float32(solar), np.float32(load), np.float32(heat),
+                              np.float32(people), np.float32(ec_out), np.float32(hst_soc)), axis=None)
         return obs
 
-    def step(self, action):  # state transition: current_obs->take_action->get_reward->get_finish->next_obs
-        # add the action to each component
+    def step(self, action):
+        # each component executes its action for one step
         current_obs = self._build_state()
-        temperature = current_obs[7]
-        irradiance = current_obs[8]
-        self.wind.current_power = current_obs[9]
-        self.battery.step(action[0])  # perform the state transition; the battery capacity also changes
-        self.dg1.step(action[1])
-        self.dg2.step(action[2])
-        self.dg3.step(action[3])
-        self.solar.step(temperature, irradiance, action[4])
-        self.current_output = np.array((self.dg1.current_output, self.dg2.current_output, self.dg3.current_output,
-                                        -self.battery.energy_change, self.solar.current_power, self.wind.current_power))
-        actual_production = sum(self.current_output)
+        self.EC.step(action[0])
+        self.HST.step(action[1])
+        # self.grid.step(action[2], self.EC.power_max)
         price = current_obs[1]
-        netload = current_obs[3] - self.solar.output_change
-        unbalance = actual_production - netload
+        temper = current_obs[2]  # usage to be determined
+        solar = current_obs[3]
+        load = current_obs[4]
+        heat = current_obs[5]
+        people = current_obs[6]  # usage to be determined
+
+        power_gap = solar + self.HST.get_power() - self.EC.current_power - load
+        heat_gap = self.HST.get_heat() + self.EC.get_heat() - heat
 
         # reward = 0.0
-        excess_penalty = 0
-        deficient_penalty = 0
         sell_benefit, buy_cost = 0, 0
-        self.excess, self.shedding = 0, 0
-        if unbalance >= 0:  # surplus
-            if unbalance <= self.grid.exchange_ability:
-                sell_benefit = self.grid.get_cost(price, unbalance) * self.sell_coefficient
-            else:
-                sell_benefit = self.grid.get_cost(price, self.grid.exchange_ability) * self.sell_coefficient
-                # real unbalance: the part exceeding the grid exchange limit
-                self.excess = unbalance - self.grid.exchange_ability
-                excess_penalty = self.excess * self.penalty_coefficient
-        else:  # unbalance < 0, shortage penalty
-            if abs(unbalance) <= self.grid.exchange_ability:
-                buy_cost = self.grid.get_cost(price, abs(unbalance))
-            else:
-                buy_cost = self.grid.get_cost(price, self.grid.exchange_ability)
-                self.shedding = abs(unbalance) - self.grid.exchange_ability
-                deficient_penalty = self.shedding * self.penalty_coefficient
-        battery_cost = self.battery.get_cost(self.battery.energy_change)
-        dg1_cost = self.dg1.get_cost(self.dg1.current_output)
-        dg2_cost = self.dg2.get_cost(self.dg2.current_output)
-        dg3_cost = self.dg3.get_cost(self.dg3.current_output)
-        solar_cost = self.solar.get_cost(self.solar.current_power)
-        wind_cost = self.wind.gen_cost(self.wind.current_power)
+        if power_gap >= 0:  # surplus power
+            sell_benefit = self.grid.get_cost(price, power_gap) * self.sell_coefficient
+            power_gap = 0
+            power_penalty = 0
+        else:  # power shortage
+            power_gap = abs(power_gap)
+            buy_cost = self.grid.get_cost(price, power_gap)
+            power_penalty = power_gap * self.penalty_coefficient
-        self.operation_cost = (battery_cost + dg1_cost + dg2_cost + dg3_cost + solar_cost + wind_cost
-                               + excess_penalty + deficient_penalty - sell_benefit + buy_cost)
-        reward = - self.operation_cost / 1e3
-        self.unbalance = unbalance
-        self.real_unbalance = self.shedding + self.excess
-        final_step_outputs = [self.dg1.current_output, self.dg2.current_output, self.dg3.current_output,
-                              self.battery.current_capacity, self.solar.current_power, self.wind.current_power]
+        if heat_gap >= 0:
+            heat_gap = 0
+            heat_penalty = 0
+        else:
+            heat_gap = abs(heat_gap)
+            heat_penalty = heat_gap * self.penalty_coefficient
+
+        hst_cost = self.HST.get_cost()
+        ec_cost = self.EC.get_cost(price)
+        solar_cost = solar  # to be refined
+
+        economic_cost = hst_cost + ec_cost + solar_cost - sell_benefit + buy_cost
+        demand_cost = self.heat_a * heat_penalty + self.power_a * power_penalty
+        eco_benefit = self.EC.less_carbon() - self.grid.get_carbon(power_gap)
+        reward = (- self.a * demand_cost - self.b * economic_cost + self.c * eco_benefit) / 1e3
+
+        self.unbalance = (power_gap + heat_gap) / 1e3
+        final_step_outputs = [self.HST.current_soc, self.HST.get_power(), self.EC.current_power]
         self.current_time += 1
         finish = (self.current_time == self.episode_length)
         if finish:
@@ -134,7 +140,7 @@ class WgzGym(gym.Env):
         solar = data_df['solar_power'].to_numpy(dtype=float)
         temper = data_df['temper'].to_numpy(dtype=float)
         energy = data_df['energy_demand'].to_numpy(dtype=float)
-        water = data_df['water_demand'].to_numpy(dtype=float)
+        heat = data_df['water_demand'].to_numpy(dtype=float)
         people = data_df['people_count'].to_numpy(dtype=float)
         price = data_df['price'].to_numpy(dtype=float)
 
@@ -145,9 +151,9 @@ class WgzGym(gym.Env):
             transformed_e = transform_function(e)
             add_function(transformed_e)
 
-        process_elements(solar, lambda x: x, self.data_manager.add_load_element)
-        process_elements(temper, lambda x: x, self.data_manager.add_load_element)
-        process_elements(energy, lambda x: x, self.data_manager.add_irradiance_element)
-        process_elements(water, lambda x: x, self.data_manager.add_temperature_element)
-        process_elements(people, lambda x: x, self.data_manager.add_wind_element)
+        process_elements(solar, lambda x: x, self.data_manager.add_solar_element)
+        process_elements(temper, lambda x: x, self.data_manager.add_temper_element)
+        process_elements(energy, lambda x: x, self.data_manager.add_electricity_element)
+        process_elements(heat, lambda x: x, self.data_manager.add_heat_element)
+        process_elements(people, lambda x: x, self.data_manager.add_people_element)
         process_elements(price, lambda x: x, self.data_manager.add_price_element)
diff --git a/models/module.py b/models/module.py
index 3154fe4..08ef162 100644
--- a/models/module.py
+++ b/models/module.py
@@ -1,70 +1,96 @@
 class EC:
     def __init__(self, params):
-        self.current_output = None
-        self.electricity_efficiency = params['electricity_efficiency']
+        self.current_power = None
         self.hydrogen_produce = params['hydrogen_produce']
         self.power_max = params['power_max']
         self.power_min = params['power_min']
         self.ramp = params['ramp']
         self.lifetime = params['lifetime']
         self.equipment_cost = params['equipment_cost']
+        self.electrolysis_efficiency = params['electrolysis_efficiency']
+        self.carbon_reduce = params['carbon_reduce']
 
     def step(self, action_ec):
-        output = self.current_output + action_ec * self.ramp
+        output = self.current_power + action_ec * self.ramp
         output = max(self.power_min, min(self.power_max, output)) if output > 0 else 0
-        self.current_output = output
+        self.current_power = output
 
     def get_cost(self, price):
-        return self.equipment_cost / self.lifetime + price * self.current_output
+        # cost = equipment cost / lifetime * electricity price * (power used / max power)
+        return self.equipment_cost / self.lifetime * price * self.current_power / self.power_max
 
     def get_hydrogen(self):
-        return self.current_output * self.electricity_efficiency * self.hydrogen_produce
+        return self.current_power * self.electrolysis_efficiency * self.hydrogen_produce
+
+    def get_heat(self):
+        return self.current_power * (1 - self.electrolysis_efficiency)
+
+    def less_carbon(self):
+        return self.current_power * self.carbon_reduce
 
     def reset(self):
-        self.current_output = 0
+        self.current_power = 0
 
 
 class HST:
     def __init__(self, params):
-        self.current_capacity = None
-        self.hydrogen_change = None
+        self.current_soc = None
+        self.hydrogen_charge = None
         self.capacity = params['capacity']
         self.min_soc = params['min_soc']
         self.max_soc = params['max_soc']
-        self.degradation = params['degradation']
-        self.holding = params['holding']
-        self.ramp = params['ramp']
-        self.efficiency = params['efficiency']
+        self.lifetime = params['lifetime']
+        self.equipment_cost = params['equipment_cost']
+        self.charge_efficiency = params['charge_efficiency']
+        self.generate_efficiency = params['generate_efficiency']
+        self.lower_heating_value = params['lower_heating_value']
        '''
-        charging rate of the tank = hydrogen production rate of the electrolyzer (will electrolysis cover the hot-water demand?)
+        charging rate of the tank = hydrogen production rate of the electrolyzer (does the heat released by electrolysis cover the hot-water demand?)
+        how to control the above is still to be worked out
+        discharging rate of the tank = power supply (electrolyze more when the price is low, release when the price is high)
        '''
+
     def step(self, action_hst):
-        energy = action_hst * self.ramp
-        current_energy = self.current_capacity * self.capacity
-        updated_capacity = max(self.min_soc, min(self.max_soc, (current_energy + energy) / self.capacity))
-        self.hydrogen_change = (updated_capacity - self.current_capacity) * self.capacity
-        self.current_capacity = updated_capacity  # update capacity to current state
+        energy = action_hst * self.capacity
+        updated_soc = max(self.min_soc, min(self.max_soc, (self.current_soc * self.capacity + energy) / self.capacity))
+        self.hydrogen_charge = (updated_soc - self.current_soc) * self.capacity
+        self.current_soc = updated_soc
 
-    def get_cost(self, energy_change):
-        cost = abs(energy_change) * self.degradation
+    def get_power(self):
+        if self.hydrogen_charge > 0:
+            return self.hydrogen_charge * self.charge_efficiency * self.lower_heating_value * self.generate_efficiency
+        else:
+            return 0
+
+    def get_heat(self):
+        if self.hydrogen_charge < 0:
+            return self.hydrogen_charge * self.charge_efficiency * (1 - self.generate_efficiency)
+        else:
+            return 0
+
+    def get_cost(self):
+        cost = self.equipment_cost / self.lifetime * abs(self.hydrogen_charge)
         return cost
 
-    def SOC(self):
-        return self.current_capacity
-
     def reset(self):
-        self.current_capacity = 0.2
+        self.current_soc = 0.1
 
 
 class Grid:
     def __init__(self):
         self.delta = 1
-        self.exchange_ability = 100
+        self.carbon_increase = 0.9
+        # self.trade_energy = None
 
-    def get_cost(self, current_price, energy_exchange):
-        return current_price * energy_exchange * self.delta
+    def get_cost(self, price, trade_energy):
+        return price * trade_energy * self.delta
+
+    def get_carbon(self, trade_energy):
+        return trade_energy * self.carbon_increase
+
+    # def step(self, action_grid, ec_power_max):
+    #     self.trade_energy = (action_grid + 1) / 2 * ec_power_max  # de-normalize
 
     def retrieve_past_price(self):
         result = []
diff --git a/models/parameters.py b/models/parameters.py
index 9ab1cc8..7321124 100644
--- a/models/parameters.py
+++ b/models/parameters.py
@@ -1,17 +1,21 @@
 EC_parameters = {
-    'electrolysis_efficiency': 0.8,
     'hydrogen_produce': 0.5,
     'power_max': 200,
     'power_min': 0,
     'ramp': 100,
     'lifetime': 6000,  # hour
     'equipment_cost': 10000,  # yuan
-    'carbon_reduce': 1,
+    'electrolysis_efficiency': 0.8,
+    'carbon_reduce': 0.9,
 }
 
 HST_parameters = {
     'capacity': 1000,
     'min_soc': 0.1,
     'max_soc': 0.9,
-    'efficiency': 0.95,
+    'lifetime': 6000,  # hour
+    'equipment_cost': 10000,  # yuan
+    'charge_efficiency': 0.95,
+    'generate_efficiency': 0.6,
+    'lower_heating_value': 33.33,
 }
diff --git a/models/tools.py b/models/tools.py
index 1432d1f..703e011 100644
--- a/models/tools.py
+++ b/models/tools.py
@@ -2,39 +2,24 @@ import torch
 
 
 def test_one_episode(env, act, device):
-    """to get evaluate information, here record the unbalance of after taking action"""
-    record_state = []
-    record_action = []
-    record_reward = []
-    record_unbalance = []
-    record_system_info = []  # [time,price,netload,action,real action,soc,output*4,unbalance(exchange+penalty),cost]
-    record_init_info = []  # include month,day,time,intial soc
+    """collect evaluation information and record the unbalance after each action"""
+    record_system_info = []  # same as observation
+    record_init_info = []  # include month,day,time
     env.TRAIN = False
     state = env.reset()
-    record_init_info.append([env.month, env.day, env.current_time, env.battery.current_capacity])
-    print(f'current testing month is {env.month}, day is {env.day},initial_soc is {env.battery.current_capacity}')
+    record_init_info.append([env.month, env.day, env.current_time])
+    print(f'current testing month is {env.month}, day is {env.day}')
     for i in range(24):
         s_tensor = torch.as_tensor((state,), device=device)
         a_tensor = act(s_tensor)
-        action = a_tensor.detach().cpu().numpy()[0]  # not need detach(), because with torch.no_grad() outside
-        real_action = action
+        action = a_tensor.detach().cpu().numpy()[0]
         state, next_state, reward, done = env.step(action)
-        record_system_info.append([state[0], state[1], state[3] + env.wind.current_power, action, real_action,
-                                   env.battery.SOC(), env.battery.energy_change, next_state[4], next_state[5],
-                                   next_state[6], env.solar.current_power, env.wind.current_power, env.unbalance,
-                                   env.operation_cost, reward])
-        record_state.append(state)
-        record_action.append(real_action)
-        record_reward.append(reward)
-        record_unbalance.append(env.unbalance)
+        record_system_info.append([state[1], state[2], env.HST.current_soc, env.HST.get_power(),
+                                   env.EC.current_power, action, reward])
         state = next_state
-    # add information of last step dg1, dh2, dg3, soc, tem, irr
-    record_system_info[-1][7:12] = [env.final_step_outputs[0], env.final_step_outputs[1], env.final_step_outputs[2],
-                                    env.final_step_outputs[4], env.final_step_outputs[5]]
-    record_system_info[-1][5] = env.final_step_outputs[3]
-    record = {'init_info': record_init_info, 'system_info': record_system_info, 'state': record_state,
-              'action': record_action, 'reward': record_reward, 'unbalance': record_unbalance}
+    # add information of last step EC, HST.current_soc, HST.power, grid
+    record_system_info[-1][2:5] = [env.final_step_outputs[0], env.final_step_outputs[1], env.final_step_outputs[2]]
+    record = {'init_info': record_init_info, 'system_info': record_system_info}
     return record
 
 
@@ -49,7 +34,7 @@ def get_episode_return(env, act, device):
         state, next_state, reward, done, = env.step(action)
         state = next_state
         episode_reward += reward
-        episode_unbalance += env.real_unbalance
+        episode_unbalance += env.unbalance
         if done:
             break
     return episode_reward, episode_unbalance
diff --git a/train.py b/train.py
index c1403df..d70cdae 100644
--- a/train.py
+++ b/train.py
@@ -3,11 +3,12 @@
 import pickle
 os.environ['OMP_WAIT_POLICY'] = 'PASSIVE'  # make sure this is set before torch is imported
 from copy import deepcopy
-import pandas as pd
+import numpy as np
+import torch
 import torch.nn.functional as F
 from models.env import WgzGym
 from models.net import ActorPPO, CriticAdv
-from models.tools import get_episode_return, test_one_episode
+from models.tools import get_episode_return
 
 
 def smooth_rewards(rewards, window=10):
@@ -170,8 +171,6 @@ class Arguments:
     def __init__(self, agent=None, env=None):
         self.agent = agent
         self.env = env
-        self.cwd = None  # current work directory. None means set automatically
-        self.if_remove = False  # remove the cwd folder? (True, False, None:ask me)
         self.visible_gpu = '0'  # os.environ['CUDA_VISIBLE_DEVICES'] = '0, 2,'
         self.num_threads = 32  # cpu_num for evaluate model
@@ -193,14 +192,8 @@ class Arguments:
         self.random_seed_list = [1234]
         self.train = True
         self.save_network = True
-        self.test_network = True
-        self.save_test_data = True
 
     def init_before_training(self):
-        if self.cwd is None:
-            agent_name = self.agent.__class__.__name__
-            self.cwd = f'./{agent_name}'
-
         np.random.seed(self.random_seed)
         torch.manual_seed(self.random_seed)
         torch.set_num_threads(self.num_threads)
@@ -216,7 +209,6 @@ if __name__ == '__main__':
     for seed in args.random_seed_list:
         args.random_seed = seed
         args.agent = AgentPPO()
-        agent_name = f'{args.agent.__class__.__name__}'
         args.agent.cri_target = True
         args.env = WgzGym()
         args.init_before_training()
@@ -225,9 +217,9 @@
         agent = args.agent
         env = args.env
         agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate)
         gamma = args.gamma
-        batch_size = args.batch_size  # data used to update net
-        target_step = args.target_step  # steps of one episode should stop
-        repeat_times = args.repeat_times  # times should update for one batch size data
+        batch_size = args.batch_size
+        target_step = args.target_step
+        repeat_times = args.repeat_times
         soft_update_tau = args.soft_update_tau
         num_episode = args.num_episode
         agent.state = env.reset()
@@ -236,8 +228,6 @@
         '''init training params'''
         # args.train = False
         # args.save_network = False
-        # args.test_network = False
-        # args.save_test_data = False
         if args.train:
             for i_episode in range(num_episode):
                 with torch.no_grad():
@@ -255,9 +245,9 @@
                 reward_record['unbalance'].append(episode_unbalance)
                 print(f'episode: {i_episode}, reward: {episode_reward}, unbalance: {episode_unbalance}')
 
-        act_save_path = f'{args.cwd}/actor.pth'
-        loss_record_path = f'{args.cwd}/loss.pkl'
-        reward_record_path = f'{args.cwd}/reward.pkl'
+        act_save_path = './data/actor.pth'
+        loss_record_path = './data/loss.pkl'
+        reward_record_path = './data/reward.pkl'
 
         if args.save_network:
             with open(loss_record_path, 'wb') as tf:
@@ -266,16 +256,3 @@
                 pickle.dump(reward_record, tf)
             torch.save(agent.act.state_dict(), act_save_path)
             print('actor params have been saved')
-
-        if args.test_network:
-            args.cwd = agent_name
-            agent.act.load_state_dict(torch.load(act_save_path))
-            print('params have been reload and test')
-            record = test_one_episode(env, agent.act, agent.device)
-            eval_data = pd.DataFrame(record['system_info'])
-            eval_data.columns = ['time_step', 'price', 'load', 'action', 'real_action', 'soc', 'battery',
-                                 'gen1', 'gen2', 'gen3', 'pv', 'wind', 'unbalance', 'operation_cost', 'reward']
-            if args.save_test_data:
-                test_data_save_path = f'{args.cwd}/test.pkl'
-                with open(test_data_save_path, 'wb') as tf:
-                    pickle.dump(record, tf)
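
Note (not part of the patch): the interactive sender_thread is only one way to drive the new inference service; run_service_test(env, agent, data) can also be called directly with the six values that WgzGym._build_state reads from env.rec_data when TRAIN is False. Below is a minimal sketch under the assumptions that Arguments, AgentPPO and WgzGym are importable from train (inference.py itself relies on "from train import *"), that agent.init / agent.act / agent.device behave as used in inference.main(), and that ./data/actor.pth already exists; the sample numbers are made up.

    import torch
    from train import Arguments, AgentPPO, WgzGym
    from inference import run_service_test

    args = Arguments()
    agent, env = AgentPPO(), WgzGym()
    env.TRAIN = False  # make _build_state read from env.rec_data instead of the DataManager
    agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate)
    agent.act.load_state_dict(torch.load('./data/actor.pth'))

    # order expected by _build_state: [price, temper, solar, load, heat, people] (hypothetical values)
    run_service_test(env, agent, [0.6, 25.0, 120.0, 80.0, 40.0, 15.0])

The chosen action and the averaged reward/unbalance are appended to data/service_actions.csv and data/service_result.csv, the same files the listener_thread writes to when data arrives through the queue.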