import json
import os

import gurobipy as gp
import numpy as np
import pandas as pd
import torch
from gurobipy import GRB


def optimization_base_result(env, month, day, initial_soc=0.2):
    price = env.data_manager.get_series_price_data(month, day)
    load = env.data_manager.get_series_load_cons_data(month, day)
    temperature = env.data_manager.get_series_temperature_data(month, day)
    irradiance = env.data_manager.get_series_irradiance_data(month, day)
    wind_speed = env.data_manager.get_series_wind_data(month, day)
    period = env.episode_length
    DG_parameters = env.dg_parameters

    def get_dg_info(parameters):
        p_max = []
        p_min = []
        ramping_up = []
        ramping_down = []
        a_para = []
        b_para = []
        c_para = []
        for name, gen_info in parameters.items():
            p_max.append(gen_info['power_output_max'])
            p_min.append(gen_info['power_output_min'])
            ramping_up.append(gen_info['ramping_up'])
            ramping_down.append(gen_info['ramping_down'])
            a_para.append(gen_info['a'])
            b_para.append(gen_info['b'])
            c_para.append(gen_info['c'])
        return p_max, p_min, ramping_up, ramping_down, a_para, b_para, c_para

    p_max, p_min, ramping_up, ramping_down, a_para, b_para, c_para = get_dg_info(parameters=DG_parameters)
    NUM_GEN = len(DG_parameters.keys())
    battery_capacity = env.battery.capacity
    battery_efficiency = env.battery.efficiency
    solar_cofficient = env.solar.opex_cofficient
    wind_cofficient = env.wind.opex_cofficient
    battery_degradation = env.battery.degradation
    battery_holding = env.battery.holding

    m = gp.Model("UC")
    m.setParam('LogToConsole', 0)
    # unit-commitment decision variables
    on_off = m.addVars(NUM_GEN, period, vtype=GRB.BINARY, name='on_off')
    gen_output = m.addVars(NUM_GEN, period, vtype=GRB.CONTINUOUS, name='output')
    # battery charge/discharge variables and bounds
    soc = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0.2, ub=0.8, name='SOC')
    battery_energy_change = m.addVars(period, vtype=GRB.CONTINUOUS, lb=-env.battery.max_charge,
                                      ub=env.battery.max_charge, name='battery_action')
    # exchange between the external grid and the energy system
    grid_energy_import = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, ub=env.grid.exchange_ability, name='import')
    grid_energy_export = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, ub=env.grid.exchange_ability, name='export')
    # voltage control variables
    pv_voltage = m.addVars(period, vtype=GRB.CONTINUOUS, lb=-1, ub=1, name='pv_voltage')

    # PV and wind power output
    pv = [(0.2 * irradiance[t] + 0.05 * temperature[t] - 9.25) * (1 + pv_voltage[t]) for t in range(period)]
    wind = [172.265625 * wind_speed[t] ** 3 / 1e3 if 3 <= wind_speed[t] < 8
            else (172.265625 * 8 ** 3 / 1e3 if 8 <= wind_speed[t] < 12 else 0)
            for t in range(period)]

    # 1. power balance constraint
    m.addConstrs(((sum(gen_output[g, t] for g in range(NUM_GEN)) + pv[t] + wind[t] + grid_energy_import[t]
                   >= load[t] + battery_energy_change[t] + grid_energy_export[t])
                  for t in range(period)), name='powerbalance')
    # 2. generator maximum/minimum output constraints
    m.addConstrs((gen_output[g, t] <= on_off[g, t] * p_max[g]
                  for g in range(NUM_GEN) for t in range(period)), 'gen_output_max')
    m.addConstrs((gen_output[g, t] >= on_off[g, t] * p_min[g]
                  for g in range(NUM_GEN) for t in range(period)), 'gen_output_min')
    # 3. ramping-up and ramping-down constraints
    m.addConstrs((gen_output[g, t + 1] - gen_output[g, t] <= ramping_up[g]
                  for g in range(NUM_GEN) for t in range(period - 1)), 'ramping_up')
    m.addConstrs((gen_output[g, t] - gen_output[g, t + 1] <= ramping_down[g]
                  for g in range(NUM_GEN) for t in range(period - 1)), 'ramping_down')
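    # The SoC recursion enforced by constraint 4 below is
    #     battery_capacity * soc[t] = battery_capacity * soc[t-1] + efficiency * dE[t],
    # where dE[t] = battery_energy_change[t] is the signed energy moved in step t
    # and the efficiency is applied symmetrically to charging and discharging.
    # Worked example with illustrative numbers (not read from env): with
    # capacity = 500 kWh, efficiency = 0.9 and dE = +50 kWh, the SoC rises by
    # 0.9 * 50 / 500 = 0.09 in one step.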
    # 4. battery SoC dynamics constraints
    m.addConstr(battery_capacity * soc[0] == battery_capacity * initial_soc
                + (battery_energy_change[0] * battery_efficiency), name='soc0')
    m.addConstrs((battery_capacity * soc[t] == battery_capacity * soc[t - 1]
                  + (battery_energy_change[t] * battery_efficiency)
                  for t in range(1, period)), name='soc update')

    # cost terms of the objective
    cost_gen = gp.quicksum((a_para[g] * gen_output[g, t] * gen_output[g, t] + b_para[g] * gen_output[g, t]
                            + c_para[g] * on_off[g, t]) for t in range(period) for g in range(NUM_GEN))
    cost_battery = gp.quicksum(battery_energy_change[t] * battery_degradation
                               + soc[t] * battery_holding for t in range(period))
    cost_import = gp.quicksum(grid_energy_import[t] * price[t] for t in range(period))
    cost_export = gp.quicksum(grid_energy_export[t] * price[t] * env.sell_coefficient for t in range(period))
    cost_solar = gp.quicksum(pv[t] * solar_cofficient for t in range(period))
    cost_wind = gp.quicksum(wind[t] * wind_cofficient for t in range(period))

    m.setObjective((cost_gen + cost_battery + cost_import - cost_export + cost_solar + cost_wind), GRB.MINIMIZE)
    m.optimize()

    # record the solved schedule for plotting
    pv = [(0.2 * irradiance[t] + 0.05 * temperature[t] - 9.25) * (1 + pv_voltage[t].x) for t in range(period)]
    output_record = {'pv': [], 'wind': [], 'price': [], 'load': [], 'netload': [], 'soc': [],
                     'battery_energy_change': [], 'grid_import': [], 'grid_export': [],
                     'gen1': [], 'gen2': [], 'gen3': [], 'step_cost': []}
    for t in range(period):
        gen_cost = sum((on_off[g, t].x * (a_para[g] * gen_output[g, t].x * gen_output[g, t].x
                                          + b_para[g] * gen_output[g, t].x + c_para[g])) for g in range(NUM_GEN))
        battery_cost = battery_energy_change[t].x * battery_degradation + soc[t].x * battery_holding
        import_cost = grid_energy_import[t].x * price[t]
        export_cost = grid_energy_export[t].x * price[t] * env.sell_coefficient
        solar_cost = pv[t] * solar_cofficient
        wind_cost = wind[t] * wind_cofficient
        output_record['pv'].append(pv[t])
        output_record['wind'].append(wind[t])
        output_record['price'].append(price[t])
        output_record['load'].append(load[t])
        output_record['netload'].append(load[t] - pv[t] - wind[t])
        output_record['soc'].append(soc[t].x)
        output_record['battery_energy_change'].append(battery_energy_change[t].x)
        output_record['grid_import'].append(grid_energy_import[t].x)
        output_record['grid_export'].append(grid_energy_export[t].x)
        output_record['gen1'].append(gen_output[0, t].x)
        output_record['gen2'].append(gen_output[1, t].x)
        output_record['gen3'].append(gen_output[2, t].x)
        output_record['step_cost'].append(gen_cost + battery_cost + import_cost - export_cost
                                          + solar_cost + wind_cost)
    output_record_df = pd.DataFrame.from_dict(output_record)
    return output_record_df
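
# Standalone restatement of the piecewise wind-power curve used inside
# optimization_base_result above, for reference and quick sanity checks only.
# The name wind_power_kw is illustrative and is not used elsewhere in this
# file: cubic region between cut-in (3 m/s) and rated speed (8 m/s), constant
# rated output up to cut-out (12 m/s), zero otherwise.
def wind_power_kw(v):
    if 3 <= v < 8:
        return 172.265625 * v ** 3 / 1e3
    if 8 <= v < 12:
        return 172.265625 * 8 ** 3 / 1e3  # rated output = 88.2 kW
    return 0.0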

class Arguments:
    """Revise here for our own purpose."""

    def __init__(self, agent=None, env=None):
        self.agent = agent
        self.env = env
        self.cwd = None  # current working directory; None means it is set automatically
        self.if_remove = False  # remove the cwd folder? (True, False, None: ask me)
        self.visible_gpu = '0,1,2,3'  # e.g. os.environ['CUDA_VISIBLE_DEVICES'] = '0,2'
        self.num_threads = 32  # CPU threads for evaluating the model, torch.set_num_threads(self.num_threads)

        '''Arguments for training'''
        self.num_episode = 1000
        self.gamma = 0.995  # discount factor of future rewards
        self.learning_rate = 2 ** -14  # ≈ 6e-5
        self.soft_update_tau = 2 ** -8  # 2 ** -8 ≈ 4e-3
        self.net_dim = 256  # the network width
        self.batch_size = 4096  # number of transitions sampled from the replay buffer
        self.repeat_times = 2 ** 5  # repeatedly update the network to keep the critic's loss small
        self.target_step = 4096  # collect target_step experiences, then update the network
        self.max_memo = 500000  # capacity of the replay buffer
        self.if_per_or_gae = False  # PER for off-policy sparse rewards: Prioritized Experience Replay

        '''Arguments for evaluation'''
        self.random_seed = 0  # initialized in self.init_before_training()
        # self.random_seed_list = [1234, 2234, 3234, 4234, 5234]
        self.random_seed_list = [1234]

        '''Arguments for saving and plotting'''
        self.train = True
        self.save_network = True
        self.test_network = True
        self.save_test_data = True
        self.compare_with_gurobi = True
        self.plot_on = True

    def init_before_training(self, if_main):
        if self.cwd is None:
            agent_name = self.agent.__class__.__name__
            self.cwd = f'./{agent_name}'
        if if_main:
            import shutil
            # remove history according to bool(if_remove)
            if self.if_remove is None:
                self.if_remove = bool(input(f"| PRESS 'y' to REMOVE: {self.cwd}? ") == 'y')
            elif self.if_remove:
                shutil.rmtree(self.cwd, ignore_errors=True)
                print(f"| Remove cwd: {self.cwd}")
            os.makedirs(self.cwd, exist_ok=True)
        np.random.seed(self.random_seed)
        torch.manual_seed(self.random_seed)
        torch.set_num_threads(self.num_threads)
        torch.set_default_dtype(torch.float32)
        os.environ['CUDA_VISIBLE_DEVICES'] = str(self.visible_gpu)


def test_one_episode(env, act, device):
    """Collect evaluation information; records the unbalance left after each action."""
    record_state = []
    record_action = []
    record_reward = []
    record_unbalance = []
    # [time, price, netload, action, real action, soc, battery energy change,
    #  dg1, dg2, dg3, solar, wind, unbalance (exchange + penalty), operation cost, reward]
    record_system_info = []
    record_init_info = []  # month, day, time, initial soc
    env.TRAIN = False
    state = env.reset()
    record_init_info.append([env.month, env.day, env.current_time, env.battery.current_capacity])
    print(f'current testing month is {env.month}, day is {env.day}, initial soc is {env.battery.current_capacity}')
    for i in range(24):
        s_tensor = torch.as_tensor((state,), device=device)
        a_tensor = act(s_tensor)
        action = a_tensor.detach().cpu().numpy()[0]  # detach() is unnecessary when wrapped in torch.no_grad() outside
        real_action = action
        state, next_state, reward, done = env.step(action)
        record_system_info.append([state[0], state[1], state[3] + env.wind.current_power, action, real_action,
                                   env.battery.SOC(), env.battery.energy_change, next_state[4], next_state[5],
                                   next_state[6], env.solar.current_power, env.wind.current_power, env.unbalance,
                                   env.operation_cost, reward])
        record_state.append(state)
        record_action.append(real_action)
        record_reward.append(reward)
        record_unbalance.append(env.unbalance)
        state = next_state
    # overwrite the last step with the final outputs: dg1, dg2, dg3, temperature, irradiance
    # go to slots 7:12, and the final soc goes to slot 5
    record_system_info[-1][7:12] = [env.final_step_outputs[0], env.final_step_outputs[1], env.final_step_outputs[2],
                                    env.final_step_outputs[4], env.final_step_outputs[5]]
    record_system_info[-1][5] = env.final_step_outputs[3]
    record = {'init_info': record_init_info, 'system_info': record_system_info, 'state': record_state,
              'action': record_action, 'reward': record_reward, 'unbalance': record_unbalance}
    return record


def get_episode_return(env, act, device):
    episode_reward = 0.0  # sum of rewards in an episode
    episode_unbalance = 0.0
    state = env.reset()
    for i in range(24):
        s_tensor = torch.as_tensor((state,), device=device)
        a_tensor = act(s_tensor)
        action = a_tensor.detach().cpu().numpy()[0]  # detach() is unnecessary when wrapped in torch.no_grad() outside
        state, next_state, reward, done = env.step(action)
        state = next_state
        episode_reward += reward
        episode_unbalance += env.real_unbalance
        if done:
            break
    return episode_reward, episode_unbalance
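
# A minimal sketch of how a batch of transitions can be packed into the
# `other` array expected by ReplayBuffer.extend_buffer below: each row is
# (reward, mask, *action), matching other_dim = 1 + 1 + action_dim and the
# column split in sample_batch. The helper name pack_other and the
# mask-equals-gamma-when-not-done convention are assumptions for
# illustration, not part of the original pipeline.
def pack_other(rewards, dones, actions, gamma=0.995):
    masks = (1.0 - np.asarray(dones, dtype=np.float32)) * gamma
    return np.hstack((np.asarray(rewards, dtype=np.float32).reshape(-1, 1),
                      masks.reshape(-1, 1),
                      np.asarray(actions, dtype=np.float32)))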

class ReplayBuffer:
    def __init__(self, max_len, state_dim, action_dim, gpu_id=0):
        self.now_len = 0
        self.next_idx = 0
        self.if_full = False
        self.max_len = max_len
        self.data_type = torch.float32
        self.action_dim = action_dim
        self.device = torch.device(f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu")

        other_dim = 1 + 1 + self.action_dim  # (reward, mask, action)
        self.buf_other = torch.empty(size=(max_len, other_dim), dtype=self.data_type, device=self.device)

        if isinstance(state_dim, int):  # state is a flat vector
            self.buf_state = torch.empty((max_len, state_dim), dtype=torch.float32, device=self.device)
        elif isinstance(state_dim, tuple):  # state is an image-like tensor
            self.buf_state = torch.empty((max_len, *state_dim), dtype=torch.uint8, device=self.device)
        else:
            raise ValueError('state_dim')

    def extend_buffer(self, state, other):  # CPU array to CPU array
        size = len(other)
        next_idx = self.next_idx + size

        if next_idx > self.max_len:  # wrap around the ring buffer
            self.buf_state[self.next_idx:self.max_len] = state[:self.max_len - self.next_idx]
            self.buf_other[self.next_idx:self.max_len] = other[:self.max_len - self.next_idx]
            self.if_full = True

            next_idx = next_idx - self.max_len
            self.buf_state[0:next_idx] = state[-next_idx:]
            self.buf_other[0:next_idx] = other[-next_idx:]
        else:
            self.buf_state[self.next_idx:next_idx] = state
            self.buf_other[self.next_idx:next_idx] = other
        self.next_idx = next_idx

    def sample_batch(self, batch_size) -> tuple:
        indices = np.random.randint(self.now_len - 1, size=batch_size)
        r_m_a = self.buf_other[indices]
        return (r_m_a[:, 0:1],  # reward
                r_m_a[:, 1:2],  # mask
                r_m_a[:, 2:],  # action
                self.buf_state[indices],  # state
                self.buf_state[indices + 1])  # next state

    def update_now_len(self):
        self.now_len = self.max_len if self.if_full else self.next_idx
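
# A quick self-contained check of the ring-buffer wrap-around logic above.
# The dimensions below are illustrative (hypothetical), not taken from the
# training pipeline; gpu_id=-1 forces the CPU device.
if __name__ == '__main__':
    buf = ReplayBuffer(max_len=8, state_dim=3, action_dim=2, gpu_id=-1)
    states = torch.zeros((5, 3))
    others = torch.zeros((5, 4))  # columns: (reward, mask, action0, action1)
    buf.extend_buffer(states, others)
    buf.extend_buffer(states, others)  # 10 > 8 entries: wraps past max_len
    buf.update_now_len()
    reward, mask, action, state, next_state = buf.sample_batch(batch_size=4)
    print(buf.now_len, state.shape, action.shape)  # 8 torch.Size([4, 3]) torch.Size([4, 2])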