import json
import os

import gurobipy as gp
import numpy as np
import pandas as pd
import torch
from gurobipy import GRB


def optimization_base_result(env, month, day, initial_soc=0.2):
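    """Solve a day-ahead unit-commitment model in Gurobi as the optimal baseline.

    Co-optimizes generator commitment, battery dispatch, grid exchange, and PV
    voltage control over one day, and returns a per-step DataFrame of the
    optimal schedule and its cost for comparison against the learned policy.
    """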
    price = env.data_manager.get_series_price_data(month, day)
    load = env.data_manager.get_series_load_cons_data(month, day)
    temperature = env.data_manager.get_series_temperature_data(month, day)
    irradiance = env.data_manager.get_series_irradiance_data(month, day)
    wind_speed = env.data_manager.get_series_wind_data(month, day)

    period = env.episode_length
    DG_parameters = env.dg_parameters

    def get_dg_info(parameters):
        p_max = []
        p_min = []
        ramping_up = []
        ramping_down = []
        a_para = []
        b_para = []
        c_para = []
        for name, gen_info in parameters.items():
            p_max.append(gen_info['power_output_max'])
            p_min.append(gen_info['power_output_min'])
            ramping_up.append(gen_info['ramping_up'])
            ramping_down.append(gen_info['ramping_down'])
            a_para.append(gen_info['a'])
            b_para.append(gen_info['b'])
            c_para.append(gen_info['c'])
        return p_max, p_min, ramping_up, ramping_down, a_para, b_para, c_para

    p_max, p_min, ramping_up, ramping_down, a_para, b_para, c_para = get_dg_info(parameters=DG_parameters)
    NUM_GEN = len(DG_parameters.keys())
    battery_capacity = env.battery.capacity
    battery_efficiency = env.battery.efficiency
    solar_coefficient = env.solar.opex_cofficient
    wind_coefficient = env.wind.opex_cofficient
    battery_degradation = env.battery.degradation
    battery_holding = env.battery.holding

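    # Each generator's fuel cost is quadratic in its output: a * P^2 + b * P + c,
    # with the constant term c incurred only while the unit is committed.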
    m = gp.Model("UC")
    m.setParam('LogToConsole', 0)

    # System decision variables
    on_off = m.addVars(NUM_GEN, period, vtype=GRB.BINARY, name='on_off')
    gen_output = m.addVars(NUM_GEN, period, vtype=GRB.CONTINUOUS, name='output')
    # Battery charge/discharge variables (bounds act as constraints)
    soc = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0.2, ub=0.8, name='SOC')
    battery_energy_change = m.addVars(period, vtype=GRB.CONTINUOUS, lb=-env.battery.max_charge,
                                      ub=env.battery.max_charge, name='battery_action')
    # Energy exchange with the external grid
    grid_energy_import = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, ub=env.grid.exchange_ability, name='import')
    grid_energy_export = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, ub=env.grid.exchange_ability, name='export')
    # PV voltage control variable
    pv_voltage = m.addVars(period, vtype=GRB.CONTINUOUS, lb=-1, ub=1, name='pv_voltage')

    # Compute PV and wind power output
    pv = [(0.2 * irradiance[t] + 0.05 * temperature[t] - 9.25) * (1 + pv_voltage[t]) for t in range(period)]
    wind = [172.265625 * wind_speed[t] ** 3 / 1e3 if 3 <= wind_speed[t] < 8
            else (172.265625 * 8 ** 3 / 1e3 if 8 <= wind_speed[t] < 12 else 0) for t in range(period)]

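    # The wind expression above implements a piecewise power curve: zero below
    # the 3 m/s cut-in speed, cubic growth from 3 to 8 m/s, rated output from
    # 8 to 12 m/s, and shutdown at or above the 12 m/s cut-out speed.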
    # 1. Power balance constraint
    m.addConstrs(((sum(gen_output[g, t] for g in range(NUM_GEN)) + pv[t] + wind[t] + grid_energy_import[t] >= load[t]
                   + battery_energy_change[t] + grid_energy_export[t]) for t in range(period)), name='powerbalance')
    # 2. Generator maximum/minimum output constraints
    m.addConstrs((gen_output[g, t] <= on_off[g, t] * p_max[g] for g in range(NUM_GEN) for t in range(period)),
                 'gen_output_max')
    m.addConstrs((gen_output[g, t] >= on_off[g, t] * p_min[g] for g in range(NUM_GEN) for t in range(period)),
                 'gen_output_min')
    # 3. Ramping-up and ramping-down constraints
    m.addConstrs((gen_output[g, t + 1] - gen_output[g, t] <= ramping_up[g] for g in range(NUM_GEN)
                  for t in range(period - 1)), 'ramping_up')
    m.addConstrs((gen_output[g, t] - gen_output[g, t + 1] <= ramping_down[g] for g in range(NUM_GEN)
                  for t in range(period - 1)), 'ramping_down')
    # 4. Battery state-of-charge constraints
    m.addConstr(battery_capacity * soc[0] == battery_capacity * initial_soc +
                (battery_energy_change[0] * battery_efficiency), name='soc0')
    m.addConstrs((battery_capacity * soc[t] == battery_capacity * soc[t - 1] +
                  (battery_energy_change[t] * battery_efficiency) for t in range(1, period)), name='soc_update')
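    # Sign convention: battery_energy_change > 0 charges the battery (SOC rises)
    # and battery_energy_change < 0 discharges it; the same efficiency factor
    # scales both directions in the SOC updates above.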
    # Cost terms of the objective
    cost_gen = gp.quicksum(
        (a_para[g] * gen_output[g, t] * gen_output[g, t] + b_para[g] * gen_output[g, t] + c_para[g] * on_off[g, t])
        for t in range(period) for g in range(NUM_GEN))
    cost_battery = gp.quicksum(
        battery_energy_change[t] * battery_degradation + soc[t] * battery_holding for t in range(period))
    cost_import = gp.quicksum(grid_energy_import[t] * price[t] for t in range(period))
    cost_export = gp.quicksum(grid_energy_export[t] * price[t] * env.sell_coefficient for t in range(period))
    cost_solar = gp.quicksum(pv[t] * solar_coefficient for t in range(period))
    cost_wind = gp.quicksum(wind[t] * wind_coefficient for t in range(period))

    m.setObjective((cost_gen + cost_battery + cost_import - cost_export + cost_solar + cost_wind), GRB.MINIMIZE)
    m.optimize()

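    # Reading .x values below assumes the solve reached optimality; fail fast otherwise.
    assert m.status == GRB.OPTIMAL, f'Gurobi terminated with status {m.status}'
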
    # Record the solution for plotting
    pv = [(0.2 * irradiance[t] + 0.05 * temperature[t] - 9.25) * (1 + pv_voltage[t].x) for t in range(period)]
    output_record = {'pv': [], 'wind': [], 'price': [], 'load': [], 'netload': [],
                     'soc': [], 'battery_energy_change': [], 'grid_import': [], 'grid_export': [],
                     'gen1': [], 'gen2': [], 'gen3': [], 'step_cost': []}
    for t in range(period):
        gen_cost = sum((on_off[g, t].x * (
                a_para[g] * gen_output[g, t].x * gen_output[g, t].x + b_para[g] * gen_output[g, t].x + c_para[g]))
                       for g in range(NUM_GEN))
        battery_cost = battery_energy_change[t].x * battery_degradation + soc[t].x * battery_holding
        import_cost = grid_energy_import[t].x * price[t]
        export_cost = grid_energy_export[t].x * price[t] * env.sell_coefficient
        solar_cost = pv[t] * solar_coefficient
        wind_cost = wind[t] * wind_coefficient
        output_record['pv'].append(pv[t])
        output_record['wind'].append(wind[t])
        output_record['price'].append(price[t])
        output_record['load'].append(load[t])
        output_record['netload'].append(load[t] - pv[t] - wind[t])
        output_record['soc'].append(soc[t].x)
        output_record['battery_energy_change'].append(battery_energy_change[t].x)
        output_record['grid_import'].append(grid_energy_import[t].x)
        output_record['grid_export'].append(grid_energy_export[t].x)
        output_record['gen1'].append(gen_output[0, t].x)
        output_record['gen2'].append(gen_output[1, t].x)
        output_record['gen3'].append(gen_output[2, t].x)
        output_record['step_cost'].append(gen_cost + battery_cost + import_cost - export_cost + solar_cost + wind_cost)

    output_record_df = pd.DataFrame.from_dict(output_record)
    return output_record_df


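# Example use of the Gurobi baseline (sketch; `ESSEnv` is a stand-in name for
# the project's environment class, which is not defined in this file):
#
#     env = ESSEnv()
#     base_df = optimization_base_result(env, month=7, day=15, initial_soc=0.4)
#     print(base_df[['soc', 'step_cost']].head())
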
class Arguments:
    """Run configuration: revise here for our own purposes."""

    def __init__(self, agent=None, env=None):
        self.agent = agent
        self.env = env
        self.cwd = None  # current working directory; None means set automatically
        self.if_remove = False  # remove the cwd folder? (True, False, None: ask me)
        self.visible_gpu = '0,1,2,3'  # for example: os.environ['CUDA_VISIBLE_DEVICES'] = '0,2'
        self.num_threads = 32  # CPU threads for evaluating the model, torch.set_num_threads(self.num_threads)

        '''Arguments for training'''
        self.num_episode = 1000
        self.gamma = 0.995  # discount factor of future rewards
        self.learning_rate = 2 ** -14  # roughly 6e-5
        self.soft_update_tau = 2 ** -8  # 2 ** -8 ~= 4e-3
        self.net_dim = 256  # the network width
        self.batch_size = 4096  # number of transitions sampled from the replay buffer
        self.repeat_times = 2 ** 5  # repeatedly update the network to keep the critic's loss small
        self.target_step = 4096  # collect target_step experiences, then update the network
        self.max_memo = 500000  # capacity of the replay buffer
        self.if_per_or_gae = False  # PER (Prioritized Experience Replay) for off-policy sparse rewards

        '''Arguments for evaluation'''
        self.random_seed = 0  # random seed, initialized in self.init_before_training()
        # self.random_seed_list = [1234, 2234, 3234, 4234, 5234]
        self.random_seed_list = [1234]

        '''Arguments for saving and plotting'''
        self.train = True
        self.save_network = True
        self.test_network = True
        self.save_test_data = True
        self.compare_with_gurobi = True
        self.plot_on = True

    def init_before_training(self, if_main):
        if self.cwd is None:
            agent_name = self.agent.__class__.__name__
            self.cwd = f'./{agent_name}'

        if if_main:
            import shutil  # remove training history according to bool(if_remove)
            if self.if_remove is None:
                self.if_remove = bool(input(f"| PRESS 'y' to REMOVE: {self.cwd}? ") == 'y')
            elif self.if_remove:
                shutil.rmtree(self.cwd, ignore_errors=True)
                print(f"| Remove cwd: {self.cwd}")
            os.makedirs(self.cwd, exist_ok=True)

        np.random.seed(self.random_seed)
        torch.manual_seed(self.random_seed)
        torch.set_num_threads(self.num_threads)
        torch.set_default_dtype(torch.float32)

        os.environ['CUDA_VISIBLE_DEVICES'] = str(self.visible_gpu)


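# Typical configuration flow (sketch; `AgentDDPG` and `ESSEnv` are stand-in
# names for the project's agent and environment classes):
#
#     args = Arguments(agent=AgentDDPG(), env=ESSEnv())
#     args.num_episode = 500
#     args.init_before_training(if_main=True)
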
def test_one_episode(env, act, device):
    """Evaluate one episode and record the unbalance left after each action."""
    record_state = []
    record_action = []
    record_reward = []
    record_unbalance = []
    record_system_info = []  # [time, price, netload, action, real action, soc, output*4, unbalance(exchange+penalty), cost]
    record_init_info = []  # includes month, day, time, initial soc
    env.TRAIN = False
    state = env.reset()
    record_init_info.append([env.month, env.day, env.current_time, env.battery.current_capacity])
    print(f'current testing month is {env.month}, day is {env.day}, initial soc is {env.battery.current_capacity}')
    for i in range(24):
        s_tensor = torch.as_tensor((state,), device=device)
        a_tensor = act(s_tensor)
        action = a_tensor.detach().cpu().numpy()[0]  # detach() is redundant when torch.no_grad() is applied outside
        real_action = action
        state, next_state, reward, done = env.step(action)

        record_system_info.append([state[0], state[1], state[3] + env.wind.current_power, action, real_action,
                                   env.battery.SOC(), env.battery.energy_change, next_state[4], next_state[5],
                                   next_state[6], env.solar.current_power, env.wind.current_power, env.unbalance,
                                   env.operation_cost, reward])
        record_state.append(state)
        record_action.append(real_action)
        record_reward.append(reward)
        record_unbalance.append(env.unbalance)
        state = next_state
    # overwrite the last step with the final outputs: dg1, dg2, dg3, soc, temperature, irradiance
    record_system_info[-1][7:12] = [env.final_step_outputs[0], env.final_step_outputs[1], env.final_step_outputs[2],
                                    env.final_step_outputs[4], env.final_step_outputs[5]]
    record_system_info[-1][5] = env.final_step_outputs[3]
    record = {'init_info': record_init_info, 'system_info': record_system_info, 'state': record_state,
              'action': record_action, 'reward': record_reward, 'unbalance': record_unbalance}
    return record


def get_episode_return(env, act, device):
    """Roll out one episode and return its cumulative reward and unbalance."""
    episode_reward = 0.0  # sum of rewards in an episode
    episode_unbalance = 0.0
    state = env.reset()
    for i in range(24):
        s_tensor = torch.as_tensor((state,), device=device)
        a_tensor = act(s_tensor)
        action = a_tensor.detach().cpu().numpy()[0]  # detach() is redundant when torch.no_grad() is applied outside
        state, next_state, reward, done = env.step(action)
        state = next_state
        episode_reward += reward
        episode_unbalance += env.real_unbalance
        if done:
            break
    return episode_reward, episode_unbalance


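# Evaluation sketch: both helpers above expect a trained actor network `act`
# and are meant to run under torch.no_grad() (names are illustrative):
#
#     with torch.no_grad():
#         record = test_one_episode(env, act, device=torch.device('cpu'))
#         episode_reward, episode_unbalance = get_episode_return(env, act, device=torch.device('cpu'))
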
class ReplayBuffer:
    def __init__(self, max_len, state_dim, action_dim, gpu_id=0):
        self.now_len = 0
        self.next_idx = 0
        self.if_full = False
        self.max_len = max_len
        self.data_type = torch.float32
        self.action_dim = action_dim
        self.device = torch.device(f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu")

        other_dim = 1 + 1 + self.action_dim  # per-transition extras: reward, mask, action
        self.buf_other = torch.empty(size=(max_len, other_dim), dtype=self.data_type, device=self.device)

        if isinstance(state_dim, int):  # vector state
            self.buf_state = torch.empty((max_len, state_dim), dtype=torch.float32, device=self.device)
        elif isinstance(state_dim, tuple):  # image state stored as uint8
            self.buf_state = torch.empty((max_len, *state_dim), dtype=torch.uint8, device=self.device)
        else:
            raise ValueError('state_dim')

    def extend_buffer(self, state, other):  # CPU array to CPU array
        size = len(other)
        next_idx = self.next_idx + size

        if next_idx > self.max_len:  # wrap around: fill the tail, then restart at the head
            self.buf_state[self.next_idx:self.max_len] = state[:self.max_len - self.next_idx]
            self.buf_other[self.next_idx:self.max_len] = other[:self.max_len - self.next_idx]
            self.if_full = True

            next_idx = next_idx - self.max_len
            self.buf_state[0:next_idx] = state[-next_idx:]
            self.buf_other[0:next_idx] = other[-next_idx:]
        else:
            self.buf_state[self.next_idx:next_idx] = state
            self.buf_other[self.next_idx:next_idx] = other
        self.next_idx = next_idx

    def sample_batch(self, batch_size) -> tuple:
        # sample in [0, now_len - 1) so that indices + 1 is always a stored state
        indices = np.random.randint(self.now_len - 1, size=batch_size)
        r_m_a = self.buf_other[indices]
        return (r_m_a[:, 0:1],  # reward
                r_m_a[:, 1:2],  # mask
                r_m_a[:, 2:],  # action
                self.buf_state[indices],  # state
                self.buf_state[indices + 1])  # next state

    def update_now_len(self):
        self.now_len = self.max_len if self.if_full else self.next_idx
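

if __name__ == '__main__':
    # Minimal ReplayBuffer smoke test (illustrative dimensions, not project values).
    _buffer = ReplayBuffer(max_len=1024, state_dim=7, action_dim=4, gpu_id=-1)
    _state = torch.rand((128, 7))
    _other = torch.rand((128, 1 + 1 + 4))  # reward, mask, action for each transition
    _buffer.extend_buffer(_state, _other)
    _buffer.update_now_len()
    _reward, _mask, _action, _s, _s_next = _buffer.sample_batch(batch_size=64)
    print(_reward.shape, _mask.shape, _action.shape, _s.shape, _s_next.shape)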