building-agents/tools.py
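
# Utility module for the building-agents project: a Gurobi-based day-ahead
# unit-commitment/dispatch baseline (optimization_base_result), training
# hyper-parameters (Arguments), evaluation helpers (test_one_episode,
# get_episode_return), and an off-policy ReplayBuffer.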


import json
import os
import gurobipy as gp
import numpy as np
import numpy.random as rd
import pandas as pd
import torch
from gurobipy import GRB

def optimization_base_result(env, month, day, initial_soc):
    """Solve a day-ahead unit-commitment/dispatch model with Gurobi for the given day
    and return the hourly schedule as a DataFrame (normalized actions are also written
    to ./results.json)."""
    price = env.data_manager.get_series_price_data(month, day)
    load = env.data_manager.get_series_load_cons_data(month, day)
    temperature = env.data_manager.get_series_temperature_data(month, day)
    irradiance = env.data_manager.get_series_irradiance_data(month, day)
    wind_speed = env.data_manager.get_series_wind_data(month, day)
    period = env.episode_length
    DG_parameters = env.dg_parameters

    def get_dg_info(parameters):
        p_max = []
        p_min = []
        ramping_up = []
        ramping_down = []
        a_para = []
        b_para = []
        c_para = []
        for name, gen_info in parameters.items():
            p_max.append(gen_info['power_output_max'])
            p_min.append(gen_info['power_output_min'])
            ramping_up.append(gen_info['ramping_up'])
            ramping_down.append(gen_info['ramping_down'])
            a_para.append(gen_info['a'])
            b_para.append(gen_info['b'])
            c_para.append(gen_info['c'])
        return p_max, p_min, ramping_up, ramping_down, a_para, b_para, c_para

    p_max, p_min, ramping_up, ramping_down, a_para, b_para, c_para = get_dg_info(parameters=DG_parameters)
    NUM_GEN = len(DG_parameters.keys())
    battery_capacity = env.battery.capacity
    battery_efficiency = env.battery.efficiency
    solar_cofficient = env.solar.opex_cofficient
    wind_cofficient = env.wind.opex_cofficient
    battery_degradation = env.battery.degradation
    battery_holding = env.battery.holding
    m = gp.Model("UC")
    # Decision variables
    on_off = m.addVars(NUM_GEN, period, vtype=GRB.BINARY, name='on_off')
    gen_output = m.addVars(NUM_GEN, period, vtype=GRB.CONTINUOUS, name='output')
    # Battery charge/discharge variables and SOC bounds
    soc = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0.2, ub=0.8, name='SOC')
    battery_energy_change = m.addVars(period, vtype=GRB.CONTINUOUS, lb=-env.battery.max_charge,
                                      ub=env.battery.max_charge, name='battery_action')
    # Exchange limits between the external grid and the energy system
    grid_energy_import = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, ub=env.grid.exchange_ability, name='import')
    grid_energy_export = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, ub=env.grid.exchange_ability, name='export')
    # PV voltage control variable
    pv_voltage = m.addVars(period, vtype=GRB.CONTINUOUS, lb=-1, ub=1, name='pv_voltage')
    # PV and wind power generation; wind follows a piecewise power curve
    # (cubic between 3 and 8 m/s, rated between 8 and 12 m/s, zero otherwise)
    pv = [(0.2 * irradiance[t] + 0.05 * temperature[t] - 9.25) * (1 + pv_voltage[t]) for t in range(period)]
    wind = [172.265625 * wind_speed[t] ** 3 / 1e3 if 3 <= wind_speed[t] < 8
            else (172.265625 * 8 ** 3 / 1e3 if 8 <= wind_speed[t] < 12 else 0) for t in range(period)]
    # 1. Power balance constraint
    m.addConstrs(((sum(gen_output[g, t] for g in range(NUM_GEN)) + pv[t] + wind[t] + grid_energy_import[t] >= load[t]
                   + battery_energy_change[t] + grid_energy_export[t]) for t in range(period)), name='powerbalance')
    # 2. Generator maximum/minimum output constraints
    m.addConstrs((gen_output[g, t] <= on_off[g, t] * p_max[g] for g in range(NUM_GEN) for t in range(period)),
                 'gen_output_max')
    m.addConstrs((gen_output[g, t] >= on_off[g, t] * p_min[g] for g in range(NUM_GEN) for t in range(period)),
                 'gen_output_min')
    # 3. Ramping-up and ramping-down constraints
    m.addConstrs((gen_output[g, t + 1] - gen_output[g, t] <= ramping_up[g] for g in range(NUM_GEN)
                  for t in range(period - 1)), 'ramping_up')
    m.addConstrs((gen_output[g, t] - gen_output[g, t + 1] <= ramping_down[g] for g in range(NUM_GEN)
                  for t in range(period - 1)), 'ramping_down')
    # 4. Battery state-of-charge (SOC) constraints
    m.addConstr(battery_capacity * soc[0] == battery_capacity * initial_soc +
                (battery_energy_change[0] * battery_efficiency), name='soc0')
    m.addConstrs((battery_capacity * soc[t] == battery_capacity * soc[t - 1] +
                  (battery_energy_change[t] * battery_efficiency) for t in range(1, period)), name='soc update')
    # Cost (objective) function: generation, battery degradation/holding, grid import/export, PV and wind OPEX
    cost_gen = gp.quicksum(
        (a_para[g] * gen_output[g, t] * gen_output[g, t] + b_para[g] * gen_output[g, t] + c_para[g] * on_off[g, t])
        for t in range(period) for g in range(NUM_GEN))
    cost_battery = gp.quicksum(
        battery_energy_change[t] * battery_degradation + soc[t] * battery_holding for t in range(period))
    cost_import = gp.quicksum(grid_energy_import[t] * price[t] for t in range(period))
    cost_export = gp.quicksum(grid_energy_export[t] * price[t] * env.sell_coefficient for t in range(period))
    cost_solar = gp.quicksum(pv[t] * solar_cofficient for t in range(period))
    cost_wind = gp.quicksum(wind[t] * wind_cofficient for t in range(period))
    m.setObjective((cost_gen + cost_battery + cost_import - cost_export + cost_solar + cost_wind), GRB.MINIMIZE)
    m.optimize()
    # Record data for plotting
    pv = [(0.2 * irradiance[t] + 0.05 * temperature[t] - 9.25) * (1 + pv_voltage[t].x) for t in range(period)]
    output_record = {'pv': [], 'wind': [], 'price': [], 'load': [], 'netload': [],
                     'soc': [], 'battery_energy_change': [], 'grid_import': [], 'grid_export': [],
                     'gen1': [], 'gen2': [], 'gen3': [], 'step_cost': []}
    for t in range(period):
        gen_cost = sum((on_off[g, t].x * (
                a_para[g] * gen_output[g, t].x * gen_output[g, t].x + b_para[g] * gen_output[g, t].x + c_para[g]))
                       for g in range(NUM_GEN))
        battery_cost = battery_energy_change[t].x * battery_degradation + soc[t].x * battery_holding
        import_cost = grid_energy_import[t].x * price[t]
        export_cost = grid_energy_export[t].x * price[t] * env.sell_coefficient
        solar_cost = pv[t] * solar_cofficient
        wind_cost = wind[t] * wind_cofficient
        output_record['pv'].append(pv[t])
        output_record['wind'].append(wind[t])
        output_record['price'].append(price[t])
        output_record['load'].append(load[t])
        output_record['netload'].append(load[t] - pv[t] - wind[t])
        output_record['soc'].append(soc[t].x)
        output_record['battery_energy_change'].append(battery_energy_change[t].x)
        output_record['grid_import'].append(grid_energy_import[t].x)
        output_record['grid_export'].append(grid_energy_export[t].x)
        output_record['gen1'].append(gen_output[0, t].x)
        output_record['gen2'].append(gen_output[1, t].x)
        output_record['gen3'].append(gen_output[2, t].x)
        output_record['step_cost'].append(gen_cost + battery_cost + import_cost - export_cost + solar_cost + wind_cost)
    # Record normalized actions to aid decision making
    results = []
    for t in range(period):
        action = [battery_energy_change[t].x / 100,
                  (gen_output[0, t].x - (gen_output[0, t - 1].x if t > 0 else 0)) / 100,
                  (gen_output[1, t].x - (gen_output[1, t - 1].x if t > 0 else 0)) / 100,
                  (gen_output[2, t].x - (gen_output[2, t - 1].x if t > 0 else 0)) / 200,
                  pv_voltage[t].x - (pv_voltage[t - 1].x if t > 0 else 0)]
        results.append(action)
    with open('./results.json', 'w') as f:
        json.dump(results, f, indent=2)
    output_record_df = pd.DataFrame.from_dict(output_record)
    return output_record_df
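
# Example usage (illustrative sketch, not part of the original file): assumes `env` is an
# already-constructed environment exposing data_manager, battery, grid, solar, wind and
# dg_parameters as accessed above; the date and SOC values below are placeholders.
#   base_df = optimization_base_result(env, month=6, day=15, initial_soc=0.4)
#   print(base_df[['load', 'netload', 'soc', 'step_cost']].head())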

class Arguments:
    """revise here for our own purpose"""

    def __init__(self, agent=None, env=None):
        self.agent = agent
        self.env = env
        self.cwd = None  # current working directory, None means set automatically
        self.if_remove = False  # remove the cwd folder? (True, False, None: ask me)
        self.visible_gpu = '0,1,2,3'  # for example: os.environ['CUDA_VISIBLE_DEVICES'] = '0, 2,'
        self.num_threads = 32  # cpu_num used when evaluating the model, torch.set_num_threads(self.num_threads)

        '''Arguments for training'''
        self.num_episode = 1000
        self.gamma = 0.995  # discount factor of future rewards
        # self.reward_scale = 1  # an approximate target reward, usually close to 256
        self.learning_rate = 2 ** -14  # 2 ** -14 ~= 6e-5
        self.soft_update_tau = 2 ** -8  # 2 ** -8 ~= 4e-3
        self.net_dim = 256  # the network width
        self.batch_size = 4096  # number of transitions sampled from the replay buffer
        self.repeat_times = 2 ** 3  # repeatedly update the network to keep the critic's loss small
        self.target_step = 4096  # collect target_step experiences, then update the network
        self.max_memo = 500000  # capacity of the replay buffer
        self.if_per_or_gae = False  # PER for off-policy sparse reward: Prioritized Experience Replay

        '''Arguments for evaluation'''
        self.random_seed = 0  # initialize random seed in self.init_before_training()
        # self.random_seed_list = [1234, 2234, 3234, 4234, 5234]
        self.random_seed_list = [1234]

        '''Arguments for saving and plotting'''
        self.train = True
        self.save_network = True
        self.test_network = True
        self.save_test_data = True
        self.compare_with_gurobi = True
        self.plot_on = True

    def init_before_training(self, if_main):
        if self.cwd is None:
            agent_name = self.agent.__class__.__name__
            self.cwd = f'./{agent_name}'
        if if_main:
            import shutil  # remove history according to bool(if_remove)
            if self.if_remove is None:
                self.if_remove = bool(input(f"| PRESS 'y' to REMOVE: {self.cwd}? ") == 'y')
            elif self.if_remove:
                shutil.rmtree(self.cwd, ignore_errors=True)
                print(f"| Remove cwd: {self.cwd}")
            os.makedirs(self.cwd, exist_ok=True)
        np.random.seed(self.random_seed)
        torch.manual_seed(self.random_seed)
        torch.set_num_threads(self.num_threads)
        torch.set_default_dtype(torch.float32)
        os.environ['CUDA_VISIBLE_DEVICES'] = str(self.visible_gpu)
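
# Example usage (illustrative sketch): `MyAgent` and `MyEnv` are placeholders for the agent
# and environment classes used elsewhere in the project, not names defined in this file.
#   args = Arguments(agent=MyAgent(), env=MyEnv())
#   args.visible_gpu = '0'
#   args.init_before_training(if_main=True)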

def test_one_episode(env, act, device):
    """Run one evaluation episode and record the unbalance after each action."""
    record_state = []
    record_action = []
    record_reward = []
    record_output = []
    record_cost = []  # kept for the output dict; not populated in this function
    record_unbalance = []
    record_system_info = []  # [time, price, netload, action, real action, soc, output*4, unbalance(exchange+penalty), cost]
    record_init_info = []  # includes month, day, time, initial soc
    env.TRAIN = False
    state = env.reset()
    record_init_info.append([env.month, env.day, env.current_time, env.battery.current_capacity])
    print(f'current testing month is {env.month}, day is {env.day}, initial_soc is {env.battery.current_capacity}')
    for i in range(24):
        s_tensor = torch.as_tensor((state,), device=device)
        a_tensor = act(s_tensor)
        action = a_tensor.detach().cpu().numpy()[0]  # detach() is not strictly needed when called inside torch.no_grad()
        real_action = action
        state, next_state, reward, done = env.step(action)
        record_system_info.append([state[0], state[1], state[3], action, real_action, env.battery.SOC(),
                                   env.battery.energy_change, next_state[4], next_state[5], next_state[6],
                                   next_state[7], next_state[8], env.unbalance, env.operation_cost])
        record_state.append(state)
        record_action.append(real_action)
        record_reward.append(reward)
        record_output.append(env.current_output)
        record_unbalance.append(env.unbalance)
        state = next_state
    # add information of the last step: dg1, dg2, dg3, soc
    record_system_info[-1][7:12] = [env.final_step_outputs[0], env.final_step_outputs[1], env.final_step_outputs[2],
                                    env.final_step_outputs[4], env.final_step_outputs[5]]
    record_system_info[-1][5] = env.final_step_outputs[3]
    record = {'init_info': record_init_info, 'system_info': record_system_info, 'state': record_state,
              'action': record_action, 'reward': record_reward, 'cost': record_cost, 'unbalance': record_unbalance,
              'record_output': record_output}
    return record
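
# Example usage (illustrative sketch): `act` is a trained actor network; wrapping the call
# in torch.no_grad() avoids building an autograd graph during evaluation, as assumed by the
# comments above. The CSV export line is optional and only shows one way to inspect the record.
#   with torch.no_grad():
#       record = test_one_episode(env, act, device=torch.device('cpu'))
#   pd.DataFrame(record['system_info']).to_csv('./test_episode.csv')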

def get_episode_return(env, act, device):
    episode_return = 0.0  # sum of rewards in an episode
    episode_unbalance = 0.0
    state = env.reset()
    for i in range(24):
        s_tensor = torch.as_tensor((state,), device=device)
        a_tensor = act(s_tensor)
        action = a_tensor.detach().cpu().numpy()[0]  # detach() is not strictly needed when called inside torch.no_grad()
        state, next_state, reward, done = env.step(action)
        state = next_state
        episode_return += reward
        episode_unbalance += env.real_unbalance
        if done:
            break
    return episode_return, episode_unbalance
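
# Example usage (illustrative sketch): quick scalar evaluation during training.
#   with torch.no_grad():
#       episode_return, episode_unbalance = get_episode_return(env, act, device)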

class ReplayBuffer:
    def __init__(self, max_len, state_dim, action_dim, gpu_id=0):
        self.now_len = 0
        self.next_idx = 0
        self.if_full = False
        self.max_len = max_len
        self.data_type = torch.float32
        self.action_dim = action_dim
        self.device = torch.device(f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu")
        other_dim = 1 + 1 + self.action_dim  # (reward, mask, action)
        self.buf_other = torch.empty(size=(max_len, other_dim), dtype=self.data_type, device=self.device)
        if isinstance(state_dim, int):  # state is a flat feature vector
            self.buf_state = torch.empty((max_len, state_dim), dtype=torch.float32, device=self.device)
        elif isinstance(state_dim, tuple):  # state is an image (pixels)
            self.buf_state = torch.empty((max_len, *state_dim), dtype=torch.uint8, device=self.device)
        else:
            raise ValueError('state_dim')

    def extend_buffer(self, state, other):  # CPU array to CPU array
        size = len(other)
        next_idx = self.next_idx + size
        if next_idx > self.max_len:  # wrap around: the buffer behaves as a ring buffer
            self.buf_state[self.next_idx:self.max_len] = state[:self.max_len - self.next_idx]
            self.buf_other[self.next_idx:self.max_len] = other[:self.max_len - self.next_idx]
            self.if_full = True
            next_idx = next_idx - self.max_len
            self.buf_state[0:next_idx] = state[-next_idx:]
            self.buf_other[0:next_idx] = other[-next_idx:]
        else:
            self.buf_state[self.next_idx:next_idx] = state
            self.buf_other[self.next_idx:next_idx] = other
        self.next_idx = next_idx

    def sample_batch(self, batch_size) -> tuple:
        indices = rd.randint(self.now_len - 1, size=batch_size)
        r_m_a = self.buf_other[indices]
        return (r_m_a[:, 0:1],  # reward
                r_m_a[:, 1:2],  # mask
                r_m_a[:, 2:],  # action
                self.buf_state[indices],  # state
                self.buf_state[indices + 1])  # next state

    def update_now_len(self):
        self.now_len = self.max_len if self.if_full else self.next_idx
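
# Example usage (illustrative sketch): the dimensions below are placeholders. `other` is
# assumed to pack (reward, mask, action) per transition, matching other_dim = 1 + 1 + action_dim.
#   buffer = ReplayBuffer(max_len=500000, state_dim=9, action_dim=5, gpu_id=-1)  # gpu_id=-1 -> CPU
#   states = torch.zeros(128, 9)
#   others = torch.zeros(128, 1 + 1 + 5)
#   buffer.extend_buffer(states, others)
#   buffer.update_now_len()
#   reward, mask, action, state, next_state = buffer.sample_batch(batch_size=64)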