building-agents/tools.py
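
# Utility module for the building-agents project: a Gurobi-based day-ahead
# unit-commitment/dispatch baseline (optimization_base_result), training
# hyper-parameters (Arguments), evaluation helpers (test_one_episode,
# get_episode_return), and an off-policy ReplayBuffer.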


import json
import os
import gurobipy as gp
import numpy as np
import numpy.random as rd
import pandas as pd
import torch
from gurobipy import GRB

def optimization_base_result(env, month, day, initial_soc):
    """Solve a day-ahead unit-commitment/dispatch model with Gurobi for the given day
    and return the hourly schedule as a DataFrame (normalized actions are also written
    to ./results.json)."""
    price = env.data_manager.get_series_price_data(month, day)
    load = env.data_manager.get_series_load_cons_data(month, day)
    temperature = env.data_manager.get_series_temperature_data(month, day)
    irradiance = env.data_manager.get_series_irradiance_data(month, day)
    wind_speed = env.data_manager.get_series_wind_data(month, day)
    period = env.episode_length
    DG_parameters = env.dg_parameters

    def get_dg_info(parameters):
        p_max = []
        p_min = []
        ramping_up = []
        ramping_down = []
        a_para = []
        b_para = []
        c_para = []
        for name, gen_info in parameters.items():
            p_max.append(gen_info['power_output_max'])
            p_min.append(gen_info['power_output_min'])
            ramping_up.append(gen_info['ramping_up'])
            ramping_down.append(gen_info['ramping_down'])
            a_para.append(gen_info['a'])
            b_para.append(gen_info['b'])
            c_para.append(gen_info['c'])
        return p_max, p_min, ramping_up, ramping_down, a_para, b_para, c_para

    p_max, p_min, ramping_up, ramping_down, a_para, b_para, c_para = get_dg_info(parameters=DG_parameters)
    NUM_GEN = len(DG_parameters.keys())
    battery_capacity = env.battery.capacity
    battery_efficiency = env.battery.efficiency
    solar_cofficient = env.solar.opex_cofficient
    wind_cofficient = env.wind.opex_cofficient
    battery_degradation = env.battery.degradation
    battery_holding = env.battery.holding
    m = gp.Model("UC")
    # Decision variables
    on_off = m.addVars(NUM_GEN, period, vtype=GRB.BINARY, name='on_off')
    gen_output = m.addVars(NUM_GEN, period, vtype=GRB.CONTINUOUS, name='output')
    # Battery charge/discharge variables and SOC bounds
    soc = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0.2, ub=0.8, name='SOC')
    battery_energy_change = m.addVars(period, vtype=GRB.CONTINUOUS, lb=-env.battery.max_charge,
                                      ub=env.battery.max_charge, name='battery_action')
    # Exchange limits between the external grid and the energy system
    grid_energy_import = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, ub=env.grid.exchange_ability, name='import')
    grid_energy_export = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, ub=env.grid.exchange_ability, name='export')
    # PV voltage control variable
    pv_voltage = m.addVars(period, vtype=GRB.CONTINUOUS, lb=-1, ub=1, name='pv_voltage')
    # PV and wind power generation; wind follows a piecewise power curve
    # (cubic between 3 and 8 m/s, rated between 8 and 12 m/s, zero otherwise)
    pv = [(0.2 * irradiance[t] + 0.05 * temperature[t] - 9.25) * (1 + pv_voltage[t]) for t in range(period)]
    wind = [172.265625 * wind_speed[t] ** 3 / 1e3 if 3 <= wind_speed[t] < 8
            else (172.265625 * 8 ** 3 / 1e3 if 8 <= wind_speed[t] < 12 else 0) for t in range(period)]
    # 1. Power balance constraint
    m.addConstrs(((sum(gen_output[g, t] for g in range(NUM_GEN)) + pv[t] + wind[t] + grid_energy_import[t] >= load[t]
                   + battery_energy_change[t] + grid_energy_export[t]) for t in range(period)), name='powerbalance')
    # 2. Generator maximum/minimum output constraints
    m.addConstrs((gen_output[g, t] <= on_off[g, t] * p_max[g] for g in range(NUM_GEN) for t in range(period)),
                 'gen_output_max')
    m.addConstrs((gen_output[g, t] >= on_off[g, t] * p_min[g] for g in range(NUM_GEN) for t in range(period)),
                 'gen_output_min')
    # 3. Ramping-up and ramping-down constraints
    m.addConstrs((gen_output[g, t + 1] - gen_output[g, t] <= ramping_up[g] for g in range(NUM_GEN)
                  for t in range(period - 1)), 'ramping_up')
    m.addConstrs((gen_output[g, t] - gen_output[g, t + 1] <= ramping_down[g] for g in range(NUM_GEN)
                  for t in range(period - 1)), 'ramping_down')
    # 4. Battery state-of-charge (SOC) constraints
    m.addConstr(battery_capacity * soc[0] == battery_capacity * initial_soc +
                (battery_energy_change[0] * battery_efficiency), name='soc0')
    m.addConstrs((battery_capacity * soc[t] == battery_capacity * soc[t - 1] +
                  (battery_energy_change[t] * battery_efficiency) for t in range(1, period)), name='soc update')
    # Cost (objective) function: generation, battery degradation/holding, grid import/export, PV and wind OPEX
    cost_gen = gp.quicksum(
        (a_para[g] * gen_output[g, t] * gen_output[g, t] + b_para[g] * gen_output[g, t] + c_para[g] * on_off[g, t])
        for t in range(period) for g in range(NUM_GEN))
    cost_battery = gp.quicksum(
        battery_energy_change[t] * battery_degradation + soc[t] * battery_holding for t in range(period))
    cost_import = gp.quicksum(grid_energy_import[t] * price[t] for t in range(period))
    cost_export = gp.quicksum(grid_energy_export[t] * price[t] * env.sell_coefficient for t in range(period))
    cost_solar = gp.quicksum(pv[t] * solar_cofficient for t in range(period))
    cost_wind = gp.quicksum(wind[t] * wind_cofficient for t in range(period))
    m.setObjective((cost_gen + cost_battery + cost_import - cost_export + cost_solar + cost_wind), GRB.MINIMIZE)
    m.optimize()
    # Record data for plotting
    pv = [(0.2 * irradiance[t] + 0.05 * temperature[t] - 9.25) * (1 + pv_voltage[t].x) for t in range(period)]
    output_record = {'pv': [], 'wind': [], 'price': [], 'load': [], 'netload': [],
                     'soc': [], 'battery_energy_change': [], 'grid_import': [], 'grid_export': [],
                     'gen1': [], 'gen2': [], 'gen3': [], 'step_cost': []}
    for t in range(period):
        gen_cost = sum((on_off[g, t].x * (
                a_para[g] * gen_output[g, t].x * gen_output[g, t].x + b_para[g] * gen_output[g, t].x + c_para[g]))
                       for g in range(NUM_GEN))
        battery_cost = battery_energy_change[t].x * battery_degradation + soc[t].x * battery_holding
        import_cost = grid_energy_import[t].x * price[t]
        export_cost = grid_energy_export[t].x * price[t] * env.sell_coefficient
        solar_cost = pv[t] * solar_cofficient
        wind_cost = wind[t] * wind_cofficient
        output_record['pv'].append(pv[t])
        output_record['wind'].append(wind[t])
        output_record['price'].append(price[t])
        output_record['load'].append(load[t])
        output_record['netload'].append(load[t] - pv[t] - wind[t])
        output_record['soc'].append(soc[t].x)
        output_record['battery_energy_change'].append(battery_energy_change[t].x)
        output_record['grid_import'].append(grid_energy_import[t].x)
        output_record['grid_export'].append(grid_energy_export[t].x)
        output_record['gen1'].append(gen_output[0, t].x)
        output_record['gen2'].append(gen_output[1, t].x)
        output_record['gen3'].append(gen_output[2, t].x)
        output_record['step_cost'].append(gen_cost + battery_cost + import_cost - export_cost + solar_cost + wind_cost)
    # Record normalized actions to aid decision making
    results = []
    for t in range(period):
        action = [battery_energy_change[t].x / 100,
                  (gen_output[0, t].x - (gen_output[0, t - 1].x if t > 0 else 0)) / 100,
                  (gen_output[1, t].x - (gen_output[1, t - 1].x if t > 0 else 0)) / 100,
                  (gen_output[2, t].x - (gen_output[2, t - 1].x if t > 0 else 0)) / 200,
                  pv_voltage[t].x - (pv_voltage[t - 1].x if t > 0 else 0)]
        results.append(action)
    with open('./results.json', 'w') as f:
        json.dump(results, f, indent=2)
    output_record_df = pd.DataFrame.from_dict(output_record)
    return output_record_df
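
# Example usage (illustrative sketch, not part of the original file): assumes `env` is an
# already-constructed environment exposing data_manager, battery, grid, solar, wind and
# dg_parameters as accessed above; the date and SOC values below are placeholders.
#   base_df = optimization_base_result(env, month=6, day=15, initial_soc=0.4)
#   print(base_df[['load', 'netload', 'soc', 'step_cost']].head())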

class Arguments:
    """revise here for our own purpose"""

    def __init__(self, agent=None, env=None):
        self.agent = agent
        self.env = env
        self.cwd = None  # current working directory, None means set automatically
        self.if_remove = False  # remove the cwd folder? (True, False, None: ask me)
        self.visible_gpu = '0,1,2,3'  # for example: os.environ['CUDA_VISIBLE_DEVICES'] = '0, 2,'
        self.num_threads = 32  # cpu_num used when evaluating the model, torch.set_num_threads(self.num_threads)

        '''Arguments for training'''
        self.num_episode = 1000
        self.gamma = 0.995  # discount factor of future rewards
        # self.reward_scale = 1  # an approximate target reward, usually close to 256
        self.learning_rate = 2 ** -14  # 2 ** -14 ~= 6e-5
        self.soft_update_tau = 2 ** -8  # 2 ** -8 ~= 4e-3
        self.net_dim = 256  # the network width
        self.batch_size = 4096  # number of transitions sampled from the replay buffer
        self.repeat_times = 2 ** 3  # repeatedly update the network to keep the critic's loss small
        self.target_step = 4096  # collect target_step experiences, then update the network
        self.max_memo = 500000  # capacity of the replay buffer
        self.if_per_or_gae = False  # PER for off-policy sparse reward: Prioritized Experience Replay

        '''Arguments for evaluation'''
        self.random_seed = 0  # initialize random seed in self.init_before_training()
        # self.random_seed_list = [1234, 2234, 3234, 4234, 5234]
        self.random_seed_list = [1234]

        '''Arguments for saving and plotting'''
        self.train = True
        self.save_network = True
        self.test_network = True
        self.save_test_data = True
        self.compare_with_gurobi = True
        self.plot_on = True

    def init_before_training(self, if_main):
        if self.cwd is None:
            agent_name = self.agent.__class__.__name__
            self.cwd = f'./{agent_name}'
        if if_main:
            import shutil  # remove history according to bool(if_remove)
            if self.if_remove is None:
                self.if_remove = bool(input(f"| PRESS 'y' to REMOVE: {self.cwd}? ") == 'y')
            elif self.if_remove:
                shutil.rmtree(self.cwd, ignore_errors=True)
                print(f"| Remove cwd: {self.cwd}")
            os.makedirs(self.cwd, exist_ok=True)
        np.random.seed(self.random_seed)
        torch.manual_seed(self.random_seed)
        torch.set_num_threads(self.num_threads)
        torch.set_default_dtype(torch.float32)
        os.environ['CUDA_VISIBLE_DEVICES'] = str(self.visible_gpu)
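
# Example usage (illustrative sketch): `MyAgent` and `MyEnv` are placeholders for the agent
# and environment classes used elsewhere in the project, not names defined in this file.
#   args = Arguments(agent=MyAgent(), env=MyEnv())
#   args.visible_gpu = '0'
#   args.init_before_training(if_main=True)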

def test_one_episode(env, act, device):
    """Run one evaluation episode and record the unbalance after each action."""
    record_state = []
    record_action = []
    record_reward = []
    record_output = []
    record_cost = []  # kept for the output dict; not populated in this function
    record_unbalance = []
    record_system_info = []  # [time, price, netload, action, real action, soc, output*4, unbalance(exchange+penalty), cost]
    record_init_info = []  # includes month, day, time, initial soc
    env.TRAIN = False
    state = env.reset()
    record_init_info.append([env.month, env.day, env.current_time, env.battery.current_capacity])
    print(f'current testing month is {env.month}, day is {env.day}, initial_soc is {env.battery.current_capacity}')
    for i in range(24):
        s_tensor = torch.as_tensor((state,), device=device)
        a_tensor = act(s_tensor)
        action = a_tensor.detach().cpu().numpy()[0]  # detach() is not strictly needed when called inside torch.no_grad()
        real_action = action
        state, next_state, reward, done = env.step(action)
        record_system_info.append([state[0], state[1], state[3], action, real_action, env.battery.SOC(),
                                   env.battery.energy_change, next_state[4], next_state[5], next_state[6],
                                   next_state[7], next_state[8], env.unbalance, env.operation_cost])
        record_state.append(state)
        record_action.append(real_action)
        record_reward.append(reward)
        record_output.append(env.current_output)
        record_unbalance.append(env.unbalance)
        state = next_state
    # add information of the last step: dg1, dg2, dg3, soc
    record_system_info[-1][7:12] = [env.final_step_outputs[0], env.final_step_outputs[1], env.final_step_outputs[2],
                                    env.final_step_outputs[4], env.final_step_outputs[5]]
    record_system_info[-1][5] = env.final_step_outputs[3]
    record = {'init_info': record_init_info, 'system_info': record_system_info, 'state': record_state,
              'action': record_action, 'reward': record_reward, 'cost': record_cost, 'unbalance': record_unbalance,
              'record_output': record_output}
    return record
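
# Example usage (illustrative sketch): `act` is a trained actor network; wrapping the call
# in torch.no_grad() avoids building an autograd graph during evaluation, as assumed by the
# comments above. The CSV export line is optional and only shows one way to inspect the record.
#   with torch.no_grad():
#       record = test_one_episode(env, act, device=torch.device('cpu'))
#   pd.DataFrame(record['system_info']).to_csv('./test_episode.csv')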

def get_episode_return(env, act, device):
    episode_return = 0.0  # sum of rewards in an episode
    episode_unbalance = 0.0
    state = env.reset()
    for i in range(24):
        s_tensor = torch.as_tensor((state,), device=device)
        a_tensor = act(s_tensor)
        action = a_tensor.detach().cpu().numpy()[0]  # detach() is not strictly needed when called inside torch.no_grad()
        state, next_state, reward, done = env.step(action)
        state = next_state
        episode_return += reward
        episode_unbalance += env.real_unbalance
        if done:
            break
    return episode_return, episode_unbalance
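
# Example usage (illustrative sketch): quick scalar evaluation during training.
#   with torch.no_grad():
#       episode_return, episode_unbalance = get_episode_return(env, act, device)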

class ReplayBuffer:
    def __init__(self, max_len, state_dim, action_dim, gpu_id=0):
        self.now_len = 0
        self.next_idx = 0
        self.if_full = False
        self.max_len = max_len
        self.data_type = torch.float32
        self.action_dim = action_dim
        self.device = torch.device(f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu")
        other_dim = 1 + 1 + self.action_dim  # (reward, mask, action)
        self.buf_other = torch.empty(size=(max_len, other_dim), dtype=self.data_type, device=self.device)
        if isinstance(state_dim, int):  # state is a flat feature vector
            self.buf_state = torch.empty((max_len, state_dim), dtype=torch.float32, device=self.device)
        elif isinstance(state_dim, tuple):  # state is an image (pixels)
            self.buf_state = torch.empty((max_len, *state_dim), dtype=torch.uint8, device=self.device)
        else:
            raise ValueError('state_dim')

    def extend_buffer(self, state, other):  # CPU array to CPU array
        size = len(other)
        next_idx = self.next_idx + size
        if next_idx > self.max_len:  # wrap around: the buffer behaves as a ring buffer
            self.buf_state[self.next_idx:self.max_len] = state[:self.max_len - self.next_idx]
            self.buf_other[self.next_idx:self.max_len] = other[:self.max_len - self.next_idx]
            self.if_full = True
            next_idx = next_idx - self.max_len
            self.buf_state[0:next_idx] = state[-next_idx:]
            self.buf_other[0:next_idx] = other[-next_idx:]
        else:
            self.buf_state[self.next_idx:next_idx] = state
            self.buf_other[self.next_idx:next_idx] = other
        self.next_idx = next_idx

    def sample_batch(self, batch_size) -> tuple:
        indices = rd.randint(self.now_len - 1, size=batch_size)
        r_m_a = self.buf_other[indices]
        return (r_m_a[:, 0:1],  # reward
                r_m_a[:, 1:2],  # mask
                r_m_a[:, 2:],  # action
                self.buf_state[indices],  # state
                self.buf_state[indices + 1])  # next state

    def update_now_len(self):
        self.now_len = self.max_len if self.if_full else self.next_idx
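
# Example usage (illustrative sketch): the dimensions below are placeholders. `other` is
# assumed to pack (reward, mask, action) per transition, matching other_dim = 1 + 1 + action_dim.
#   buffer = ReplayBuffer(max_len=500000, state_dim=9, action_dim=5, gpu_id=-1)  # gpu_id=-1 -> CPU
#   states = torch.zeros(128, 9)
#   others = torch.zeros(128, 1 + 1 + 5)
#   buffer.extend_buffer(states, others)
#   buffer.update_now_len()
#   reward, mask, action, state, next_state = buffer.sample_batch(batch_size=64)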