From 80344bd10a9284f49e433486a5bc2d4db0aef18f Mon Sep 17 00:00:00 2001
From: chenxiaodong
Date: Tue, 25 Jun 2024 11:03:07 +0800
Subject: [PATCH] llm

---
 tools备份.py | 285 ---------------------------------------------------
 1 file changed, 285 deletions(-)
 delete mode 100644 tools备份.py

diff --git a/tools备份.py b/tools备份.py
deleted file mode 100644
index 99ed610..0000000
--- a/tools备份.py
+++ /dev/null
@@ -1,285 +0,0 @@
-import os
-
-import gurobipy as gp
-import numpy as np
-import numpy.random as rd
-import pandas as pd
-import torch
-from gurobipy import GRB
-
-
-def optimization_base_result(env, month, day, initial_soc):
-    price = env.data_manager.get_series_price_data(month, day)
-    load = env.data_manager.get_series_load_cons_data(month, day)
-    temperature = env.data_manager.get_series_temperature_data(month, day)
-    irradiance = env.data_manager.get_series_irradiance_data(month, day)
-    wind_speed = env.data_manager.get_series_wind_data(month, day)
-
-    pv = [env.solar.step(temp, irr) for temp, irr in zip(temperature, irradiance)]
-    wind = [env.wind.step(ws) for ws in wind_speed]
-
-    period = env.episode_length
-    DG_parameters = env.dg_parameters
-
-    def get_dg_info(parameters):
-        p_max = []
-        p_min = []
-        ramping_up = []
-        ramping_down = []
-        a_para = []
-        b_para = []
-        c_para = []
-
-        for name, gen_info in parameters.items():
-            p_max.append(gen_info['power_output_max'])
-            p_min.append(gen_info['power_output_min'])
-            ramping_up.append(gen_info['ramping_up'])
-            ramping_down.append(gen_info['ramping_down'])
-            a_para.append(gen_info['a'])
-            b_para.append(gen_info['b'])
-            c_para.append(gen_info['c'])
-        return p_max, p_min, ramping_up, ramping_down, a_para, b_para, c_para
-
-    p_max, p_min, ramping_up, ramping_down, a_para, b_para, c_para = get_dg_info(parameters=DG_parameters)
-    NUM_GEN = len(DG_parameters.keys())
-    battery_capacity = env.battery.capacity
-    battery_efficiency = env.battery.efficiency
-
-    m = gp.Model("UC")
-    m.setParam('OutputFlag', 1)
-
-    # System variables: commitment status and generator output
-    on_off = m.addVars(NUM_GEN, period, vtype=GRB.BINARY, name='on_off')
-    gen_output = m.addVars(NUM_GEN, period, vtype=GRB.CONTINUOUS, name='output')
-    # Battery charge/discharge bounds
-    battery_energy_change = m.addVars(period, vtype=GRB.CONTINUOUS, lb=env.battery.max_discharge,
-                                      ub=env.battery.max_charge, name='battery_action')
-    # Exchange limits between the external grid and the energy system
-    grid_energy_import = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, ub=env.grid.exchange_ability, name='import')
-    grid_energy_export = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, ub=env.grid.exchange_ability, name='export')
-    soc = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0.2, ub=0.8, name='SOC')
-
-    # 1. Power balance constraint
-    m.addConstrs(((sum(gen_output[g, t] for g in range(NUM_GEN)) + pv[t] + wind[t] + grid_energy_import[t] >= load[t] +
-                   battery_energy_change[t] + grid_energy_export[t]) for t in range(period)), name='powerbalance')
-    # 2. Generator maximum/minimum output constraints
-    m.addConstrs((gen_output[g, t] <= on_off[g, t] * p_max[g] for g in range(NUM_GEN) for t in range(period)),
-                 'gen_output_max')
-    m.addConstrs((gen_output[g, t] >= on_off[g, t] * p_min[g] for g in range(NUM_GEN) for t in range(period)),
-                 'gen_output_min')
-    # 3. Ramp-up and ramp-down constraints
-    m.addConstrs((gen_output[g, t + 1] - gen_output[g, t] <= ramping_up[g] for g in range(NUM_GEN)
-                  for t in range(period - 1)), 'ramping_up')
-    m.addConstrs((gen_output[g, t] - gen_output[g, t + 1] <= ramping_down[g] for g in range(NUM_GEN)
-                  for t in range(period - 1)), 'ramping_down')
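-    # Note on the SOC constraints that follow: the battery is modelled with a
-    # single efficiency applied to the signed energy change, i.e.
-    #     capacity * soc[t] = capacity * soc[t-1] + efficiency * delta_e[t].
-    # A common refinement (not used here) splits charging and discharging into
-    # separate non-negative variables, so the efficiency multiplies charge
-    # energy and divides discharge energy.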
-    # 4. Battery capacity (SOC) constraints
-    m.addConstr(battery_capacity * soc[0] == battery_capacity * initial_soc +
-                (battery_energy_change[0] * battery_efficiency), name='soc0')
-    m.addConstrs((battery_capacity * soc[t] == battery_capacity * soc[t - 1] +
-                  (battery_energy_change[t] * battery_efficiency) for t in range(1, period)), name='soc update')
-    # Cost function
-    # Generator cost
-    cost_gen = gp.quicksum(
-        (a_para[g] * gen_output[g, t] * gen_output[g, t] + b_para[g] * gen_output[g, t] + c_para[g] * on_off[g, t]) for
-        t in range(period) for g in range(NUM_GEN))
-    cost_grid_import = gp.quicksum(grid_energy_import[t] * price[t] for t in range(period))
-    cost_grid_export = gp.quicksum(grid_energy_export[t] * price[t] * env.sell_coefficient for t in range(period))
-
-    m.setObjective((cost_gen + cost_grid_import - cost_grid_export), GRB.MINIMIZE)
-    m.optimize()
-
-    # computeIIS() is only defined for infeasible models; calling it after a
-    # successful solve raises a GurobiError, so guard on the model status.
-    if m.status == GRB.INFEASIBLE:
-        m.computeIIS()
-
-    output_record = {'pv': [], 'wind': [], 'price': [], 'load': [],
-                     'netload': [], 'soc': [], 'battery_energy_change': [], 'grid_import': [], 'grid_export': [],
-                     'gen1': [], 'gen2': [], 'gen3': [], 'step_cost': []}
-    for t in range(period):
-        gen_cost = sum((on_off[g, t].x * (
-                a_para[g] * gen_output[g, t].x * gen_output[g, t].x + b_para[g] * gen_output[g, t].x + c_para[g]))
-                       for g in range(NUM_GEN))
-        grid_import_cost = grid_energy_import[t].x * price[t]
-        grid_export_cost = grid_energy_export[t].x * price[t] * env.sell_coefficient
-        output_record['pv'].append(pv[t])
-        # output_record['temperature'].append(temperature[t])
-        # output_record['irradiance'].append(irradiance[t])
-        output_record['wind'].append(wind[t])
-        output_record['price'].append(price[t])
-        output_record['load'].append(load[t])
-        output_record['netload'].append(load[t] - pv[t])
-        output_record['soc'].append(soc[t].x)
-        output_record['battery_energy_change'].append(battery_energy_change[t].x)
-        output_record['grid_import'].append(grid_energy_import[t].x)
-        output_record['grid_export'].append(grid_energy_export[t].x)
-        output_record['gen1'].append(gen_output[0, t].x)
-        output_record['gen2'].append(gen_output[1, t].x)
-        output_record['gen3'].append(gen_output[2, t].x)
-        output_record['step_cost'].append(gen_cost + grid_import_cost - grid_export_cost)
-    output_record_df = pd.DataFrame.from_dict(output_record)
-    return output_record_df
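-
-
-# Example usage (hypothetical values; assumes `env` exposes the data_manager,
-# solar, wind, battery, grid and dg_parameters attributes used above):
-#   baseline_df = optimization_base_result(env, month=6, day=25, initial_soc=0.4)
-#   baseline_df.to_csv('gurobi_baseline.csv', index=False)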
-
-
-class Arguments:
-    """revise here for our own purpose"""
-
-    def __init__(self, agent=None, env=None):
-        self.agent = agent  # Deep Reinforcement Learning algorithm
-        self.env = env  # the environment for training
-        # self.plot_shadow_on = False  # controls whether to plot all shadow figures
-        self.cwd = None  # current working directory. None means set automatically
-        self.if_remove = False  # remove the cwd folder? (True, False, None: ask me)
-        self.visible_gpu = '0,1,2,3'  # for example: os.environ['CUDA_VISIBLE_DEVICES'] = '0, 2,'
-        # self.worker_num = 2  # number of rollout workers per GPU (adjust it to get high GPU usage)
-        self.num_threads = 32  # CPU count for evaluating the model, torch.set_num_threads(self.num_threads)
-
-        '''Arguments for training'''
-        self.num_episode = 1000
-        self.gamma = 0.995  # discount factor of future rewards
-        # self.reward_scale = 1  # an approximate target reward, usually close to 256
-        self.learning_rate = 2 ** -14  # 2 ** -14 ~= 6e-5
-        self.soft_update_tau = 2 ** -8  # 2 ** -8 ~= 5e-3
-        self.net_dim = 256  # the network width
-        self.batch_size = 4096  # number of transitions sampled from the replay buffer
-        self.repeat_times = 2 ** 5  # repeatedly update the network to keep the critic's loss small
-        self.target_step = 4096  # collect target_step experiences, then update the network
-        self.max_memo = 500000  # capacity of the replay buffer
-        self.if_per_or_gae = False  # PER for off-policy sparse rewards: Prioritized Experience Replay
-
-        '''Arguments for evaluation'''
-        # self.eval_gap = 2 ** 6  # evaluate the agent every eval_gap seconds
-        # self.eval_times = 2  # number of episode returns collected per evaluation
-        self.random_seed = 0  # initialize the random seed in self.init_before_training()
-        # self.random_seed_list = [1234, 2234, 3234, 4234, 5234]
-        self.random_seed_list = [1234]
-        '''Arguments for saving and plotting'''
-        self.train = True
-        self.save_network = True
-        self.test_network = True
-        self.save_test_data = True
-        self.compare_with_gurobi = True
-        self.plot_on = True
-
-    def init_before_training(self, if_main):
-        if self.cwd is None:
-            agent_name = self.agent.__class__.__name__
-            self.cwd = f'./{agent_name}'
-
-        if if_main:
-            import shutil  # remove history according to bool(if_remove)
-            if self.if_remove is None:
-                self.if_remove = bool(input(f"| PRESS 'y' to REMOVE: {self.cwd}? ") == 'y')
-            elif self.if_remove:
-                shutil.rmtree(self.cwd, ignore_errors=True)
-                print(f"| Remove cwd: {self.cwd}")
-            os.makedirs(self.cwd, exist_ok=True)
-
-        np.random.seed(self.random_seed)
-        torch.manual_seed(self.random_seed)
-        torch.set_num_threads(self.num_threads)
-        torch.set_default_dtype(torch.float32)
-
-        os.environ['CUDA_VISIBLE_DEVICES'] = str(self.visible_gpu)  # control which GPUs are used
-
-
-def test_one_episode(env, act, device):
-    """get evaluation information; record the unbalance after each action is taken"""
-    record_state = []
-    record_action = []
-    record_reward = []
-    record_output = []
-    record_cost = []
-    record_unbalance = []
-    record_system_info = []  # [time, price, netload, action, real action, output * 4, soc, unbalance (exchange + penalty)]
-    record_init_info = []  # includes month, day, time, initial soc
-    env.TRAIN = False
-    state = env.reset()
-    record_init_info.append([env.month, env.day, env.current_time, env.battery.current_capacity])
-    print(f'current testing month is {env.month}, day is {env.day}, initial_soc is {env.battery.current_capacity}')
-    for i in range(24):
-        s_tensor = torch.as_tensor((state,), device=device)
-        a_tensor = act(s_tensor)
-        action = a_tensor.detach().cpu().numpy()[0]  # detach() is redundant under an outer torch.no_grad(), but harmless
-        real_action = action
-        state, next_state, reward, done = env.step(action)
-
-        record_system_info.append([state[0], state[1], state[3], action, real_action, env.battery.SOC(),
-                                   env.battery.energy_change, next_state[4], next_state[5], next_state[6],
-                                   next_state[7], next_state[8], env.unbalance, env.operation_cost])
-        record_state.append(state)
-        record_action.append(real_action)
-        record_reward.append(reward)
-        record_output.append(env.current_output)
-        record_unbalance.append(env.unbalance)
-        state = next_state
-    # add information of the last step: dg1, dg2, dg3, soc
-    record_system_info[-1][7:10] = [env.final_step_outputs[0], env.final_step_outputs[1], env.final_step_outputs[2]]
-    record_system_info[-1][5] = env.final_step_outputs[3]
-    record = {'init_info': record_init_info, 'system_info': record_system_info, 'state': record_state,
-              'action': record_action, 'reward': record_reward, 'cost': record_cost, 'unbalance': record_unbalance,
-              'record_output': record_output}
-    return record
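-
-
-# Note: the rollout helpers above and below run the actor for evaluation only.
-# Wrapping each loop in `with torch.no_grad():` (assumed by the inline comments,
-# but not shown in this file) would skip autograd bookkeeping entirely; the
-# explicit .detach() already keeps gradients out of the recorded data either way.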
-
-
-def get_episode_return(env, act, device):
-    episode_return = 0.0  # sum of rewards in an episode
-    episode_unbalance = 0.0
-    state = env.reset()
-    for i in range(24):
-        s_tensor = torch.as_tensor((state,), device=device)
-        a_tensor = act(s_tensor)
-        action = a_tensor.detach().cpu().numpy()[0]  # detach() is redundant under an outer torch.no_grad(), but harmless
-        state, next_state, reward, done = env.step(action)
-        state = next_state
-        episode_return += reward
-        episode_unbalance += env.real_unbalance
-        if done:
-            break
-    return episode_return, episode_unbalance
-
-
-class ReplayBuffer:
-    def __init__(self, max_len, state_dim, action_dim, gpu_id=0):
-        self.now_len = 0
-        self.next_idx = 0
-        self.if_full = False
-        self.max_len = max_len
-        self.data_type = torch.float32
-        self.action_dim = action_dim
-        self.device = torch.device(f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu")
-
-        other_dim = 1 + 1 + self.action_dim  # reward + mask + action
-        self.buf_other = torch.empty(size=(max_len, other_dim), dtype=self.data_type, device=self.device)
-
-        if isinstance(state_dim, int):  # state is a flat vector
-            self.buf_state = torch.empty((max_len, state_dim), dtype=torch.float32, device=self.device)
-        elif isinstance(state_dim, tuple):  # state is an image (pixels)
-            self.buf_state = torch.empty((max_len, *state_dim), dtype=torch.uint8, device=self.device)
-        else:
-            raise ValueError('state_dim')
-
-    def extend_buffer(self, state, other):  # CPU array to CPU array
-        size = len(other)
-        next_idx = self.next_idx + size
-
-        if next_idx > self.max_len:  # wrap around the ring buffer
-            self.buf_state[self.next_idx:self.max_len] = state[:self.max_len - self.next_idx]
-            self.buf_other[self.next_idx:self.max_len] = other[:self.max_len - self.next_idx]
-            self.if_full = True
-
-            next_idx = next_idx - self.max_len
-            self.buf_state[0:next_idx] = state[-next_idx:]
-            self.buf_other[0:next_idx] = other[-next_idx:]
-        else:
-            self.buf_state[self.next_idx:next_idx] = state
-            self.buf_other[self.next_idx:next_idx] = other
-        self.next_idx = next_idx
-
-    def sample_batch(self, batch_size) -> tuple:
-        indices = rd.randint(self.now_len - 1, size=batch_size)  # keeps indices + 1 in range
-        r_m_a = self.buf_other[indices]
-        return (r_m_a[:, 0:1],  # reward
-                r_m_a[:, 1:2],  # mask
-                r_m_a[:, 2:],  # action
-                self.buf_state[indices],  # state
-                self.buf_state[indices + 1])  # next state
-
-    def update_now_len(self):
-        self.now_len = self.max_len if self.if_full else self.next_idx
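-
-
-# Minimal usage sketch (hypothetical sizes; assumes 1-D float states and that
-# each `other` row is packed as [reward, mask, *action]):
-#   buf = ReplayBuffer(max_len=2 ** 17, state_dim=10, action_dim=3)
-#   buf.extend_buffer(state=torch.zeros(512, 10), other=torch.zeros(512, 5))
-#   buf.update_now_len()
-#   reward, mask, action, state, next_state = buf.sample_batch(batch_size=256)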