Paper submission

chenxiaodong 2024-11-22 10:03:31 +08:00
commit 83b1abbf76
129 changed files with 359057 additions and 0 deletions

3
.gitignore vendored Normal file
@@ -0,0 +1,3 @@
__pycache__/
.idea/
gurobi.log


BIN  AgentDDPG/actor.pth Normal file
BIN  AgentDDPG/loss_data.pkl Normal file
BIN  AgentDDPG/reward_data.pkl Normal file
BIN  AgentDDPG/test_data.pkl Normal file


4910
AgentPPO/DRL_10_plots/rl.svg Normal file (diff suppressed because it is too large; 141 KiB)


BIN  AgentPPO/actor_10.pth Normal file
BIN  AgentPPO/actor_llm_1015.pth Normal file
BIN  AgentPPO/actor_ram.pth Normal file
BIN  AgentPPO/loss_10.pkl Normal file
BIN  AgentPPO/loss_llm_1015.pkl Normal file
BIN  AgentPPO/loss_ram.pkl Normal file
BIN  AgentPPO/melt_llm.pkl Normal file
BIN  AgentPPO/melt_lmppo.pkl Normal file
BIN  AgentPPO/melt_milp.pkl Normal file
BIN  AgentPPO/melt_ppo.pkl Normal file
BIN  AgentPPO/melt_ram.pkl Normal file
BIN  AgentPPO/reward_10.pkl Normal file
BIN  AgentPPO/reward_ram.pkl Normal file
BIN  AgentPPO/test_10.pkl Normal file
BIN  AgentPPO/test_llm_1015.pkl Normal file
BIN  AgentPPO/test_ram.pkl Normal file


4474
AgentSAC/DRL_10_plots/rl.svg Normal file (diff suppressed because it is too large; 143 KiB)

BIN  AgentSAC/actor.pth Normal file
BIN  AgentSAC/loss_data.pkl Normal file
BIN  AgentSAC/reward_data.pkl Normal file
BIN  AgentSAC/test_data.pkl Normal file


4526
AgentTD3/DRL_10_plots/rl.svg Normal file (diff suppressed because it is too large; 146 KiB)

BIN  AgentTD3/actor.pth Normal file
BIN  AgentTD3/loss_data.pkl Normal file
BIN  AgentTD3/reward_data.pkl Normal file
BIN  AgentTD3/test_data.pkl Normal file

127
DDPG.py Normal file
@@ -0,0 +1,127 @@
import pickle
import pandas as pd
import torch
from agent import AgentDDPG
from environment import ESSEnv
from tools import *
def update_buffer(_trajectory):
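# Pack one exploration trajectory into the replay buffer: column 0 of 'other' is the reward,
# column 1 is converted from the done flag into the mask (1 - done) * gamma, the rest is the action.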
ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
ary_other = torch.as_tensor([item[1] for item in _trajectory])
ary_other[:, 0] = ary_other[:, 0] # ten_reward
ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma # ten_mask = (1.0 - ary_done) * gamma
buffer.extend_buffer(ten_state, ary_other)
_steps = ten_state.shape[0]
_r_exp = ary_other[:, 0].mean() # other = (reward, mask, action)
return _steps, _r_exp
if __name__ == '__main__':
args = Arguments()
'''record real unbalance'''
reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
loss_record = {'episode': [], 'steps': [], 'critic_loss': [], 'actor_loss': [], 'entropy_loss': []}
args.visible_gpu = '0'
for seed in args.random_seed_list:
args.random_seed = seed
args.agent = AgentDDPG()
agent_name = f'{args.agent.__class__.__name__}'
args.agent.cri_target = True
args.env = ESSEnv()
args.init_before_training(if_main=True)
'''init agent and environment'''
agent = args.agent
env = args.env
agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
args.if_per_or_gae)
'''init replay buffer'''
buffer = ReplayBuffer(max_len=args.max_memo, state_dim=env.state_space.shape[0],
action_dim=env.action_space.shape[0])
'''start training'''
cwd = args.cwd
gamma = args.gamma
batch_size = args.batch_size
target_step = args.target_step  # how many exploration steps to collect per rollout
repeat_times = args.repeat_times  # how many gradient updates to run per batch of collected data
soft_update_tau = args.soft_update_tau
num_episode = args.num_episode
agent.state = env.reset()
'''collect data, train and update network'''
# args.train = False
# args.save_network = False
# args.test_network = False
# args.save_test_data = False
# args.compare_with_gurobi = False
if args.train:
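# warm-up: keep exploring until the replay buffer holds at least 10000 transitions before training starts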
collect_data = True
while collect_data:
print(f'buffer:{buffer.now_len}')
with torch.no_grad():
trajectory = agent.explore_env(env, target_step)
steps, r_exp = update_buffer(trajectory)
buffer.update_now_len()
if buffer.now_len >= 10000:
collect_data = False
for i_episode in range(num_episode):
critic_loss, actor_loss = agent.update_net(buffer, batch_size, repeat_times, soft_update_tau)
loss_record['critic_loss'].append(critic_loss)
loss_record['actor_loss'].append(actor_loss)
with torch.no_grad():
episode_reward, episode_unbalance = get_episode_return(env, agent.act, agent.device)
reward_record['mean_episode_reward'].append(episode_reward)
reward_record['unbalance'].append(episode_unbalance)
print(f'episode: {i_episode}, reward: {episode_reward}, unbalance: {episode_unbalance}, buffer_length: {buffer.now_len}')
if i_episode % 10 == 0: # target_step
with torch.no_grad():
trajectory = agent.explore_env(env, target_step)
steps, r_exp = update_buffer(trajectory)
loss_record_path = f'{args.cwd}/loss_data.pkl'
reward_record_path = f'{args.cwd}/reward_data.pkl'
act_save_path = f'{args.cwd}/actor.pth'
if args.save_network:
with open(loss_record_path, 'wb') as tf:
pickle.dump(loss_record, tf)
with open(reward_record_path, 'wb') as tf:
pickle.dump(reward_record, tf)
torch.save(agent.act.state_dict(), act_save_path)
print('actor parameters have been saved')
if args.test_network:
args.cwd = agent_name
agent.act.load_state_dict(torch.load(act_save_path))
print('parameters have been reloaded for testing')
record = test_one_episode(env, agent.act, agent.device)
eval_data = pd.DataFrame(record['system_info'])
eval_data.columns = ['time_step', 'price', 'netload', 'action', 'real_action', 'soc', 'battery',
'gen1', 'gen2', 'gen3', 'pv', 'wind', 'unbalance', 'operation_cost', 'reward']
if args.save_test_data:
test_data_save_path = f'{args.cwd}/test_data.pkl'
with open(test_data_save_path, 'wb') as tf:
pickle.dump(record, tf)
'''compare with gurobi data and results'''
if args.compare_with_gurobi:
month = record['init_info'][0][0]
day = record['init_info'][0][1]
initial_soc = record['init_info'][0][3]
base_result = optimization_base_result(env, month, day, initial_soc)
if args.plot_on:
from plotDRL import PlotArgs, make_dir, plot_evaluation_information, plot_optimization_result
plot_args = PlotArgs()
plot_args.feature_change = '10'
args.cwd = agent_name
plot_dir = make_dir(args.cwd, plot_args.feature_change)
plot_optimization_result(base_result, plot_dir)
plot_evaluation_information(args.cwd + '/' + 'test_data.pkl', plot_dir)
'''compare the costs obtained from Gurobi and DDPG'''
ratio = sum(eval_data['operation_cost']) / sum(base_result['step_cost'])
print('rl_cost:', sum(eval_data['operation_cost']))
print('gurobi_cost:', sum(base_result['step_cost']))
print('ratio:', ratio)

374
PPO.py Normal file
@@ -0,0 +1,374 @@
import os
import pickle
from copy import deepcopy
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from environment import ESSEnv
from tools import get_episode_return, test_one_episode, optimization_base_result
class ActorPPO(nn.Module):
def __init__(self, mid_dim, state_dim, action_dim):
super().__init__()
self.net = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
nn.Linear(mid_dim, action_dim))
# the logarithm (log) of standard deviation (std) of action, it is a trainable parameter
self.a_logstd = nn.Parameter(torch.zeros((1, action_dim)) - 0.5, requires_grad=True)
self.sqrt_2pi_log = np.log(np.sqrt(2 * np.pi))
def forward(self, state):
return self.net(state).tanh() # action.tanh() limit the data output of action
def get_action(self, state):
a_avg = self.forward(state)  # mean of the Gaussian policy (tanh-bounded network output)
a_std = self.a_logstd.exp()
noise = torch.randn_like(a_avg)
action = a_avg + noise * a_std
return action, noise
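# For this diagonal Gaussian policy, log pi(a|s) = -sum_j [log sigma_j + log sqrt(2*pi) + (a_j - mu_j)^2 / (2*sigma_j^2)];
# get_logprob_entropy evaluates it for sampled actions, while get_old_logprob reuses the stored
# exploration noise (a = mu + sigma * noise, so (a - mu) / sigma is exactly that noise).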
def get_logprob_entropy(self, state, action):
a_avg = self.forward(state)
a_std = self.a_logstd.exp()
delta = ((a_avg - action) / a_std).pow(2) * 0.5
logprob = -(self.a_logstd + self.sqrt_2pi_log + delta).sum(1) # new_logprob
dist_entropy = (logprob.exp() * logprob).mean() # policy entropy
return logprob, dist_entropy
def get_old_logprob(self, _action, noise): # noise = action - a_noise
delta = noise.pow(2) * 0.5
return -(self.a_logstd + self.sqrt_2pi_log + delta).sum(1) # old_logprob
class CriticAdv(nn.Module):
def __init__(self, mid_dim, state_dim, _action_dim):
super().__init__()
self.net = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
nn.Linear(mid_dim, 1))
def forward(self, state):
return self.net(state) # Advantage value
def smooth_rewards(rewards, window=10):
rewards = rewards.unsqueeze(0).unsqueeze(0)  # reshape rewards to [1, 1, len] so conv1d can be applied
kernel = torch.ones(1, 1, window, device=rewards.device) / window  # uniform moving-average kernel
smoothed_rewards = F.conv1d(rewards, kernel, padding='valid')  # moving average
smoothed_rewards = smoothed_rewards.squeeze(0).squeeze(0)  # drop the extra dimensions
# keep the output the same length as the input: the first window-1 rewards are left unchanged
return torch.cat((rewards[0, 0, :window-1], smoothed_rewards))
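# Shape check (illustrative): smooth_rewards(torch.arange(20.), window=5) returns a length-20 tensor
# whose first 4 entries equal the raw inputs and whose remaining entries are 5-step moving averages.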
class AgentPPO:
def __init__(self):
super().__init__()
self.state = None
self.device = None
self.action_dim = None
self.get_obj_critic = None
self.criterion = torch.nn.SmoothL1Loss()
self.cri = self.cri_target = self.if_use_cri_target = self.cri_optim = self.ClassCri = None
self.act = self.act_target = self.if_use_act_target = self.act_optim = self.ClassAct = None
'''init modify'''
self.ClassCri = CriticAdv
self.ClassAct = ActorPPO
self.ratio_clip = 0.2 # ratio.clamp(1 - clip, 1 + clip)
self.lambda_entropy = 0.02 # could be 0.01~0.05
self.lambda_gae_adv = 0.98 # could be 0.95~0.99, (Generalized Advantage Estimation. ICLR.2016.)
self.get_reward_sum = None # self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
self.trajectory_list = None
def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, if_use_gae=True, gpu_id=0):
self.device = torch.device(f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu")
self.trajectory_list = list()
# choose whether to use gae or not
self.get_reward_sum = self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
self.cri = self.ClassCri(net_dim, state_dim, action_dim).to(self.device)
self.act = self.ClassAct(net_dim, state_dim, action_dim).to(self.device) if self.ClassAct else self.cri
self.cri_target = deepcopy(self.cri) if self.if_use_cri_target else self.cri
self.act_target = deepcopy(self.act) if self.if_use_act_target else self.act
self.cri_optim = torch.optim.Adam(self.cri.parameters(), learning_rate)
self.act_optim = torch.optim.Adam(self.act.parameters(), learning_rate) if self.ClassAct else self.cri
def select_action(self, state):
states = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
actions, noises = self.act.get_action(states)
return actions[0].detach().cpu().numpy(), noises[0].detach().cpu().numpy()
def explore_env(self, env, target_step):
state = self.state  # the agent carries the current environment state between exploration calls
trajectory_temp = list()
last_done = 0
for i in range(target_step):
action, noise = self.select_action(state)
state, next_state, reward, done, = env.step(np.tanh(action))
trajectory_temp.append((state, reward, done, action, noise))
if done:
state = env.reset()
last_done = i
else:
state = next_state
self.state = state
'''splice list'''
# store 0 trajectory information to list
trajectory_list = self.trajectory_list + trajectory_temp[:last_done + 1]
self.trajectory_list = trajectory_temp[last_done:]
return trajectory_list
def update_net(self, buffer, batch_size, repeat_times, soft_update_tau):
"""put data extract and update network together"""
with torch.no_grad():
buf_len = buffer[0].shape[0]
# decompose buffer data
buf_state, buf_action, buf_noise, buf_reward, buf_mask = [ten.to(self.device) for ten in buffer]
'''get buf_r_sum, buf_logprob'''
bs = 4096
buf_value = [self.cri_target(buf_state[i:i + bs]) for i in range(0, buf_len, bs)]
buf_value = torch.cat(buf_value, dim=0)
buf_logprob = self.act.get_old_logprob(buf_action, buf_noise)
buf_r_sum, buf_advantage = self.get_reward_sum(buf_len, buf_reward, buf_mask, buf_value) # detach()
# normalize advantage
buf_advantage = (buf_advantage - buf_advantage.mean()) / (buf_advantage.std() + 1e-5)
buf_advantage = smooth_rewards(buf_advantage, window=10)
del buf_noise, buffer[:]
'''PPO: Surrogate objective of Trust Region'''
obj_critic = obj_actor = None
for _ in range(int(buf_len / batch_size * repeat_times)):
indices = torch.randint(buf_len, size=(batch_size,), requires_grad=False, device=self.device)
state = buf_state[indices]
action = buf_action[indices]
r_sum = buf_r_sum[indices]
logprob = buf_logprob[indices]
advantage = buf_advantage[indices]
new_logprob, obj_entropy = self.act.get_logprob_entropy(state, action) # it is obj_actor
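# clipped surrogate: ratio = exp(log pi_new - log pi_old); the objective is
# E[min(ratio * A, clip(ratio, 1 - eps, 1 + eps) * A)], negated below because the optimizer minimizes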
ratio = (new_logprob - logprob.detach()).exp()
surrogate1 = advantage * ratio
surrogate2 = advantage * ratio.clamp(1 - self.ratio_clip, 1 + self.ratio_clip)
obj_surrogate = -torch.min(surrogate1, surrogate2).mean()
obj_actor = obj_surrogate + obj_entropy * self.lambda_entropy
self.optim_update(self.act_optim, obj_actor) # update actor
value = self.cri(state).squeeze(1) # critic network predicts the reward_sum (Q value) of state
# use smoothloss L1 to evaluate the value loss
# obj_critic = self.criterion(value, r_sum) / (r_sum.std() + 1e-6)
obj_critic = self.criterion(value, r_sum)
self.optim_update(self.cri_optim, obj_critic)  # backpropagate the value loss and update the critic
# choose whether to use soft update
self.soft_update(self.cri_target, self.cri, soft_update_tau) if self.cri_target is not self.cri else None
a_std_log = getattr(self.act, 'a_logstd', torch.zeros(1))  # the actor stores its log-std as 'a_logstd'
return obj_critic.item(), obj_actor.item(), a_std_log.mean().item() # logging_tuple
def get_reward_sum_raw(self, buf_len, buf_reward, buf_mask, buf_value) -> (torch.Tensor, torch.Tensor):
buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device) # reward sum
pre_r_sum = 0
for i in range(buf_len - 1, -1, -1):
buf_r_sum[i] = buf_reward[i] + buf_mask[i] * pre_r_sum
pre_r_sum = buf_r_sum[i]
buf_advantage = buf_r_sum - (buf_mask * buf_value[:, 0])
return buf_r_sum, buf_advantage
def get_reward_sum_gae(self, buf_len, ten_reward, ten_mask, ten_value) -> (torch.Tensor, torch.Tensor):
buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device) # old policy value
buf_advantage = torch.empty(buf_len, dtype=torch.float32, device=self.device) # advantage value
pre_r_sum = 0.0
pre_advantage = 0.0 # advantage value of previous step
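# backward recursion over the rollout; for reference, standard GAE uses
# delta_t = r_t + gamma * V(s_{t+1}) - V(s_t) and A_t = delta_t + gamma * lambda * A_{t+1};
# here ten_mask = (1 - done) * gamma, and the mask also multiplies the -V(s_t) term (a variant of that recursion)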
for i in range(buf_len - 1, -1, -1):
buf_r_sum[i] = ten_reward[i] + ten_mask[i] * pre_r_sum
pre_r_sum = buf_r_sum[i]
buf_advantage[i] = ten_reward[i] + ten_mask[i] * (pre_advantage - ten_value[i])
pre_advantage = ten_value[i] + buf_advantage[i] * self.lambda_gae_adv
return buf_r_sum, buf_advantage
@staticmethod
def optim_update(optimizer, objective):
optimizer.zero_grad()
objective.backward()
optimizer.step()
@staticmethod
def soft_update(target_net, current_net, tau):
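# Polyak averaging: theta_target <- tau * theta_online + (1 - tau) * theta_target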
for tar, cur in zip(target_net.parameters(), current_net.parameters()):
tar.data.copy_(cur.data.__mul__(tau) + tar.data.__mul__(1.0 - tau))
class Arguments:
def __init__(self, agent=None, env=None):
self.agent = agent # Deep Reinforcement Learning algorithm
self.env = env # the environment for training
self.cwd = None # current work directory. None means set automatically
self.if_remove = False # remove the cwd folder? (True, False, None:ask me)
self.visible_gpu = '0' # for example: os.environ['CUDA_VISIBLE_DEVICES'] = '0, 2,'
self.num_threads = 32 # cpu_num for evaluate model, torch.set_num_threads(self.num_threads)
'''Arguments for training'''
self.num_episode = 1000 # to control the train episodes for PPO
self.gamma = 0.995 # discount factor of future rewards
self.learning_rate = 1e-4 # 1e-4 2 ** -14 2e-4
self.soft_update_tau = 2 ** -8 # 1e-3 2 ** -8
self.net_dim = 256 # the network width
self.batch_size = 4096 # num of transitions sampled from replay buffer.
self.repeat_times = 2 ** 5 # collect target_step, then update network
self.target_step = 4096 # repeatedly update network to keep critic's loss small
self.max_memo = self.target_step # capacity of replay buffer
self.if_gae_or_raw = True # GAE for on-policy sparse reward: Generalized Advantage Estimation.
'''Arguments for evaluate'''
self.random_seed = 1234
# self.random_seed_list = [1234, 2234, 3234, 4234, 5234]
self.random_seed_list = [1567]
self.train = True
self.save_network = True
self.test_network = True
self.save_test_data = True
self.compare_with_gurobi = True
self.plot_on = True
def init_before_training(self):
if self.cwd is None:
agent_name = self.agent.__class__.__name__
self.cwd = f'./{agent_name}'
np.random.seed(self.random_seed)
torch.manual_seed(self.random_seed)
torch.set_num_threads(self.num_threads)
torch.set_default_dtype(torch.float32)
os.environ['CUDA_VISIBLE_DEVICES'] = str(self.visible_gpu)
def update_buffer(_trajectory):
_trajectory = list(map(list, zip(*_trajectory))) # 2D-list transpose, here cut the trajectory into 5 parts
ten_state = torch.as_tensor(_trajectory[0]) # tensor state here
ten_reward = torch.as_tensor(_trajectory[1], dtype=torch.float32)
# _trajectory[2] = done; replace the done flag with a mask to save memory
ten_mask = (1.0 - torch.as_tensor(_trajectory[2], dtype=torch.float32)) * gamma
ten_action = torch.as_tensor(_trajectory[3])
ten_noise = torch.as_tensor(_trajectory[4], dtype=torch.float32)
buffer[:] = (ten_state, ten_action, ten_noise, ten_reward, ten_mask) # list store tensors
_steps = ten_reward.shape[0] # how many steps are collected in all trajectories
_r_exp = ten_reward.mean() # the mean reward
return _steps, _r_exp
if __name__ == '__main__':
args = Arguments()
reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
loss_record = {'episode': [], 'steps': [], 'critic_loss': [], 'actor_loss': [], 'entropy_loss': []}
args.visible_gpu = '0'
for seed in args.random_seed_list:
args.random_seed = seed
args.agent = AgentPPO()
agent_name = f'{args.agent.__class__.__name__}'
args.agent.cri_target = True
args.env = ESSEnv()
args.init_before_training()
'''init agent and environment'''
agent = args.agent
env = args.env
agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
args.if_gae_or_raw)
cwd = args.cwd
gamma = args.gamma
batch_size = args.batch_size # how much data should be used to update net
target_step = args.target_step  # how many exploration steps to collect per rollout
repeat_times = args.repeat_times  # how many gradient updates to run per batch of collected data
soft_update_tau = args.soft_update_tau
num_episode = args.num_episode
agent.state = env.reset()
'''init buffer'''
buffer = list()
'''init training parameters'''
args.train = False
args.save_network = False
# args.test_network = False
# args.save_test_data = False
# args.compare_with_gurobi = False
# args.plot_on = False
if args.train:
for i_episode in range(num_episode):
with torch.no_grad():
trajectory_list = agent.explore_env(env, target_step)
steps, r_exp = update_buffer(trajectory_list)
critic_loss, actor_loss, entropy_loss = agent.update_net(buffer, batch_size, repeat_times,
soft_update_tau)
loss_record['critic_loss'].append(critic_loss)
loss_record['actor_loss'].append(actor_loss)
loss_record['entropy_loss'].append(entropy_loss)
with torch.no_grad():
episode_reward, episode_unbalance = get_episode_return(env, agent.act, agent.device)
reward_record['mean_episode_reward'].append(episode_reward)
reward_record['unbalance'].append(episode_unbalance)
print(f'episode: {i_episode}, reward: {episode_reward}, unbalance: {episode_unbalance}')
act_save_path = f'{args.cwd}/actor_10.pth'
loss_record_path = f'{args.cwd}/loss_10.pkl'
reward_record_path = f'{args.cwd}/reward_10.pkl'
if args.save_network:
with open(loss_record_path, 'wb') as tf:
pickle.dump(loss_record, tf)
with open(reward_record_path, 'wb') as tf:
pickle.dump(reward_record, tf)
torch.save(agent.act.state_dict(), act_save_path)
print('actor parameters have been saved')
if args.test_network:
args.cwd = agent_name
agent.act.load_state_dict(torch.load(act_save_path))
print('parameters have been reloaded for testing')
record = test_one_episode(env, agent.act, agent.device)
eval_data = pd.DataFrame(record['system_info'])
eval_data.columns = ['time_step', 'price', 'load', 'action', 'real_action', 'soc', 'battery',
'gen1', 'gen2', 'gen3', 'pv', 'wind', 'unbalance', 'operation_cost', 'reward']
if args.save_test_data:
test_data_save_path = f'{args.cwd}/test_10.pkl'
with open(test_data_save_path, 'wb') as tf:
pickle.dump(record, tf)
'''compare with gurobi data and results'''
if args.compare_with_gurobi:
month = record['init_info'][0][0]
day = record['init_info'][0][1]
initial_soc = record['init_info'][0][3]
base_result = optimization_base_result(env, month, day, initial_soc)
if args.plot_on:
from plotDRL import PlotArgs, make_dir, plot_evaluation_information, plot_optimization_result
plot_args = PlotArgs()
plot_args.feature_change = '10'
args.cwd = agent_name
plot_dir = make_dir(args.cwd, plot_args.feature_change)
plot_optimization_result(base_result, plot_dir)
plot_evaluation_information(args.cwd + '/' + 'test_10.pkl', plot_dir)
'''compare the costs obtained from Gurobi and PPO'''
ratio = sum(eval_data['operation_cost']) / sum(base_result['step_cost'])
print('rl_cost:', sum(eval_data['operation_cost']))
print('gurobi_cost:', sum(base_result['step_cost']))
print('ratio:', ratio)

472
PPO_llm.py Normal file
@@ -0,0 +1,472 @@
import json
import os
import pickle
from copy import deepcopy
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from data_manager import *
from environment import ESSEnv
from tools import optimization_base_result
def load_llm_actions(file_path):
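# data/results.json is assumed to hold a flat list of 5-dimensional action vectors, one per hour,
# indexed by the hour-of-year offset computed in the training/testing loops below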
with open(file_path, 'r') as file:
data = json.load(file)
return data
class ActorPPO(nn.Module):
def __init__(self, mid_dim, state_dim, action_dim):
super().__init__()
self.net = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
nn.Linear(mid_dim, action_dim))
# the logarithm (log) of standard deviation (std) of action, it is a trainable parameter
self.a_logstd = nn.Parameter(torch.zeros((1, action_dim)) - 0.5, requires_grad=True)
self.sqrt_2pi_log = np.log(np.sqrt(2 * np.pi))
def forward(self, state):
return self.net(state).tanh() # action.tanh() limit the data output of action
def get_action(self, state):
a_avg = self.forward(state)  # mean of the Gaussian policy (tanh-bounded network output)
a_std = self.a_logstd.exp()
noise = torch.randn_like(a_avg)
action = a_avg + noise * a_std
return action, noise
def get_logprob_entropy(self, state, action):
a_avg = self.forward(state)
a_std = self.a_logstd.exp()
delta = ((a_avg - action) / a_std).pow(2) * 0.5
logprob = -(self.a_logstd + self.sqrt_2pi_log + delta).sum(1) # new_logprob
dist_entropy = (logprob.exp() * logprob).mean() # policy entropy
return logprob, dist_entropy
def get_old_logprob(self, _action, noise): # noise = action - a_noise
delta = noise.pow(2) * 0.5
return -(self.a_logstd + self.sqrt_2pi_log + delta).sum(1) # old_logprob
class CriticAdv(nn.Module):
def __init__(self, mid_dim, state_dim, _action_dim):
super().__init__()
self.net = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
nn.Linear(mid_dim, 1)
)
def forward(self, state):
return self.net(state) # Advantage value
class AgentPPO:
def __init__(self):
super().__init__()
self.state = None
self.device = None
self.action_dim = None
self.get_obj_critic = None
self.current_index = 0
self.criterion = torch.nn.SmoothL1Loss()
self.cri = self.cri_target = self.if_use_cri_target = self.cri_optim = self.ClassCri = None
self.act = self.act_target = self.if_use_act_target = self.act_optim = self.ClassAct = None
self.ClassCri = CriticAdv
self.ClassAct = ActorPPO
self.ratio_clip = 0.2 # ratio.clamp(1 - clip, 1 + clip)
self.lambda_entropy = 0.02 # could be 0.01~0.05
self.lambda_gae_adv = 0.98 # could be 0.95~0.99, GAE (Generalized Advantage Estimation. ICLR.2016.)
self.get_reward_sum = None # self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
self.trajectory_list = None
self.llm_actions = load_llm_actions('data/results.json')
def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, if_use_gae=False, gpu_id=0):
self.device = torch.device(f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu")
self.trajectory_list = list()
# choose whether to use gae or not
self.get_reward_sum = self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
self.cri = self.ClassCri(net_dim, state_dim, action_dim).to(self.device)
self.act = self.ClassAct(net_dim, state_dim, action_dim).to(self.device) if self.ClassAct else self.cri
self.cri_target = deepcopy(self.cri) if self.if_use_cri_target else self.cri
self.act_target = deepcopy(self.act) if self.if_use_act_target else self.act
self.cri_optim = torch.optim.Adam(self.cri.parameters(), learning_rate)
self.act_optim = torch.optim.Adam(self.act.parameters(), learning_rate) if self.ClassAct else self.cri
def select_action(self, state):
states = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
actions, noises = self.act.get_action(states)
return actions[0].detach().cpu().numpy(), noises[0].detach().cpu().numpy()
def explore_env(self, env, target_step, index):
state = self.state  # the agent carries the current environment state between exploration calls
trajectory_temp = list()
last_done = 0
for i in range(target_step):
action, noise = self.select_action(state)
llm_action = self.llm_actions[index]
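# blend: 95% RL policy output + 5% LLM-suggested action, element-wise over the 5 action dimensions,
# so the LLM acts as a soft prior during exploration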
action = [0.95 * action[i] + 0.05 * llm_action[i] for i in range(5)]
state, next_state, reward, done, = env.step(np.tanh(action))
trajectory_temp.append((state, reward, done, action, noise))
if done:
state = env.reset()
last_done = i
else:
state = next_state
self.state = state
'''splice list'''
# store 0 trajectory information to list
trajectory_list = self.trajectory_list + trajectory_temp[:last_done + 1]
self.trajectory_list = trajectory_temp[last_done:]
return trajectory_list
def update_net(self, buffer, batch_size, repeat_times, soft_update_tau):
"""put data extract and update network together"""
with torch.no_grad():
buf_len = buffer[0].shape[0]
# decompose buffer data
buf_state, buf_action, buf_noise, buf_reward, buf_mask = [ten.to(self.device) for ten in buffer]
'''get buf_r_sum, buf_logprob'''
bs = 4096
buf_value = [self.cri_target(buf_state[i:i + bs]) for i in range(0, buf_len, bs)]
buf_value = torch.cat(buf_value, dim=0)
buf_logprob = self.act.get_old_logprob(buf_action, buf_noise)
buf_r_sum, buf_advantage = self.get_reward_sum(buf_len, buf_reward, buf_mask, buf_value) # detach()
# normalize advantage
buf_advantage = (buf_advantage - buf_advantage.mean()) / (buf_advantage.std() + 1e-5)
del buf_noise, buffer[:]
'''PPO: Surrogate objective of Trust Region'''
obj_critic = obj_actor = None
for _ in range(int(buf_len / batch_size * repeat_times)):
indices = torch.randint(buf_len, size=(batch_size,), requires_grad=False, device=self.device)
state = buf_state[indices]
action = buf_action[indices]
r_sum = buf_r_sum[indices]
logprob = buf_logprob[indices]
advantage = buf_advantage[indices]
new_logprob, obj_entropy = self.act.get_logprob_entropy(state, action) # it's obj_actor
ratio = (new_logprob - logprob.detach()).exp()
surrogate1 = advantage * ratio
surrogate2 = advantage * ratio.clamp(1 - self.ratio_clip, 1 + self.ratio_clip)
obj_surrogate = -torch.min(surrogate1, surrogate2).mean()
obj_actor = obj_surrogate + obj_entropy * self.lambda_entropy
self.optim_update(self.act_optim, obj_actor) # update actor
value = self.cri(state).squeeze(1) # critic network predicts the reward_sum (Q value) of state
# use smoothloss L1 to evaluate the value loss
# obj_critic = self.criterion(value, r_sum) / (r_sum.std() + 1e-6)
obj_critic = self.criterion(value, r_sum)
self.optim_update(self.cri_optim, obj_critic)  # backpropagate the value loss and update the critic
# choose whether to use soft update
self.soft_update(self.cri_target, self.cri, soft_update_tau) if self.cri_target is not self.cri else None
a_std_log = getattr(self.act, 'a_logstd', torch.zeros(1))  # the actor stores its log-std as 'a_logstd'
return obj_critic.item(), obj_actor.item(), a_std_log.mean().item() # logging_tuple
def get_reward_sum_raw(self, buf_len, buf_reward, buf_mask, buf_value) -> (torch.Tensor, torch.Tensor):
buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device) # reward sum
pre_r_sum = 0
for i in range(buf_len - 1, -1, -1):
buf_r_sum[i] = buf_reward[i] + buf_mask[i] * pre_r_sum
pre_r_sum = buf_r_sum[i]
buf_advantage = buf_r_sum - (buf_mask * buf_value[:, 0])
return buf_r_sum, buf_advantage
def get_reward_sum_gae(self, buf_len, ten_reward, ten_mask, ten_value) -> (torch.Tensor, torch.Tensor):
buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device) # old policy value
buf_advantage = torch.empty(buf_len, dtype=torch.float32, device=self.device) # advantage value
pre_r_sum = 0.0
pre_advantage = 0.0 # advantage value of previous step
for i in range(buf_len - 1, -1, -1):
buf_r_sum[i] = ten_reward[i] + ten_mask[i] * pre_r_sum
pre_r_sum = buf_r_sum[i]
buf_advantage[i] = ten_reward[i] + ten_mask[i] * (pre_advantage - ten_value[i]) # fix a bug here
pre_advantage = ten_value[i] + buf_advantage[i] * self.lambda_gae_adv
return buf_r_sum, buf_advantage
@staticmethod
def optim_update(optimizer, objective):
optimizer.zero_grad()
objective.backward()
optimizer.step()
@staticmethod
def soft_update(target_net, current_net, tau):
for tar, cur in zip(target_net.parameters(), current_net.parameters()):
tar.data.copy_(cur.data.__mul__(tau) + tar.data.__mul__(1.0 - tau))
class Arguments:
def __init__(self, agent=None, env=None):
self.agent = agent # Deep Reinforcement Learning algorithm
self.env = env # the environment for training
self.cwd = None # current work directory. None means set automatically
self.visible_gpu = '0' # for example: os.environ['CUDA_VISIBLE_DEVICES'] = '0, 2,'
self.num_threads = 32 # cpu_num for evaluate model, torch.set_num_threads(self.num_threads)
'''Arguments for training'''
self.num_episode = 1000 # to control the train episodes for PPO
self.gamma = 0.995 # discount factor of future rewards
self.learning_rate = 1e-4 # 2e-4 / 6e-5 / 2 ** -4
self.soft_update_tau = 2 ** -8 # 5e-3 / 4e-4 / 2 ** -8 1e-3
self.net_dim = 256 # the network width
self.batch_size = 4096 # num of transitions sampled from replay buffer.
self.repeat_times = 2 ** 5 # collect target_step, then update network
self.target_step = 4096 # repeatedly update network to keep critic's loss small
self.max_memo = self.target_step # capacity of replay buffer
self.if_gae_or_raw = True # GAE for on-policy sparse reward: Generalized Advantage Estimation.
'''Arguments for evaluate'''
self.random_seed = 1234 # initialize random seed in self.init_before_training()
# self.random_seed_list = [1234, 2234, 3234]
self.random_seed_list = [3234]
self.train = True
self.save_network = True
self.test_network = True
self.save_test_data = True
self.compare_with_gurobi = True
self.plot_on = True
def init_before_training(self):
if self.cwd is None:
agent_name = self.agent.__class__.__name__
self.cwd = f'./{agent_name}'
np.random.seed(self.random_seed)
torch.manual_seed(self.random_seed)
torch.set_num_threads(self.num_threads)
torch.set_default_dtype(torch.float32)
os.environ['CUDA_VISIBLE_DEVICES'] = str(self.visible_gpu)
def update_buffer(_trajectory):
_trajectory = list(map(list, zip(*_trajectory))) # 2D-list transpose, here cut the trajectory into 5 parts
ten_state = torch.as_tensor(_trajectory[0], dtype=torch.float32) # tensor state here
ten_reward = torch.as_tensor(_trajectory[1], dtype=torch.float32)
# _trajectory[2] = done, replace done by mask, save memory
ten_mask = (1.0 - torch.as_tensor(_trajectory[2], dtype=torch.float32)) * gamma
ten_action = torch.as_tensor(_trajectory[3], dtype=torch.float32)
ten_noise = torch.as_tensor(_trajectory[4], dtype=torch.float32)
buffer[:] = (ten_state, ten_action, ten_noise, ten_reward, ten_mask) # list store tensors
_steps = ten_reward.shape[0] # how many steps are collected in all trajectories
_r_exp = ten_reward.mean() # the mean reward
return _steps, _r_exp
def test_one_episode(env, act, device, index):
"""to get evaluate information, here record the unbalance of after taking action"""
record_state = []
record_action = []
record_reward = []
record_unbalance = []
record_system_info = [] # [time, price, netload, action, real action, soc, output*4, unbalance, cost]
record_init_info = []  # includes month, day, time, initial soc
env.TRAIN = False
state = env.reset()
record_init_info.append([env.month, env.day, env.current_time, env.battery.current_capacity])
print(f'current testing month is {env.month}, day is {env.day}, initial_soc is {env.battery.current_capacity}')
llm_actions = load_llm_actions('data/results.json')
for i in range(24):
s_tensor = torch.as_tensor((state,), device=device)
a_tensor = act(s_tensor)
rl_action = a_tensor.detach().cpu().numpy()[0]
llm_action = llm_actions[index]
action = [0.95 * rl_action[i] + 0.05 * llm_action[i] for i in range(5)]
real_action = action
state, next_state, reward, done = env.step(action)
record_system_info.append([state[0], state[1], state[3] + env.wind.current_power, action, real_action,
env.battery.SOC(), env.battery.energy_change, next_state[4], next_state[5],
next_state[6], env.solar.current_power, env.wind.current_power, env.unbalance,
env.operation_cost, reward])
record_state.append(state)
record_action.append(real_action)
record_reward.append(reward)
record_unbalance.append(env.unbalance)
state = next_state
# add the last step's information: dg1, dg2, dg3, soc, tem, irr
record_system_info[-1][7:12] = [env.final_step_outputs[0], env.final_step_outputs[1], env.final_step_outputs[2],
env.final_step_outputs[4], env.final_step_outputs[5]]
record_system_info[-1][5] = env.final_step_outputs[3]
record = {'init_info': record_init_info, 'system_info': record_system_info, 'state': record_state,
'action': record_action, 'reward': record_reward, 'unbalance': record_unbalance}
return record
def test_llm(env, index):
record_system_info = []
record_init_info = []
env.TRAIN = False
record_init_info.append([env.month, env.day, env.current_time, env.battery.current_capacity])
llm_actions = load_llm_actions('data/results.json')
cumulative_reward = 0
for i in range(24):
action = llm_actions[index + i]
real_action = action
state, next_state, reward, done = env.step(action)
record_system_info.append([state[0], state[1], state[3] + env.wind.current_power, action, real_action,
env.battery.SOC(), env.battery.energy_change, next_state[4], next_state[5],
next_state[6], env.solar.current_power, env.wind.current_power, env.unbalance,
env.operation_cost, reward])
cumulative_reward += reward
if done:
break
record_system_info[-1][7:12] = [env.final_step_outputs[0], env.final_step_outputs[1], env.final_step_outputs[2],
env.final_step_outputs[4], env.final_step_outputs[5]]
record_system_info[-1][5] = env.final_step_outputs[3]
record = {'system_info': record_system_info, 'cumulative_reward': cumulative_reward}
return record
def get_episode_return(env, act, device, index):
episode_reward = 0.0
episode_unbalance = 0.0
state = env.reset()
llm_actions = load_llm_actions('data/results.json')
for i in range(24):
s_tensor = torch.as_tensor((state,), device=device)
a_tensor = act(s_tensor)
rl_action = a_tensor.detach().cpu().numpy()[0]
llm_action = llm_actions[index]
action = [0.95 * rl_action[i] + 0.05 * llm_action[i] for i in range(5)]
state, next_state, reward, done, = env.step(action)
state = next_state
episode_reward += reward
episode_unbalance += env.real_unbalance
if done:
break
return episode_reward, episode_unbalance
if __name__ == '__main__':
args = Arguments()
reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
loss_record = {'episode': [], 'steps': [], 'critic_loss': [], 'actor_loss': [], 'entropy_loss': []}
args.visible_gpu = '0,1'
for seed in args.random_seed_list:
args.random_seed = seed
args.agent = AgentPPO()
args.agent.cri_target = True
agent_name = f'{args.agent.__class__.__name__}'
args.env = ESSEnv()
args.init_before_training()
'''init agent and environment'''
agent = args.agent
env = args.env
agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
args.if_gae_or_raw)
cwd = args.cwd
gamma = args.gamma
batch_size = args.batch_size # how much data should be used to update net
target_step = args.target_step  # how many exploration steps to collect per rollout
repeat_times = args.repeat_times  # how many gradient updates to run per batch of collected data
soft_update_tau = args.soft_update_tau
agent.state = env.reset()
'''init buffer'''
buffer = list()
'''init training parameters'''
num_episode = args.num_episode
args.train = False
args.save_network = False
# args.test_network = False
# args.save_test_data = False
# args.compare_with_gurobi = False
# args.plot_on = False
if args.train:
for i_episode in range(num_episode):
with torch.no_grad():
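# hour-of-year offset of the current simulation time, used to look up the matching LLM action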
index = (sum(Constant.MONTHS_LEN[:env.month - 1]) + env.day - 1) * 24 + env.current_time
trajectory_list = agent.explore_env(env, target_step, index)
steps, r_exp = update_buffer(trajectory_list)
critic_loss, actor_loss, entropy_loss = agent.update_net(buffer, batch_size, repeat_times,
soft_update_tau)
loss_record['critic_loss'].append(critic_loss)
loss_record['actor_loss'].append(actor_loss)
loss_record['entropy_loss'].append(entropy_loss)
with torch.no_grad():
episode_reward, episode_unbalance = get_episode_return(env, agent.act, agent.device, index)
reward_record['mean_episode_reward'].append(episode_reward)
reward_record['unbalance'].append(episode_unbalance)
print(f'episode: {i_episode}, reward: {episode_reward}, unbalance: {episode_unbalance}')
act_save_path = f'{args.cwd}/actor_llm_1015.pth'
loss_record_path = f'{args.cwd}/loss_llm_1015.pkl'
reward_record_path = f'{args.cwd}/reward_llm_1015.pkl'
if args.save_network:
with open(loss_record_path, 'wb') as tf:
pickle.dump(loss_record, tf)
with open(reward_record_path, 'wb') as tf:
pickle.dump(reward_record, tf)
torch.save(agent.act.state_dict(), act_save_path)
print('actor parameters have been saved')
if args.test_network:
args.cwd = agent_name
agent.act.load_state_dict(torch.load(act_save_path))
print('parameters have been reloaded for testing')
index = (sum(Constant.MONTHS_LEN[:env.month - 1]) + env.day - 1) * 24 + env.current_time
record = test_one_episode(env, agent.act, agent.device, index)
# re = test_llm(env, index)
rl_data = pd.DataFrame(record['system_info'])
# llm_data = pd.DataFrame(re['system_info'])
rl_data.columns = ['time_step', 'price', 'netload', 'action', 'real_action', 'soc', 'battery',
'gen1', 'gen2', 'gen3', 'pv', 'wind', 'unbalance', 'operation_cost', 'reward']
if args.save_test_data:
test_data_save_path = f'{args.cwd}/test_llm_1015.pkl'
# test_llm_save_path = f'{args.cwd}/test_only_llm_1015.pkl'
with open(test_data_save_path, 'wb') as tf:
pickle.dump(record, tf)
# with open(test_llm_save_path, 'wb') as f:
# pickle.dump(re, f)
'''compare with gurobi data and results'''
if args.compare_with_gurobi:
month = record['init_info'][0][0]
day = record['init_info'][0][1]
initial_soc = record['init_info'][0][3]
base_result = optimization_base_result(env, month, day, initial_soc)
if args.plot_on:
from plotDRL import *
plot_args = PlotArgs()
plot_args.feature_change = 'llm_1015'
args.cwd = agent_name
plot_dir = make_dir(args.cwd, plot_args.feature_change)
# plot_optimization_result(base_result, plot_dir)
# plot_evaluation_information(args.cwd + '/' + 'test_llm_1015.pkl', plot_dir)
plot_soc(base_result, args.cwd + '/' + 'test_llm_1015.pkl', plot_dir)
plot_energy(base_result, args.cwd + '/' + 'test_llm_1015.pkl', plot_dir)
'''compare the costs obtained from Gurobi and PPO'''
print('rl_cost:', sum(rl_data['operation_cost']))
print('gurobi_cost:', sum(base_result['step_cost']))
print('ratio:', sum(rl_data['operation_cost']) / sum(base_result['step_cost']))

136
PPO_llm_melt.py Normal file
@@ -0,0 +1,136 @@
import random
from PPO_llm import *
from plotDRL import *
def generate_test_dates(num):
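# sample `num` distinct (month, day) pairs, restricted to day 20 onward of each month
# (assumed here to be the held-out test portion of the data)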
dates = []
months_days = {1: 31, 2: 28, 3: 31, 4: 30, 5: 31, 6: 30, 7: 31, 8: 31, 9: 30, 10: 31, 11: 30, 12: 31}
while len(dates) < num:
month = random.randint(1, 12)
day = random.randint(20, months_days[month])
if (month, day) not in dates:
dates.append((month, day))
return dates
def test_llm(env, llm_actions, index):
sum_reward = 0
sum_unbalance = 0
for i in range(24):
action = llm_actions[index + i]
state, next_state, reward, done = env.step(action)
sum_reward += reward
sum_unbalance += env.real_unbalance
record = {'reward': [sum_reward], 'unbalance': [sum_unbalance]}
return record
def test_one_episode(env, act, device):
state = env.reset()
sum_reward = 0
sum_unbalance = 0
for i in range(24):
s_tensor = torch.as_tensor((state,), device=device)
a_tensor = act(s_tensor)
action = a_tensor.detach().cpu().numpy()[0]
# rl_action = a_tensor.detach().cpu().numpy()[0]
# llm_action = llm_actions[index + i]
# action = 0.95 * np.array(rl_action) + 0.05 * np.array(llm_action)
state, next_state, reward, done = env.step(action)
state = next_state
sum_reward += reward
sum_unbalance += env.real_unbalance
record = {'reward': [sum_reward], 'unbalance': [sum_unbalance]}
return record
def run_multiple_tests(env, args, agent1, agent2, num_tests=10):
all_ppo = pd.DataFrame()
all_llm = pd.DataFrame()
all_lmppo = pd.DataFrame()
all_milp = pd.DataFrame()
test_dates = generate_test_dates(num_tests)
llm_actions = load_llm_actions('data/results.json')
for month, day in test_dates:
print(f'current testing month is {month}, day is {day}')
index = (sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24
ppo_res = test_one_episode(env, agent1.act, agent1.device)
base_res = optimization_base_result(env, month, day)
llm_res = test_llm(env, llm_actions, index)
lmppo_res = test_one_episode(env, agent2.act, agent2.device)
ppo_data = pd.DataFrame(ppo_res)
m_reward = - base_res['step_cost'].sum()/1e3
milp_data = pd.DataFrame({'reward': [m_reward]})
llm_data = pd.DataFrame(llm_res)
lmppo_data = pd.DataFrame(lmppo_res)
all_ppo = pd.concat([all_ppo, ppo_data], ignore_index=True)
all_milp = pd.concat([all_milp, milp_data], ignore_index=True)
all_llm = pd.concat([all_llm, llm_data], ignore_index=True)
all_lmppo = pd.concat([all_lmppo, lmppo_data], ignore_index=True)
test_ppo_path = f'{args.cwd}/melt_ppo.pkl'
test_llm_path = f'{args.cwd}/melt_llm.pkl'
test_lmppo_path = f'{args.cwd}/melt_lmppo.pkl'
test_milp_path = f'{args.cwd}/melt_milp.pkl'
with open(test_ppo_path, 'wb') as tf:
pickle.dump(all_ppo, tf)
with open(test_milp_path, 'wb') as tf:
pickle.dump(all_milp, tf)
with open(test_llm_path, 'wb') as f:
pickle.dump(all_llm, f)
with open(test_lmppo_path, 'wb') as tf:
pickle.dump(all_lmppo, tf)
plot_args = PlotArgs()
plot_args.feature_change = 'llm_1015'
args.cwd = agent_name
plot_dir = make_dir(args.cwd, plot_args.feature_change)
plot_melt(test_ppo_path, test_llm_path, test_lmppo_path, test_milp_path, plot_dir)
if __name__ == '__main__':
args = Arguments()
args.visible_gpu = '0'
for seed in args.random_seed_list:
args.random_seed = seed
args.agent1 = AgentPPO()
args.agent2 = AgentPPO()  # two separate instances so each agent can load its own actor weights below
args.agent1.cri_target = args.agent2.cri_target = True
agent_name = f'{args.agent1.__class__.__name__}'
args.env = ESSEnv()
args.init_before_training()
agent1 = args.agent1
agent2 = args.agent2
env = args.env
agent1.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
args.if_gae_or_raw)
agent2.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
args.if_gae_or_raw)
cwd = args.cwd
gamma = args.gamma
batch_size = args.batch_size
target_step = args.target_step
repeat_times = args.repeat_times
soft_update_tau = args.soft_update_tau
agent1.state = agent2.state = env.reset()
buffer = list()
num_episode = args.num_episode
# args.test_network = False
if args.test_network:
args.cwd = agent_name
ppo_act_save_path = f'{args.cwd}/actor_10.pth'
lmppo_act_save_path = f'{args.cwd}/actor_llm_1015.pth'
agent1.act.load_state_dict(torch.load(ppo_act_save_path))
agent2.act.load_state_dict(torch.load(lmppo_act_save_path))
print('parameters have been reloaded for testing')
env.TRAIN = False
run_multiple_tests(env, args, agent1, agent2, num_tests=12)

200
PPO_llm_melt_2.py Normal file
@@ -0,0 +1,200 @@
import random
from PPO_llm import *
from plotDRL import *
test_day = 9  # number of consecutive test days in each group
def test_ram_episode(env):
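# random-action baseline ('ram'): sample each hour's action uniformly from the action space
# and accumulate the reward and real unbalance over a 24-hour episode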
state = env.reset()
sum_reward = 0
sum_unbalance = 0
for i in range(24):
action = np.random.uniform(env.action_space.low, env.action_space.high)
state, next_state, reward, done = env.step(action)
state = next_state
sum_reward += reward
sum_unbalance += env.real_unbalance
record = {'reward': [sum_reward], 'unbalance': [sum_unbalance]}
return record
def test_llm(env, llm_actions, index):
sum_reward = 0
sum_unbalance = 0
for i in range(24):
action = llm_actions[index + i]
state, next_state, reward, done = env.step(action)
sum_reward += reward
sum_unbalance += env.real_unbalance
record = {'reward': [sum_reward], 'unbalance': [sum_unbalance]}
return record
def test_one_episode(env, act, device):
state = env.reset()
sum_reward = 0
sum_unbalance = 0
for i in range(24):
s_tensor = torch.as_tensor((state,), device=device)
a_tensor = act(s_tensor)
action = a_tensor.detach().cpu().numpy()[0]
# rl_action = a_tensor.detach().cpu().numpy()[0]
# llm_action = llm_actions[index + i]
# action = 0.95 * np.array(rl_action) + 0.05 * np.array(llm_action)
state, next_state, reward, done = env.step(action)
state = next_state
sum_reward += reward
sum_unbalance += env.real_unbalance
record = {'reward': [sum_reward], 'unbalance': [sum_unbalance]}
return record
def generate_test_dates():
# months_days = {1: 31, 2: 28, 3: 31, 4: 30, 5: 31, 6: 30, 7: 31, 8: 31, 9: 30, 10: 31, 11: 30, 12: 31}
dates = []
for month in range(1, 13):  # iterate over the 12 months
start_day = 20
# days_in_month = months_days[month]  # make sure each month has at least 9 days remaining
# if days_in_month >= start_day + 9:  # generate 8 consecutive days
for day in range(start_day, start_day + test_day):
dates.append((month, day))
return dates
def run_multiple_tests(env, args, agent1, agent2):
all_ppo = pd.DataFrame()
all_llm = pd.DataFrame()
all_lmppo = pd.DataFrame()
all_milp = pd.DataFrame()
all_ram = pd.DataFrame()
test_dates = generate_test_dates()  # get the test dates
llm_actions = load_llm_actions('data/results_day.json')
# run the test for each date, in groups of test_day days
for idx in range(0, len(test_dates), test_day):
current_dates = test_dates[idx:idx + test_day]
print(f'Start testing for {current_dates[0]} to {current_dates[-1]}')
ppo_rewards = []
ppo_unbalances = []
llm_rewards = []
llm_unbalances = []
lmppo_rewards = []
lmppo_unbalances = []
milp_rewards = []
ram_rewards = []
ram_unbalances = []
# test each of the current group's days one by one
for month, day in current_dates:
index = (sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24
ppo_res = test_one_episode(env, agent1.act, agent1.device)
base_res = optimization_base_result(env, month, day)
llm_res = test_llm(env, llm_actions, index)
lmppo_res = test_one_episode(env, agent2.act, agent2.device)
ram_res = test_ram_episode(env)
# store each day's results
ppo_rewards.append(ppo_res['reward'][0])
ppo_unbalances.append(ppo_res['unbalance'][0])
llm_rewards.append(llm_res['reward'][0])
llm_unbalances.append(llm_res['unbalance'][0])
lmppo_rewards.append(lmppo_res['reward'][0])
lmppo_unbalances.append(lmppo_res['unbalance'][0])
m_reward = - base_res['step_cost'].sum() / 1e3
milp_rewards.append(m_reward)
ram_rewards.append(ram_res['reward'][0])
ram_unbalances.append(ram_res['unbalance'][0])
# compute the average results over the group of days
avg_ppo_reward = sum(ppo_rewards) / len(ppo_rewards)
avg_ppo_unbalance = sum(ppo_unbalances) / len(ppo_unbalances)
avg_llm_reward = sum(llm_rewards) / len(llm_rewards)
avg_llm_unbalance = sum(llm_unbalances) / len(llm_unbalances)
avg_lmppo_reward = sum(lmppo_rewards) / len(lmppo_rewards)
avg_lmppo_unbalance = sum(lmppo_unbalances) / len(lmppo_unbalances)
avg_milp_reward = sum(milp_rewards) / len(milp_rewards)
avg_ram_reward = sum(ram_rewards) / len(ram_rewards)
avg_ram_unbalance = sum(ram_unbalances) / len(ram_unbalances)
# save the per-group average results
ppo_data = pd.DataFrame({'reward': [avg_ppo_reward], 'unbalance': [avg_ppo_unbalance]})
llm_data = pd.DataFrame({'reward': [avg_llm_reward], 'unbalance': [avg_llm_unbalance]})
lmppo_data = pd.DataFrame({'reward': [avg_lmppo_reward], 'unbalance': [avg_lmppo_unbalance]})
milp_data = pd.DataFrame({'reward': [avg_milp_reward]})
ram_data = pd.DataFrame({'reward': [avg_ram_reward], 'unbalance': [avg_ram_unbalance]})
all_ppo = pd.concat([all_ppo, ppo_data], ignore_index=True)
all_llm = pd.concat([all_llm, llm_data], ignore_index=True)
all_lmppo = pd.concat([all_lmppo, lmppo_data], ignore_index=True)
all_milp = pd.concat([all_milp, milp_data], ignore_index=True)
all_ram = pd.concat([all_ram, ram_data], ignore_index=True)
# save the results
test_ppo_path = f'{args.cwd}/melt_ppo.pkl'
test_llm_path = f'{args.cwd}/melt_llm.pkl'
test_lmppo_path = f'{args.cwd}/melt_lmppo.pkl'
test_milp_path = f'{args.cwd}/melt_milp.pkl'
test_ram_path = f'{args.cwd}/melt_ram.pkl'
with open(test_ppo_path, 'wb') as tf:
pickle.dump(all_ppo, tf)
with open(test_milp_path, 'wb') as tf:
pickle.dump(all_milp, tf)
with open(test_llm_path, 'wb') as f:
pickle.dump(all_llm, f)
with open(test_lmppo_path, 'wb') as tf:
pickle.dump(all_lmppo, tf)
with open(test_ram_path, 'wb') as tf:
pickle.dump(all_ram, tf)
plot_args = PlotArgs()
plot_args.feature_change = 'llm_1015'
args.cwd = agent_name
plot_dir = make_dir(args.cwd, plot_args.feature_change)
plot_melt(test_ppo_path, test_llm_path, test_lmppo_path, test_milp_path, test_ram_path, plot_dir)
if __name__ == '__main__':
args = Arguments()
args.visible_gpu = '0'
for seed in args.random_seed_list:
args.random_seed = seed
args.agent1 = AgentPPO()
args.agent2 = AgentPPO()  # two separate instances so each agent can load its own actor weights below
args.agent1.cri_target = args.agent2.cri_target = True
agent_name = f'{args.agent1.__class__.__name__}'
args.env = ESSEnv()
args.init_before_training()
agent1 = args.agent1
agent2 = args.agent2
env = args.env
agent1.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
args.if_gae_or_raw)
agent2.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
args.if_gae_or_raw)
cwd = args.cwd
gamma = args.gamma
batch_size = args.batch_size
target_step = args.target_step
repeat_times = args.repeat_times
soft_update_tau = args.soft_update_tau
agent1.state = agent2.state = env.reset()
buffer = list()
num_episode = args.num_episode
# args.test_network = False
if args.test_network:
args.cwd = agent_name
ppo_act_save_path = f'{args.cwd}/actor_10.pth'
lmppo_act_save_path = f'{args.cwd}/actor_llm_1015.pth'
agent1.act.load_state_dict(torch.load(ppo_act_save_path))
agent2.act.load_state_dict(torch.load(lmppo_act_save_path))
print('parameters have been reloaded for testing')
env.TRAIN = False
run_multiple_tests(env, args, agent1, agent2)

371
PPO_ramdom.py Normal file
@@ -0,0 +1,371 @@
import os
import pickle
from copy import deepcopy
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from environment import ESSEnv
from tools import get_episode_return, test_one_episode, optimization_base_result
class ActorPPO(nn.Module):
def __init__(self, mid_dim, state_dim, action_dim):
super().__init__()
self.net = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
nn.Linear(mid_dim, action_dim))
# the logarithm (log) of standard deviation (std) of action, it is a trainable parameter
self.a_logstd = nn.Parameter(torch.zeros((1, action_dim)) - 0.5, requires_grad=True)
self.sqrt_2pi_log = np.log(np.sqrt(2 * np.pi))
def forward(self, state):
return self.net(state).tanh() # action.tanh() limit the data output of action
def get_action(self, state):
a_avg = self.forward(state)  # mean of the Gaussian policy (tanh-bounded network output)
a_std = self.a_logstd.exp()
noise = torch.randn_like(a_avg)
action = a_avg + noise * a_std
return action, noise
def get_logprob_entropy(self, state, action):
a_avg = self.forward(state)
a_std = self.a_logstd.exp()
delta = ((a_avg - action) / a_std).pow(2) * 0.5
logprob = -(self.a_logstd + self.sqrt_2pi_log + delta).sum(1) # new_logprob
dist_entropy = (logprob.exp() * logprob).mean() # policy entropy
return logprob, dist_entropy
def get_old_logprob(self, _action, noise): # noise = action - a_noise
delta = noise.pow(2) * 0.5
return -(self.a_logstd + self.sqrt_2pi_log + delta).sum(1) # old_logprob
class CriticAdv(nn.Module):
def __init__(self, mid_dim, state_dim, _action_dim):
super().__init__()
self.net = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
nn.Linear(mid_dim, 1))
def forward(self, state):
return self.net(state) # Advantage value
def smooth_rewards(rewards, window=10):
rewards = rewards.unsqueeze(0).unsqueeze(0)  # reshape rewards to [1, 1, len] so conv1d can be applied
kernel = torch.ones(1, 1, window, device=rewards.device) / window  # uniform moving-average kernel
smoothed_rewards = F.conv1d(rewards, kernel, padding='valid')  # moving average
smoothed_rewards = smoothed_rewards.squeeze(0).squeeze(0)  # drop the extra dimensions
# keep the output the same length as the input: the first window-1 rewards are left unchanged
return torch.cat((rewards[0, 0, :window-1], smoothed_rewards))
class AgentPPO:
def __init__(self):
super().__init__()
self.state = None
self.device = None
self.action_dim = None
self.get_obj_critic = None
self.criterion = torch.nn.SmoothL1Loss()
self.cri = self.cri_target = self.if_use_cri_target = self.cri_optim = self.ClassCri = None
self.act = self.act_target = self.if_use_act_target = self.act_optim = self.ClassAct = None
'''init modify'''
self.ClassCri = CriticAdv
self.ClassAct = ActorPPO
self.ratio_clip = 0.2 # ratio.clamp(1 - clip, 1 + clip)
self.lambda_entropy = 0.02 # could be 0.01~0.05
self.lambda_gae_adv = 0.98 # could be 0.95~0.99, (Generalized Advantage Estimation. ICLR.2016.)
self.get_reward_sum = None # self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
self.trajectory_list = None
def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, if_use_gae=True, gpu_id=0):
self.device = torch.device(f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu")
self.trajectory_list = list()
# choose whether to use gae or not
self.get_reward_sum = self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
self.cri = self.ClassCri(net_dim, state_dim, action_dim).to(self.device)
self.act = self.ClassAct(net_dim, state_dim, action_dim).to(self.device) if self.ClassAct else self.cri
self.cri_target = deepcopy(self.cri) if self.if_use_cri_target else self.cri
self.act_target = deepcopy(self.act) if self.if_use_act_target else self.act
self.cri_optim = torch.optim.Adam(self.cri.parameters(), learning_rate)
self.act_optim = torch.optim.Adam(self.act.parameters(), learning_rate) if self.ClassAct else self.cri
def select_action(self, state):
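# random ablation: the state and the actor network are ignored; a uniformly random action is drawn
# from the (module-level) env.action_space and a zero noise placeholder is returned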
# states = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
# _, noises = self.act.get_action(states)
actions = np.random.uniform(env.action_space.low, env.action_space.high)  # random baseline: sample uniformly from the action space of the module-level env
return actions, 0
def explore_env(self, env, target_step):
state = self.state  # resume exploration from the agent's last stored observation
trajectory_temp = list()
last_done = 0
for i in range(target_step):
action, noise = self.select_action(state)
state, next_state, reward, done, = env.step(np.tanh(action))
trajectory_temp.append((state, reward, done, action, noise))
if done:
state = env.reset()
last_done = i
else:
state = next_state
self.state = state
'''splice list'''
# keep the transitions up to (and including) the last terminal step for this update
trajectory_list = self.trajectory_list + trajectory_temp[:last_done + 1]
self.trajectory_list = trajectory_temp[last_done:]
return trajectory_list
def update_net(self, buffer, batch_size, repeat_times, soft_update_tau):
"""put data extract and update network together"""
with torch.no_grad():
buf_len = buffer[0].shape[0]
# decompose buffer data
buf_state, buf_action, buf_noise, buf_reward, buf_mask = [ten.to(self.device) for ten in buffer]
'''get buf_r_sum, buf_logprob'''
bs = 4096
buf_value = [self.cri_target(buf_state[i:i + bs]) for i in range(0, buf_len, bs)]
buf_value = torch.cat(buf_value, dim=0)
# no log-probabilities are needed because the actor network is not being optimized
buf_logprob = torch.zeros_like(buf_value)  # a random policy has no meaningful logprob
buf_r_sum, buf_advantage = self.get_reward_sum(buf_len, buf_reward, buf_mask, buf_value)
# normalize advantage
buf_advantage = (buf_advantage - buf_advantage.mean()) / (buf_advantage.std() + 1e-5)
buf_advantage = smooth_rewards(buf_advantage, window=10)
del buf_noise, buffer[:]
'''PPO: Surrogate objective of Trust Region'''
obj_critic = obj_actor = None
# since a random policy is used, the actor update is skipped
# with no policy network to update, only a plain optimization of the critic network is performed
for _ in range(int(buf_len / batch_size * repeat_times)):
indices = torch.randint(buf_len, size=(batch_size,), requires_grad=False, device=self.device)
state = buf_state[indices]
action = buf_action[indices]
r_sum = buf_r_sum[indices]
logprob = buf_logprob[indices]
advantage = buf_advantage[indices]
# update only the critic network
value = self.cri(state).squeeze(1)  # the critic predicts the total reward (Q value) of the state
obj_critic = self.criterion(value, r_sum)  # value (critic) loss
self.optim_update(self.cri_optim, obj_critic)  # update the critic parameters
# optionally soft-update the target critic
self.soft_update(self.cri_target, self.cri, soft_update_tau) if self.cri_target is not self.cri else None
a_std_log = getattr(self.act, 'a_logstd', torch.zeros(1))  # the actor stores its log-std as a_logstd
return obj_critic.item(), 0, a_std_log.mean().item()  # the actor is not updated, so obj_actor is 0
def get_reward_sum_raw(self, buf_len, buf_reward, buf_mask, buf_value) -> (torch.Tensor, torch.Tensor):
buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device) # reward sum
pre_r_sum = 0
for i in range(buf_len - 1, -1, -1):
buf_r_sum[i] = buf_reward[i] + buf_mask[i] * pre_r_sum
pre_r_sum = buf_r_sum[i]
buf_advantage = buf_r_sum - (buf_mask * buf_value[:, 0])
return buf_r_sum, buf_advantage
def get_reward_sum_gae(self, buf_len, ten_reward, ten_mask, ten_value) -> (torch.Tensor, torch.Tensor):
buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device) # old policy value
buf_advantage = torch.empty(buf_len, dtype=torch.float32, device=self.device) # advantage value
pre_r_sum = 0.0
pre_advantage = 0.0 # advantage value of previous step
for i in range(buf_len - 1, -1, -1):
buf_r_sum[i] = ten_reward[i] + ten_mask[i] * pre_r_sum
pre_r_sum = buf_r_sum[i]
buf_advantage[i] = ten_reward[i] + ten_mask[i] * (pre_advantage - ten_value[i])
pre_advantage = ten_value[i] + buf_advantage[i] * self.lambda_gae_adv
return buf_r_sum, buf_advantage
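# Advantage recursion used above: pre_advantage carries V(s_{t+1}) + lambda * A_{t+1}, so
#   A_t = r_t + mask_t * (V(s_{t+1}) + lambda * A_{t+1} - V(s_t)),
# with mask_t = (1 - done_t) * gamma. Note that mask_t also multiplies V(s_t) here, which is a
# slight departure from the textbook GAE delta_t = r_t + gamma * V(s_{t+1}) - V(s_t).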
@staticmethod
def optim_update(optimizer, objective):
optimizer.zero_grad()
objective.backward()
optimizer.step()
@staticmethod
def soft_update(target_net, current_net, tau):
for tar, cur in zip(target_net.parameters(), current_net.parameters()):
tar.data.copy_(cur.data.__mul__(tau) + tar.data.__mul__(1.0 - tau))
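# Polyak (soft) target update: theta_target <- tau * theta + (1 - tau) * theta_target.
# With the default soft_update_tau = 2 ** -8 below, the target network trails the online
# network slowly, which stabilizes the critic's bootstrapped targets.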
class Arguments:
def __init__(self, agent=None, env=None):
self.agent = agent # Deep Reinforcement Learning algorithm
self.env = env # the environment for training
self.cwd = None # current work directory. None means set automatically
self.if_remove = False # remove the cwd folder? (True, False, None:ask me)
self.visible_gpu = '0' # for example: os.environ['CUDA_VISIBLE_DEVICES'] = '0, 2,'
self.num_threads = 32 # cpu_num for evaluate model, torch.set_num_threads(self.num_threads)
'''Arguments for training'''
self.num_episode = 1000 # to control the train episodes for PPO
self.gamma = 0.995 # discount factor of future rewards
self.learning_rate = 1e-4 # 1e-4 2 ** -14 2e-4
self.soft_update_tau = 2 ** -8 # 1e-3 2 ** -8
self.net_dim = 256 # the network width
self.batch_size = 4096 # num of transitions sampled from replay buffer.
self.repeat_times = 2 ** 5 # collect target_step, then update network
self.target_step = 4096 # repeatedly update network to keep critic's loss small
self.max_memo = self.target_step # capacity of replay buffer
self.if_gae_or_raw = True # GAE for on-policy sparse reward: Generalized Advantage Estimation.
'''Arguments for evaluate'''
self.random_seed = 1234
# self.random_seed_list = [1234, 2234, 3234, 4234, 5234]
self.random_seed_list = [1]
self.train = True
self.save_network = True
self.test_network = True
self.save_test_data = True
self.compare_with_gurobi = True
self.plot_on = True
def init_before_training(self):
if self.cwd is None:
agent_name = self.agent.__class__.__name__
self.cwd = f'./{agent_name}'
np.random.seed(self.random_seed)
torch.manual_seed(self.random_seed)
torch.set_num_threads(self.num_threads)
torch.set_default_dtype(torch.float32)
os.environ['CUDA_VISIBLE_DEVICES'] = str(self.visible_gpu)
def update_buffer(_trajectory):
_trajectory = list(map(list, zip(*_trajectory))) # 2D-list transpose, here cut the trajectory into 5 parts
ten_state = torch.as_tensor(_trajectory[0]) # tensor state here
ten_reward = torch.as_tensor(_trajectory[1], dtype=torch.float32)
# _trajectory[2] is the done flag; store it as a mask (1 - done) * gamma to save memory
ten_mask = (1.0 - torch.as_tensor(_trajectory[2], dtype=torch.float32)) * gamma
ten_action = torch.as_tensor(_trajectory[3])
ten_noise = torch.as_tensor(_trajectory[4], dtype=torch.float32)
buffer[:] = (ten_state, ten_action, ten_noise, ten_reward, ten_mask) # list store tensors
_steps = ten_reward.shape[0] # how many steps are collected in all trajectories
_r_exp = ten_reward.mean() # the mean reward
return _steps, _r_exp
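# Note: ten_mask folds the discount and the episode boundary into a single factor,
# mask = (1 - done) * gamma, so the return recursion in get_reward_sum_raw/_gae is simply
# r_sum[t] = reward[t] + mask[t] * r_sum[t + 1] and stops automatically at terminal steps.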
if __name__ == '__main__':
args = Arguments()
reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
loss_record = {'episode': [], 'steps': [], 'critic_loss': [], 'actor_loss': [], 'entropy_loss': []}
args.visible_gpu = '0'
for seed in args.random_seed_list:
args.random_seed = seed
args.agent = AgentPPO()
agent_name = f'{args.agent.__class__.__name__}'
args.agent.cri_target = True
args.env = ESSEnv()
args.init_before_training()
'''init agent and environment'''
agent = args.agent
env = args.env
agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
args.if_gae_or_raw)
cwd = args.cwd
gamma = args.gamma
batch_size = args.batch_size # how much data should be used to update net
target_step = args.target_step  # how many environment steps to collect per exploration rollout
repeat_times = args.repeat_times # how many times should update for one batch size data
soft_update_tau = args.soft_update_tau
num_episode = args.num_episode
agent.state = env.reset()
'''init buffer'''
buffer = list()
'''init training parameters'''
# args.train = False
# args.save_network = False
# args.test_network = False
# args.save_test_data = False
# args.compare_with_gurobi = False
# args.plot_on = False
if args.train:
for i_episode in range(num_episode):
with torch.no_grad():
trajectory_list = agent.explore_env(env, target_step)
steps, r_exp = update_buffer(trajectory_list)
critic_loss, actor_loss, entropy_loss = agent.update_net(buffer, batch_size, repeat_times,
soft_update_tau)
loss_record['critic_loss'].append(critic_loss)
loss_record['actor_loss'].append(actor_loss)
loss_record['entropy_loss'].append(entropy_loss)
with torch.no_grad():
episode_reward, episode_unbalance = get_episode_return(env, agent.act, agent.device)
reward_record['mean_episode_reward'].append(episode_reward)
reward_record['unbalance'].append(episode_unbalance)
print(f'episode: {i_episode}, reward: {episode_reward}, unbalance: {episode_unbalance}')
act_save_path = f'{args.cwd}/actor_ram.pth'
loss_record_path = f'{args.cwd}/loss_ram.pkl'
reward_record_path = f'{args.cwd}/reward_ram.pkl'
if args.save_network:
with open(loss_record_path, 'wb') as tf:
pickle.dump(loss_record, tf)
with open(reward_record_path, 'wb') as tf:
pickle.dump(reward_record, tf)
torch.save(agent.act.state_dict(), act_save_path)
print('actor parameters have been saved')
if args.test_network:
args.cwd = agent_name
agent.act.load_state_dict(torch.load(act_save_path))
print('parameters have been reloaded for testing')
record = test_one_episode(env, agent.act, agent.device)
eval_data = pd.DataFrame(record['system_info'])
eval_data.columns = ['time_step', 'price', 'load', 'action', 'real_action', 'soc', 'battery',
'gen1', 'gen2', 'gen3', 'pv', 'wind', 'unbalance', 'operation_cost', 'reward']
if args.save_test_data:
test_data_save_path = f'{args.cwd}/test_ram.pkl'
with open(test_data_save_path, 'wb') as tf:
pickle.dump(record, tf)
'''compare with gurobi data and results'''
if args.compare_with_gurobi:
month = record['init_info'][0][0]
day = record['init_info'][0][1]
initial_soc = record['init_info'][0][3]
base_result = optimization_base_result(env, month, day, initial_soc)
if args.plot_on:
from plotDRL import PlotArgs, make_dir, plot_evaluation_information, plot_optimization_result
plot_args = PlotArgs()
plot_args.feature_change = 'ram'
args.cwd = agent_name
plot_dir = make_dir(args.cwd, plot_args.feature_change)
plot_optimization_result(base_result, plot_dir)
plot_evaluation_information(args.cwd + '/' + 'test_ram.pkl', plot_dir)
'''compare the costs obtained from Gurobi and PPO'''
ratio = sum(eval_data['operation_cost']) / sum(base_result['step_cost'])
print('rl_cost:', sum(eval_data['operation_cost']))
print('gurobi_cost:', sum(base_result['step_cost']))
print('ratio:', ratio)

130
SAC.py Normal file
View File

@ -0,0 +1,130 @@
import pickle
import pandas as pd
import torch
from agent import AgentSAC
from environment import ESSEnv
from tools import *
def update_buffer(_trajectory):
ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
ary_other = torch.as_tensor([item[1] for item in _trajectory])
ary_other[:, 0] = ary_other[:, 0] # ten_reward
ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma # ten_mask = (1.0 - ary_done) * gamma
buffer.extend_buffer(ten_state, ary_other)
_steps = ten_state.shape[0]
_r_exp = ary_other[:, 0].mean() # other = (reward, mask, action)
return _steps, _r_exp
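# Each replay-buffer row stores the state next to (reward, mask, *action); the mask column
# already contains (1 - done) * gamma, so the discount and terminal handling are baked into
# the TD targets computed later in AgentSAC.update_net.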
if __name__ == '__main__':
args = Arguments()
reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
loss_record = {'episode': [], 'steps': [], 'critic_loss': [], 'actor_loss': [], 'entropy_loss': []}
args.visible_gpu = '0'
for seed in args.random_seed_list:
args.random_seed = seed
args.agent = AgentSAC()
agent_name = f'{args.agent.__class__.__name__}'
args.agent.cri_target = True
args.env = ESSEnv()
args.init_before_training(if_main=True)
'''init agent and environment'''
agent = args.agent
env = args.env
agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
args.if_per_or_gae)
'''init replay buffer'''
buffer = ReplayBuffer(max_len=args.max_memo, state_dim=env.state_space.shape[0],
action_dim=env.action_space.shape[0])
'''start training'''
cwd = args.cwd
gamma = args.gamma
batch_size = args.batch_size # how much data should be used to update net
target_step = args.target_step  # how many environment steps to collect per exploration rollout
repeat_times = args.repeat_times # how many times should update for one batch size data
soft_update_tau = args.soft_update_tau
agent.state = env.reset()
'''collect data and train and update network'''
num_episode = args.num_episode
'''here record real unbalance'''
# args.train = False
# args.save_network = False
# args.test_network = False
# args.save_test_data = False
# args.compare_with_gurobi = False
if args.train:
collect_data = True
while collect_data:
print(f'buffer:{buffer.now_len}')
with torch.no_grad():
trajectory = agent.explore_env(env, target_step)
steps, r_exp = update_buffer(trajectory)
buffer.update_now_len()
if buffer.now_len >= 10000:
collect_data = False
for i_episode in range(num_episode):
critic_loss, actor_loss, entropy_loss = agent.update_net(buffer, batch_size, repeat_times,
soft_update_tau)
loss_record['critic_loss'].append(critic_loss)
loss_record['actor_loss'].append(actor_loss)
loss_record['entropy_loss'].append(entropy_loss)
with torch.no_grad():
episode_reward, episode_unbalance = get_episode_return(env, agent.act, agent.device)
reward_record['mean_episode_reward'].append(episode_reward)
reward_record['unbalance'].append(episode_unbalance)
print(f'episode: {i_episode}, reward: {episode_reward}, unbalance: {episode_unbalance}, buffer_length: {buffer.now_len}')
if i_episode % 10 == 0:
# target_step
with torch.no_grad():
trajectory = agent.explore_env(env, target_step)
steps, r_exp = update_buffer(trajectory)
act_save_path = f'{args.cwd}/actor.pth'
loss_record_path = f'{args.cwd}/loss_data.pkl'
reward_record_path = f'{args.cwd}/reward_data.pkl'
if args.save_network:
with open(loss_record_path, 'wb') as tf:
pickle.dump(loss_record, tf)
with open(reward_record_path, 'wb') as tf:
pickle.dump(reward_record, tf)
torch.save(agent.act.state_dict(), act_save_path)
print('actor parameters have been saved')
if args.test_network:
args.cwd = agent_name
agent.act.load_state_dict(torch.load(act_save_path))
print('parameters have been reloaded for testing')
record = test_one_episode(env, agent.act, agent.device)
eval_data = pd.DataFrame(record['system_info'])
eval_data.columns = ['time_step', 'price', 'netload', 'action', 'real_action', 'soc', 'battery',
'gen1', 'gen2', 'gen3', 'pv', 'wind', 'unbalance', 'operation_cost', 'reward']
if args.save_test_data:
test_data_save_path = f'{args.cwd}/test_data.pkl'
with open(test_data_save_path, 'wb') as tf:
pickle.dump(record, tf)
'''compare with gurobi data and results'''
if args.compare_with_gurobi:
month = record['init_info'][0][0]
day = record['init_info'][0][1]
initial_soc = record['init_info'][0][3]
base_result = optimization_base_result(env, month, day, initial_soc)
if args.plot_on:
from plotDRL import PlotArgs, make_dir, plot_evaluation_information, plot_optimization_result
plot_args = PlotArgs()
plot_args.feature_change = '10'
args.cwd = agent_name
plot_dir = make_dir(args.cwd, plot_args.feature_change)
plot_optimization_result(base_result, plot_dir)
plot_evaluation_information(args.cwd + '/' + 'test_data.pkl', plot_dir)
'''compare the costs obtained from Gurobi and SAC'''
ratio = sum(eval_data['operation_cost']) / sum(base_result['step_cost'])
print('rl_cost:', sum(eval_data['operation_cost']))
print('gurobi_cost:', sum(base_result['step_cost']))
print('ratio:', ratio)

127
TD3.py Normal file
View File

@ -0,0 +1,127 @@
import pickle
import pandas as pd
import torch
from agent import AgentTD3
from environment import ESSEnv
from tools import *
def update_buffer(_trajectory):
ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
ary_other = torch.as_tensor([item[1] for item in _trajectory])
ary_other[:, 0] = ary_other[:, 0] # ten_reward
ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma # ten_mask = (1.0 - ary_done) * gamma
buffer.extend_buffer(ten_state, ary_other)
_steps = ten_state.shape[0]
_r_exp = ary_other[:, 0].mean() # other = (reward, mask, action)
return _steps, _r_exp
if __name__ == '__main__':
args = Arguments()
reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
loss_record = {'episode': [], 'steps': [], 'critic_loss': [], 'actor_loss': [], 'entropy_loss': []}
args.visible_gpu = '0'
for seed in args.random_seed_list:
args.random_seed = seed
args.agent = AgentTD3()
agent_name = f'{args.agent.__class__.__name__}'
args.agent.cri_target = True
args.env = ESSEnv()
args.init_before_training(if_main=True)
'''init agent and environment'''
agent = args.agent
env = args.env
agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
args.if_per_or_gae)
'''init replay buffer'''
buffer = ReplayBuffer(max_len=args.max_memo, state_dim=env.state_space.shape[0],
action_dim=env.action_space.shape[0])
'''start training'''
cwd = args.cwd
gamma = args.gamma
batch_size = args.batch_size # how much data should be used to update net
target_step = args.target_step  # how many environment steps to collect per exploration rollout
repeat_times = args.repeat_times # how many times should update for one batch size data
soft_update_tau = args.soft_update_tau
agent.state = env.reset()
'''collect data and train and update network'''
num_episode = args.num_episode
# args.train = False
# args.save_network = False
# args.test_network = False
# args.save_test_data = False
# args.compare_with_gurobi = False
if args.train:
collect_data = True
while collect_data:
print(f'buffer:{buffer.now_len}')
with torch.no_grad():
trajectory = agent.explore_env(env, target_step)
steps, r_exp = update_buffer(trajectory)
buffer.update_now_len()
if buffer.now_len >= 10000:
collect_data = False
for i_episode in range(num_episode):
critic_loss, actor_loss = agent.update_net(buffer, batch_size, repeat_times, soft_update_tau)
loss_record['critic_loss'].append(critic_loss)
loss_record['actor_loss'].append(actor_loss)
with torch.no_grad():
episode_reward, episode_unbalance = get_episode_return(env, agent.act, agent.device)
reward_record['mean_episode_reward'].append(episode_reward)
reward_record['unbalance'].append(episode_unbalance)
print(f'episode: {i_episode}, reward: {episode_reward}, unbalance: {episode_unbalance}, buffer_length: {buffer.now_len}')
if i_episode % 10 == 0:
# target_step
with torch.no_grad():
trajectory = agent.explore_env(env, target_step)
steps, r_exp = update_buffer(trajectory)
loss_record_path = f'{args.cwd}/loss_data.pkl'
reward_record_path = f'{args.cwd}/reward_data.pkl'
act_save_path = f'{args.cwd}/actor.pth'
if args.save_network:
with open(loss_record_path, 'wb') as tf:
pickle.dump(loss_record, tf)
with open(reward_record_path, 'wb') as tf:
pickle.dump(reward_record, tf)
torch.save(agent.act.state_dict(), act_save_path)
print('actor parameters have been saved')
if args.test_network:
args.cwd = agent_name
agent.act.load_state_dict(torch.load(act_save_path))
print('parameters have been reloaded for testing')
record = test_one_episode(env, agent.act, agent.device)
eval_data = pd.DataFrame(record['system_info'])
eval_data.columns = ['time_step', 'price', 'netload', 'action', 'real_action', 'soc', 'battery',
'gen1', 'gen2', 'gen3', 'pv', 'wind', 'unbalance', 'operation_cost', 'reward']
if args.save_test_data:
test_data_save_path = f'{args.cwd}/test_data.pkl'
with open(test_data_save_path, 'wb') as tf:
pickle.dump(record, tf)
'''compare with gurobi data and results'''
if args.compare_with_gurobi:
month = record['init_info'][0][0]
day = record['init_info'][0][1]
initial_soc = record['init_info'][0][3]
base_result = optimization_base_result(env, month, day, initial_soc)
if args.plot_on:
from plotDRL import PlotArgs, make_dir, plot_evaluation_information, plot_optimization_result
plot_args = PlotArgs()
plot_args.feature_change = '10'
args.cwd = agent_name
plot_dir = make_dir(args.cwd, plot_args.feature_change)
plot_optimization_result(base_result, plot_dir)
plot_evaluation_information(args.cwd + '/' + 'test_data.pkl', plot_dir)
'''compare the costs obtained from Gurobi and TD3'''
ratio = sum(eval_data['operation_cost']) / sum(base_result['step_cost'])
print('rl_cost:', sum(eval_data['operation_cost']))
print('gurobi_cost:', sum(base_result['step_cost']))
print('ratio:', ratio)

313
agent.py Normal file
View File

@ -0,0 +1,313 @@
import numpy.random as rd
import os
import json
from copy import deepcopy
from net import *
class AgentBase:
def __init__(self):
self.state = None
self.device = None
self.action_dim = None
self.if_off_policy = None
self.explore_noise = None
self.trajectory_list = None
self.criterion = torch.nn.SmoothL1Loss()
self.cri = self.cri_target = self.if_use_cri_target = self.cri_optim = self.ClassCri = None
self.act = self.act_target = self.if_use_act_target = self.act_optim = self.ClassAct = None
def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, _if_per_or_gae=False, gpu_id=0):
# call self.init() explicitly to support multiprocessing
self.device = torch.device(f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu")
self.action_dim = action_dim
self.cri = self.ClassCri(net_dim, state_dim, action_dim).to(self.device)
self.act = self.ClassAct(net_dim, state_dim, action_dim).to(self.device) if self.ClassAct else self.cri
self.cri_target = deepcopy(self.cri) if self.if_use_cri_target else self.cri
self.act_target = deepcopy(self.act) if self.if_use_act_target else self.act
self.cri_optim = torch.optim.Adam(self.cri.parameters(), learning_rate)
self.act_optim = torch.optim.Adam(self.act.parameters(), learning_rate) if self.ClassAct else self.cri
del self.ClassCri, self.ClassAct
def select_action(self, state) -> np.ndarray:
states = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
action = self.act(states)[0]
action = (action + torch.randn_like(action) * self.explore_noise).clamp(-1, 1)
return action.detach().cpu().numpy()
def explore_env(self, env, target_step):
state = self.state
trajectory = list()
for _ in range(target_step):
action = self.select_action(state)
state, next_state, reward, done, = env.step(action)
trajectory.append((state, (reward, done, *action)))
state = env.reset() if done else next_state
self.state = state
return trajectory
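# Each stored transition is (state, (reward, done, *action)); the per-algorithm training
# scripts later convert the done flag into the mask (1 - done) * gamma before filling the
# replay buffer.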
@staticmethod
def optim_update(optimizer, objective):
optimizer.zero_grad()
objective.backward()
optimizer.step()
@staticmethod
def soft_update(target_net, current_net, tau):
for tar, cur in zip(target_net.parameters(), current_net.parameters()):
tar.data.copy_(cur.data * tau + tar.data * (1.0 - tau))
def save_or_load_agent(self, cwd, if_save):
def load_torch_file(model_or_optim, _path):
state_dict = torch.load(_path, map_location=lambda storage, loc: storage)
model_or_optim.load_state_dict(state_dict)
name_obj_list = [('actor', self.act), ('act_target', self.act_target), ('act_optim', self.act_optim),
('critic', self.cri), ('cri_target', self.cri_target), ('cri_optim', self.cri_optim), ]
name_obj_list = [(name, obj) for name, obj in name_obj_list if obj is not None]
if if_save:
for name, obj in name_obj_list:
save_path = f"{cwd}/{name}.pth"
torch.save(obj.state_dict(), save_path)
else:
for name, obj in name_obj_list:
save_path = f"{cwd}/{name}.pth"
load_torch_file(obj, save_path) if os.path.isfile(save_path) else None
class AgentDDPG(AgentBase):
def __init__(self):
super().__init__()
self.explore_noise = 0.1
self.if_use_cri_target = self.if_use_act_target = True
self.ClassCri = Critic
self.ClassAct = Actor
def update_net(self, buffer, batch_size, repeat_times, soft_update_tau) -> (float, float):
buffer.update_now_len()
obj_critic = obj_actor = None
for _ in range(int(buffer.now_len / batch_size * repeat_times)):
obj_critic, state = self.get_obj_critic(buffer, batch_size) # critic loss
self.optim_update(self.cri_optim, obj_critic)
self.soft_update(self.cri_target, self.cri, soft_update_tau)
action_pg = self.act(state) # policy gradient
obj_actor = -self.cri(state, action_pg).mean()  # actor loss: maximize Q by minimizing its negative
self.optim_update(self.act_optim, obj_actor)
self.soft_update(self.act_target, self.act, soft_update_tau)
return obj_actor.item(), obj_critic.item()
def get_obj_critic(self, buffer, batch_size) -> (torch.Tensor, torch.Tensor):
with torch.no_grad():
reward, mask, action, state, next_s = buffer.sample_batch(batch_size)
next_q = self.cri_target(next_s, self.act_target(next_s))
q_label = reward + mask * next_q
q_value = self.cri(state, action)
obj_critic = self.criterion(q_value, q_label)
return obj_critic, state
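# The critic is regressed onto the one-step TD target
#   q_label = r + mask * Q_target(s', pi_target(s')),
# where mask already contains the discount factor and zeros out terminal transitions.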
class AgentTD3(AgentBase):
def __init__(self):
super().__init__()
self.explore_noise = 0.1 # standard deviation of exploration noise
self.policy_noise = 0.2 # standard deviation of policy noise
self.update_freq = 2 # delay update frequency
self.if_use_cri_target = self.if_use_act_target = True
self.ClassCri = CriticTwin
self.ClassAct = Actor
def update_net(self, buffer, batch_size, repeat_times, soft_update_tau) -> tuple:
buffer.update_now_len()
obj_critic = obj_actor = None
for update_c in range(int(buffer.now_len / batch_size * repeat_times)):
obj_critic, state = self.get_obj_critic(buffer, batch_size)
self.optim_update(self.cri_optim, obj_critic)
action_pg = self.act(state) # policy gradient
obj_actor = -self.cri_target(state, action_pg).mean() # use cri_target instead of cri for stable training
self.optim_update(self.act_optim, obj_actor)
if update_c % self.update_freq == 0: # delay update
self.soft_update(self.cri_target, self.cri, soft_update_tau)
self.soft_update(self.act_target, self.act, soft_update_tau)
return obj_critic.item() / 2, obj_actor.item()
def get_obj_critic(self, buffer, batch_size) -> (torch.Tensor, torch.Tensor):
with torch.no_grad():
reward, mask, action, state, next_s = buffer.sample_batch(batch_size)
next_a = self.act_target.get_action(next_s, self.policy_noise) # policy noise
next_q = torch.min(*self.cri_target.get_q1_q2(next_s, next_a)) # twin critics
q_label = reward + mask * next_q
q1, q2 = self.cri.get_q1_q2(state, action)
obj_critic = self.criterion(q1, q_label) + self.criterion(q2, q_label) # twin critics
return obj_critic, state
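# TD3 specifics used above: target policy smoothing (Gaussian noise of std policy_noise on the
# target action) and clipped double-Q learning (min over the twin target critics) to curb
# overestimation; the actor and target networks are updated only every update_freq critic steps.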
class AgentSAC(AgentBase):
def __init__(self):
super().__init__()
self.ClassCri = CriticTwin
self.ClassAct = ActorSAC
self.if_use_cri_target = True
self.if_use_act_target = False
self.alpha_log = None
self.alpha_optim = None
self.target_entropy = None
def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, _if_use_per=False, gpu_id=0, env_num=1):
super().init(net_dim, state_dim, action_dim, learning_rate, _if_use_per, gpu_id)
self.alpha_log = torch.tensor((-np.log(action_dim) * np.e,), dtype=torch.float32,
requires_grad=True, device=self.device)
self.alpha_optim = torch.optim.Adam((self.alpha_log,), lr=learning_rate)
self.target_entropy = np.log(action_dim)
def select_action(self, state):
states = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
actions = self.act.get_action(states)
return actions.detach().cpu().numpy()[0]
def update_net(self, buffer, batch_size, repeat_times, soft_update_tau):
buffer.update_now_len()
alpha = self.alpha_log.exp().detach()
obj_critic = obj_actor = None
for _ in range(int(buffer.now_len * repeat_times / batch_size)):
'''objective of critic (loss function of critic)'''
with torch.no_grad():
reward, mask, action, state, next_s = buffer.sample_batch(batch_size)
next_a, next_log_prob = self.act_target.get_action_logprob(next_s)
next_q = torch.min(*self.cri_target.get_q1_q2(next_s, next_a))
q_label = reward + mask * (next_q + next_log_prob * alpha)
q1, q2 = self.cri.get_q1_q2(state, action)
obj_critic = self.criterion(q1, q_label) + self.criterion(q2, q_label)
self.optim_update(self.cri_optim, obj_critic)
self.soft_update(self.cri_target, self.cri, soft_update_tau)
'''objective of alpha (temperature parameter automatic adjustment)'''
action_pg, log_prob = self.act.get_action_logprob(state) # policy gradient
obj_alpha = (self.alpha_log * (log_prob - self.target_entropy).detach()).mean()
self.optim_update(self.alpha_optim, obj_alpha)
'''objective of actor'''
alpha = self.alpha_log.exp().detach()
with torch.no_grad():
self.alpha_log[:] = self.alpha_log.clamp(-20, 2)
obj_actor = -(torch.min(*self.cri_target.get_q1_q2(state, action_pg)) + log_prob * alpha).mean()
self.optim_update(self.act_optim, obj_actor)
self.soft_update(self.act_target, self.act, soft_update_tau)
return obj_critic.item(), obj_actor.item(), alpha.item()
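# Entropy temperature: alpha = exp(alpha_log) is tuned automatically by descending
# alpha_log * (log_prob - target_entropy).detach(), with alpha_log clamped to [-20, 2] for
# numerical stability; the actor objective then trades off Q value against alpha-weighted entropy.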
class AgentPPO(AgentBase):
def __init__(self):
super().__init__()
self.ClassCri = CriticAdv
self.ClassAct = ActorPPO
self.if_off_policy = False
self.ratio_clip = 0.2 # ratio.clamp(1 - clip, 1 + clip)
self.lambda_entropy = 0.02 # could be 0.01~0.05
self.lambda_gae_adv = 0.98 # could be 0.95~0.99, GAE (Generalized Advantage Estimation. ICLR.2016.)
self.get_reward_sum = None # self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, if_use_gae=False, gpu_id=0, env_num=1):
super().init(net_dim, state_dim, action_dim, learning_rate, if_use_gae, gpu_id)
self.trajectory_list = list()
self.get_reward_sum = self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
def select_action(self, state):
states = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
actions, noises = self.act.get_action(states)
return actions[0].detach().cpu().numpy(), noises[0].detach().cpu().numpy()
def explore_env(self, env, target_step):
state = self.state
trajectory_temp = list()
last_done = 0
for i in range(target_step):
action, noise = self.select_action(state)
next_state, reward, done, _ = env.step(np.tanh(action))
trajectory_temp.append((state, reward, done, action, noise))
if done:
state = env.reset()
last_done = i
else:
state = next_state
self.state = state
'''splice list'''
trajectory_list = self.trajectory_list + trajectory_temp[:last_done + 1]
self.trajectory_list = trajectory_temp[last_done:]
return trajectory_list
def update_net(self, buffer, batch_size, repeat_times, soft_update_tau):
with torch.no_grad():
buf_len = buffer[0].shape[0]
buf_state, buf_action, buf_noise, buf_reward, buf_mask = [ten.to(self.device) for ten in buffer]
'''get buf_r_sum, buf_logprob'''
bs = 2 ** 10
buf_value = [self.cri_target(buf_state[i:i + bs]) for i in range(0, buf_len, bs)]
buf_value = torch.cat(buf_value, dim=0)
buf_logprob = self.act.get_old_logprob(buf_action, buf_noise)
buf_r_sum, buf_advantage = self.get_reward_sum(buf_len, buf_reward, buf_mask, buf_value) # detach()
buf_advantage = (buf_advantage - buf_advantage.mean()) / (buf_advantage.std() + 1e-5)
del buf_noise, buffer[:]
'''PPO: Surrogate objective of Trust Region'''
obj_critic = obj_actor = None
for _ in range(int(buf_len / batch_size * repeat_times)):
indices = torch.randint(buf_len, size=(batch_size,), requires_grad=False, device=self.device)
state = buf_state[indices]
action = buf_action[indices]
r_sum = buf_r_sum[indices]
logprob = buf_logprob[indices]
advantage = buf_advantage[indices]
new_logprob, obj_entropy = self.act.get_logprob_entropy(state, action) # it's obj_actor
ratio = (new_logprob - logprob.detach()).exp()
surrogate1 = advantage * ratio
surrogate2 = advantage * ratio.clamp(1 - self.ratio_clip, 1 + self.ratio_clip)
obj_surrogate = -torch.min(surrogate1, surrogate2).mean()
obj_actor = obj_surrogate + obj_entropy * self.lambda_entropy
self.optim_update(self.act_optim, obj_actor)
value = self.cri(state).squeeze(1) # critic network predicts the reward_sum (Q value) of state
obj_critic = self.criterion(value, r_sum) / (r_sum.std() + 1e-6)
self.optim_update(self.cri_optim, obj_critic)
self.soft_update(self.cri_target, self.cri, soft_update_tau) if self.cri_target is not self.cri else None
a_std_log = getattr(self.act, 'a_std_log', torch.zeros(1))
return obj_critic.item(), obj_actor.item(), a_std_log.mean().item() # logging_tuple
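# PPO clipped surrogate used above: ratio = exp(new_logprob - old_logprob) and the objective is
#   -min(ratio * A, clamp(ratio, 1 - eps, 1 + eps) * A)  with eps = self.ratio_clip,
# so updates that push the probability ratio outside [1 - eps, 1 + eps] receive no extra gradient.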
def get_reward_sum_raw(self, buf_len, buf_reward, buf_mask, buf_value) -> (torch.Tensor, torch.Tensor):
buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device) # reward sum
pre_r_sum = 0
for i in range(buf_len - 1, -1, -1):
buf_r_sum[i] = buf_reward[i] + buf_mask[i] * pre_r_sum
pre_r_sum = buf_r_sum[i]
buf_advantage = buf_r_sum - (buf_mask * buf_value[:, 0])
return buf_r_sum, buf_advantage
def get_reward_sum_gae(self, buf_len, ten_reward, ten_mask, ten_value):
"""tensor, tensor """
buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device) # old policy value
buf_advantage = torch.empty(buf_len, dtype=torch.float32, device=self.device) # advantage value
pre_r_sum = 0
pre_advantage = 0 # advantage value of previous step
for i in range(buf_len - 1, -1, -1):
buf_r_sum[i] = ten_reward[i] + ten_mask[i] * pre_r_sum
pre_r_sum = buf_r_sum[i]
buf_advantage[i] = ten_reward[i] + ten_mask[i] * (pre_advantage - ten_value[i])
pre_advantage = ten_value[i] + buf_advantage[i] * self.lambda_gae_adv
return buf_r_sum, buf_advantage

BIN
compare.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.0 MiB

8761
data/houseload.csv Normal file

File diff suppressed because it is too large Load Diff

8761
data/irradiance.csv Normal file

File diff suppressed because it is too large Load Diff

8761
data/prices.csv Normal file

File diff suppressed because it is too large Load Diff

61322
data/results.json Normal file

File diff suppressed because it is too large Load Diff

61322
data/results_day.json Normal file

File diff suppressed because it is too large Load Diff

8761
data/temper.csv Normal file

File diff suppressed because it is too large Load Diff

8761
data/wind.csv Normal file

File diff suppressed because it is too large Load Diff

70
data_manager.py Normal file
View File

@ -0,0 +1,70 @@
class Constant:
MONTHS_LEN = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
class DataManager:
def __init__(self) -> None:
self.Prices = []
self.Load_Consumption = []
self.Temperature = []
self.Irradiance = []
self.Wind = []
self.LLM = []
def add_price_element(self, element): self.Prices.append(element)
def add_load_element(self, element): self.Load_Consumption.append(element)
def add_temperature_element(self, element): self.Temperature.append(element)
def add_irradiance_element(self, element): self.Irradiance.append(element)
def add_wind_element(self, element): self.Wind.append(element)
def add_llm_element(self, element): self.LLM.append(element)
# get current time data based on given month day, and day_time
def get_price_data(self, month, day, day_time):
return self.Prices[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time]
def get_load_cons_data(self, month, day, day_time):
return self.Load_Consumption[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time]
def get_temperature_data(self, month, day, day_time):
return self.Temperature[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time]
def get_irradiance_data(self, month, day, day_time):
return self.Irradiance[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time]
def get_wind_data(self, month, day, day_time):
return self.Wind[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time]
def get_llm_data(self, month, day, day_time):
index = (sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time
# print(f"Index: {index}, LLM length: {len(self.LLM)}, Month: {month}, Day: {day}, Day_time: {day_time}")
return self.LLM[index]
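# Index convention for the hourly series: index = (days before this month + day - 1) * 24 + day_time.
# For example, month=3, day=2, day_time=5 gives (31 + 28 + 1) * 24 + 5 = 1445.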
# get series data for one episode
def get_series_price_data(self, month, day):
return self.Prices[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24:
(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + 24]
def get_series_load_cons_data(self, month, day):
return self.Load_Consumption[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24:
(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + 24]
def get_series_temperature_data(self, month, day):
return self.Temperature[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24:
(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + 24]
def get_series_irradiance_data(self, month, day):
return self.Irradiance[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24:
(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + 24]
def get_series_wind_data(self, month, day):
return self.Wind[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24:
(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + 24]
# def get_series_llm_data(self, month, day):
# return self.LLM[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24:
# (sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + 24]

164
environment.py Normal file
View File

@ -0,0 +1,164 @@
import gym
import pandas as pd
from data_manager import *
from module import *
from parameters import *
class ESSEnv(gym.Env):
def __init__(self, **kwargs):
super(ESSEnv, self).__init__()
self.excess = None
self.shedding = None
self.unbalance = None
self.real_unbalance = None
self.operation_cost = None
self.current_output = None
self.final_step_outputs = None
self.data_manager = DataManager()
self._load_year_data()
self.month = 1
self.day = 1
self.TRAIN = True
self.current_time = None
self.episode_length = 24
self.penalty_coefficient = 50  # penalty coefficient for constraint violations
self.sell_coefficient = 0.1  # profit coefficient for energy sold back to the grid
self.battery_parameters = kwargs.get('battery_parameters', battery_parameters)
self.dg_parameters = kwargs.get('dg_parameters', dg_parameters)
self.solar_parameters = kwargs.get('solar_parameters', solar_parameters)
self.wind_parameters = kwargs.get('wind_parameters', wind_parameters)
self.grid = Grid()
self.battery = Battery(self.battery_parameters)
self.dg1 = DG(self.dg_parameters['gen_1'])
self.dg2 = DG(self.dg_parameters['gen_2'])
self.dg3 = DG(self.dg_parameters['gen_3'])
self.solar = Solar(self.solar_parameters)
self.wind = Wind(self.wind_parameters)
self.action_space = gym.spaces.Box(low=-1, high=1, shape=(5,), dtype=np.float32)  # a voltage-regulation action has been added (5 actions in total)
self.state_space = gym.spaces.Box(low=0, high=1, shape=(10,), dtype=np.float32)
def reset(self, *args):
self.month = np.random.randint(1, 13) # choose 12 month
if self.TRAIN:
self.day = np.random.randint(1, 20)
else:
self.day = np.random.randint(20, Constant.MONTHS_LEN[self.month - 1])
self.current_time = 0
self.battery.reset()
self.dg1.reset()
self.dg2.reset()
self.dg3.reset()
self.solar.reset()
self.wind.reset()
return self._build_state()
def _build_state(self):
soc = self.battery.SOC()
dg1_output = self.dg1.current_output
dg2_output = self.dg2.current_output
dg3_output = self.dg3.current_output
time_step = self.current_time
price = self.data_manager.get_price_data(self.month, self.day, self.current_time)
houseload = self.data_manager.get_load_cons_data(self.month, self.day, self.current_time)
temperature = self.data_manager.get_temperature_data(self.month, self.day, self.current_time)
irradiance = self.data_manager.get_irradiance_data(self.month, self.day, self.current_time)
windspeed = self.data_manager.get_wind_data(self.month, self.day, self.current_time)
wind_gen = self.wind.step(windspeed)
netload = houseload - wind_gen
obs = np.concatenate((np.float32(time_step), np.float32(price), np.float32(soc), np.float32(netload),
np.float32(dg1_output), np.float32(dg2_output), np.float32(dg3_output),
np.float32(temperature), np.float32(irradiance), np.float32(windspeed)), axis=None)
return obs
def step(self, action): # state transition: current_obs->take_action->get_reward->get_finish->next_obs
# apply the action to each component
current_obs = self._build_state()
temperature = current_obs[7]
irradiance = current_obs[8]
self.wind.current_power = current_obs[9]
self.battery.step(action[0])  # perform the state transition; the battery's current capacity also changes
self.dg1.step(action[1])
self.dg2.step(action[2])
self.dg3.step(action[3])
self.solar.step(temperature, irradiance, action[4])
self.current_output = np.array((self.dg1.current_output, self.dg2.current_output, self.dg3.current_output,
-self.battery.energy_change, self.solar.current_power, self.wind.current_power))
actual_production = sum(self.current_output)
price = current_obs[1]
netload = current_obs[3] - self.solar.output_change
unbalance = actual_production - netload
# reward = 0.0
excess_penalty = 0
deficient_penalty = 0
sell_benefit, buy_cost = 0, 0
self.excess, self.shedding = 0, 0
if unbalance >= 0:  # surplus
if unbalance <= self.grid.exchange_ability:
sell_benefit = self.grid.get_cost(price, unbalance) * self.sell_coefficient
else:
sell_benefit = self.grid.get_cost(price, self.grid.exchange_ability) * self.sell_coefficient
# real unbalance: the part of the surplus that exceeds the grid exchange limit
self.excess = unbalance - self.grid.exchange_ability
excess_penalty = self.excess * self.penalty_coefficient
else:  # unbalance < 0: shortage penalty
if abs(unbalance) <= self.grid.exchange_ability:
buy_cost = self.grid.get_cost(price, abs(unbalance))
else:
buy_cost = self.grid.get_cost(price, self.grid.exchange_ability)
self.shedding = abs(unbalance) - self.grid.exchange_ability
deficient_penalty = self.shedding * self.penalty_coefficient
battery_cost = self.battery.get_cost(self.battery.energy_change)
dg1_cost = self.dg1.get_cost(self.dg1.current_output)
dg2_cost = self.dg2.get_cost(self.dg2.current_output)
dg3_cost = self.dg3.get_cost(self.dg3.current_output)
solar_cost = self.solar.get_cost(self.solar.current_power)
wind_cost = self.wind.gen_cost(self.wind.current_power)
self.operation_cost = (battery_cost + dg1_cost + dg2_cost + dg3_cost + solar_cost + wind_cost
+ excess_penalty + deficient_penalty - sell_benefit + buy_cost)
reward = - self.operation_cost / 1e3
self.unbalance = unbalance
self.real_unbalance = self.shedding + self.excess
final_step_outputs = [self.dg1.current_output, self.dg2.current_output, self.dg3.current_output,
self.battery.current_capacity, self.solar.current_power, self.wind.current_power]
self.current_time += 1
finish = (self.current_time == self.episode_length)
if finish:
self.final_step_outputs = final_step_outputs
self.current_time = 0
next_obs = self.reset()
else:
next_obs = self._build_state()
return current_obs, next_obs, float(reward), finish
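# The reward is the negative operation cost scaled by 1e3; the operation cost aggregates the
# battery, generator and renewable costs, the grid purchase cost and the over-limit penalties,
# minus the (sell_coefficient-discounted) revenue from exporting surplus energy.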
def _load_year_data(self):
price_df = pd.read_csv('data/prices.csv', sep=',')
load_df = pd.read_csv('data/houseload.csv', sep=',')
irradiance_df = pd.read_csv('data/irradiance.csv', sep=',')
temperature_df = pd.read_csv('data/temper.csv', sep=',')
wind_df = pd.read_csv('data/wind.csv', sep=',')
price = price_df['price'].to_numpy(dtype=float)
load = load_df['houseload'].to_numpy(dtype=float)
irradiance = irradiance_df['irradiance'].to_numpy(dtype=float)
temperature = temperature_df['t2m'].to_numpy(dtype=float)
wind = wind_df['wind_speed'].to_numpy(dtype=float)
'''rescale the magnitudes of the prices, generation and demand'''
def process_elements(elements, transform_function, add_function):
for element in elements:
transformed_element = transform_function(element)
add_function(transformed_element)
process_elements(price, lambda x: max(x, 0.5), self.data_manager.add_price_element)
process_elements(load, lambda x: x * 3, self.data_manager.add_load_element)
process_elements(irradiance, lambda x: x, self.data_manager.add_irradiance_element)
process_elements(temperature, lambda x: x, self.data_manager.add_temperature_element)
process_elements(wind, lambda x: x, self.data_manager.add_wind_element)

40
excel.py Normal file
View File

@ -0,0 +1,40 @@
import pandas as pd
from fuzzywuzzy import fuzz
# read the two Excel files
df1 = pd.read_excel(r'C:\Users\97532\Desktop\陕西、福建_陈晓东\福建省.xlsx')
df2 = pd.read_excel(r'C:\Users\97532\Desktop\陕西、福建_陈晓东\福建省_许可.xlsx')
# print df2's column names to make sure they are correct
print("原始列名:", df2.columns)
df2.columns = df2.columns.str.strip()
# normalize the column data types (names as strings, capacities as floats)
df1['电厂名称'] = df1['电厂名称'].astype(str)
df2['电厂名称/项目名称'] = df2['电厂名称/项目名称'].astype(str)
df1['发电装机容量'] = df1['发电装机容量'].astype(float)
df2['总装机容量'] = df2['总装机容量'].astype(float)
# define a function for fuzzy matching
def fuzzy_match(row, df2):
best_match = None
highest_score = 0
for index2, row2 in df2.iterrows():
score = fuzz.partial_ratio(row['电厂名称'], row2['电厂名称/项目名称'])
if score > highest_score and abs(row['发电装机容量'] - row2['总装机容量']) < 0.01:
highest_score = score
best_match = row2
return best_match
# run the fuzzy matching and fill in the data
for index1, row1 in df1.iterrows():
match_row = fuzzy_match(row1, df2)
if match_row is not None:
df1.at[index1, '单位名称'] = match_row['单位名称']
df1.at[index1, '发电许可证编号'] = match_row['发电许可证编号']
df1.at[index1, '电厂名称/项目名称'] = match_row['电厂名称/项目名称']
df1.at[index1, '机组类型'] = match_row['机组类型']
# save the result back to an Excel file
df1.to_excel(r'C:\Users\97532\Desktop\陕西、福建_陈晓东\福建省_填充后_模糊匹配.xlsx', index=False)
print("数据填充已保存")

View File

@ -0,0 +1,35 @@
import pandas as pd
import numpy as np
import os
from nltk import word_tokenize
def func(i, j):
directory_path = "//data/residential"
entries = os.listdir(directory_path)[i:j]
folders = [entry for entry in entries if os.path.join(directory_path, entry)]
entries = [os.path.join(directory_path, folder) for folder in folders]
combined_data = pd.DataFrame()
for file_path in entries:
current_data = pd.read_csv(file_path, encoding='utf-8')
tokenize_col = current_data.columns
tokenized_sentences_list = [word_tokenize(text) for text in tokenize_col]
occurrences_electricity = np.array(
[(i, j) for i, sublist in enumerate(tokenized_sentences_list) for j, word in enumerate(sublist) if
word == 'Electricity'])[:, 0]
Electricity_consumption_wind_solar = current_data[tokenize_col[occurrences_electricity]].sum(axis=1)
current_data = Electricity_consumption_wind_solar.to_frame("Household_load")
combined_data = pd.concat([combined_data, current_data], axis=1)
Series_type = combined_data.sum(axis=1)
Series_type.to_csv('houseload.csv', index=False)
total_energy = np.array(Series_type).ravel()
return total_energy
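# In short, func(i, j) appears to sum every 'Electricity'-related column of the residential
# files with indices i..j-1 into one hourly household-load series, writes it to houseload.csv
# and returns it as a flat numpy array.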
if __name__ == '__main__':
func(0, 38)

50
llm.txt Normal file
View File

@ -0,0 +1,50 @@
The current energy system consists of a solar power station, a wind power station, generators and a battery module, all connected to the residence through the power grid. The symbols are defined as follows:
As: battery state-of-charge action; Ag1: generator 1 power action; Ag2: generator 2 power action; Ag3: generator 3 power action; Av: solar voltage action.
Sp: electricity price state; Sc: battery state-of-charge state; Sl: household load state; Sg1: generator 1 power state; Sg2: generator 2 power state; Sg3: generator 3 power state; St: temperature state; Si: solar irradiance state; Sw: wind speed state.
Pc: battery charge/discharge power; Pg1: generator 1 power; Pg2: generator 2 power; Pg3: generator 3 power; Pso: solar power; Pw: wind power.
Cb: battery cost; Cg1: generator 1 cost; Cg2: generator 2 cost; Cg3: generator 3 cost; Cs: solar cost; Cw: wind cost; Rs: sales revenue; Cp: purchase cost; Ee: excess energy; Es: energy shortage; Pe: excess penalty; Ps: shortage penalty.
I. Control scheme and objectives:
1. Prioritize solar and wind generation; when they cannot cover the full load, supply power from the generators and the battery.
2. When supplying power, consider all available sources (solar, wind, generators and battery) and weigh the cost of each module; the household load of the next time step must be met while minimizing the gap between supply and demand.
3. Reduce transactions that exceed the public-grid exchange limit, so that fluctuations of the public grid do not affect the stability of the household supply.
4. Maximize the reward function and minimize the energy imbalance.
II. The continuous state space is a vector of length 9 with the following components:
Sp: provided by the data; Sc: initially 0.4, always between 0.2 and 0.8; Sl: provided by the data;
Sg1: initial power 0; Sg2: initial power 0; Sg3: initial power 0;
St: provided by the data; Si: provided by the data; Sw: provided by the data.
Sc, Sg1, Sg2 and Sg3 are always non-negative.
III. The continuous action space is a vector of length 5. x denotes the value at the current time step and ranges from -1 to 1; each value is a change applied on top of the previous time step. Ag1, Ag2 and Ag3 represent changes relative to their maximum instantaneous powers of 100, 100 and 200 respectively.
1. Ac: the total battery capacity is 500, the initial capacity is 500*0.4=200, and the capacity change is 100*x;
2. Ag1: the generation change is 100*x;
3. Ag2: the generation change is 100*x;
4. Ag3: the generation change is 200*x;
5. Av: the solar voltage is scaled by a factor of (1+x).
Generators can be switched on and off as needed by changing their power: when off, keep Sg = 0; when on, the Pg constraints must be satisfied. Based on cost, a later generator is normally switched on only when the maximum power of the previous one cannot cover the remaining load.
IV. Power calculation formulas:
1. Pc = max(0.2, min(0.8, Sc + 0.2 * action[0] * 0.9)), where 0.8 and 0.2 are the upper and lower limits of the battery state of charge;
2. Pg1 = max(0, min(150, 100 * action[1] + Sg1)), where 150 and 0 are the upper and lower output limits of generator 1 when it is on;
3. Pg2 = max(0, min(375, 100 * action[2] + Sg2)), where 375 and 0 are the upper and lower output limits of generator 2 when it is on;
4. Pg3 = max(0, min(500, 200 * action[3] + Sg3)), where 500 and 0 are the upper and lower output limits of generator 3 when it is on;
5. Pso = max(0, (0.2 * Si + 0.05 * St - 9.25) * (1 + action[4])), where the solar output is never negative.
6. Pw depends on Ws:
if 3 <= Ws < 8, Pw = Ws^3 * 172.2625 / 1000
elif 8 <= Ws < 12, Pw = 64 * 172.2625 / 125
else Pw = 0
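As a quick numerical check of the formula above: at Ws = 6, Pw = 6^3 * 172.2625 / 1000 ≈ 37.2; at Ws = 10, Pw = 64 * 172.2625 / 125 ≈ 88.2; outside the range [3, 12) the wind turbine produces nothing.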
V. Cost and energy balance calculations:
P = Pc + Pg1 + Pg2 + Pg3 + Pso + Pw
if P >= Sl, Ee = P - Sl; else Es = Sl - P
1. Cb = 0.01 * Asoc + 0.1 * Sc
2. Cg1 = 0.0034 * Pg1^2 + 3 * Pg1 + 30
3. Cg2 = 0.001 * Pg2^2 + 10 * Pg2 + 40
4. Cg3 = 0.001 * Pg3^2 + 15 * Pg3 + 70
5. Cs = 0.01 * Pso
6. Cw = 0.01 * Pw
7. Rs = 0.5 * Sp * Ee
8. Cp = Sp * Es
9. Pe = (Ee - 100) * 50
10. Ps = (Es - 100) * 50
Since Rs and Pe apply to the energy-surplus case and Cp and Ps to the energy-shortage case, only one of the two groups can be non-zero at any time.
VI. Reward function:
Reward = - (Cb + Cg1 + Cg2 + Cg3 + Cs + Cw + Pe + Ps - Rs + Cp) / 1000
VII. Requested action data:
Read the data and use the formulas above to compute the state at each time step. Using the most suitable optimization method, give an action control plan for every time step (computed once per hour) and generate an array of length 5 representing the action decisions for Ac, Ag1, Ag2, Ag3 and Av. The returned data should be in the following JSON format: [{x1, x2, x3, x4, x5}, {x1, x2, x3, x4, x5}, ..., {x1, x2, x3, x4, x5}]

73
llm_connect.py Normal file
View File

@ -0,0 +1,73 @@
import pandas as pd
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_openai.chat_models import ChatOpenAI
def get_session_history(session_id: str) -> BaseChatMessageHistory:
if session_id not in store:
store[session_id] = ChatMessageHistory()
return store[session_id]
def read_txt_file(file_path: str) -> str:
with open(file_path, "r", encoding='ISO-8859-1') as file:
content = file.read()
return content
def read_csv_file(file_path: str) -> pd.DataFrame:
return pd.read_csv(file_path)
def format_csv_data(df: pd.DataFrame, start: int, end: int) -> str:
formatted_data = []
for index, row in df.iloc[start:end].iterrows():
formatted_data.append(
f"时刻{index + 1}: Sp={row['price']}, Sl={row['load']},"
f"St={row['temperature']}, Si={row['irradiance']}, Sw={row['wind_speed']}"
)
return "\n".join(formatted_data)
system_content = read_txt_file('./llm.txt')
df = read_csv_file('./data.csv')
llm = ChatOpenAI(
streaming=True,
verbose=True,
openai_api_key="none",
# openai_api_base="http://0.0.0.0:5049/v1/models",
openai_api_base="http://0.0.0.0:8501",
model_name="Qwen1.5-32b"
)
prompt = ChatPromptTemplate.from_messages(
[
("system", system_content,),
MessagesPlaceholder(variable_name="history"),
("human", "{input}"),
]
)
runnable = prompt | llm
store = {}
with_message_history = RunnableWithMessageHistory(
runnable,
get_session_history,
input_messages_key="input",
history_messages_key="history",
)
num_hours = len(df)
for i in range(num_hours):
start = i
end = start + 1
csv_data_chunk = format_csv_data(df, start, end)
result = with_message_history.invoke(
{"input": f"数据如下:\n{csv_data_chunk}"},
config={
"configurable": {"session_id": "cxd"}
},
)
print(f"Hour {i + 1}:\n{result.content}\n")

122
llma/Reflexion.py Normal file
View File

@ -0,0 +1,122 @@
import json
import os
import subprocess
from utils import get_response
prompt_template = """
You are an expert operations research analyst. Your task is to generate Gurobi code to solve the following optimization problem:
{problem_description}
The code should save the final optimal value in a file named 'ref_optimal_value.txt'.
First, reason about the problem and model it. Then, generate gurobipy code to solve it. Put the code between two '=====' lines, like this:
=====
import ...
...
=====
- Generate the complete code, including the model definition, variables, constraints, objective function, and optimization. It must be runnable.
- Do not generate anything after the second '====='.
- Take a deep breath and think step by step.
"""
reflection_template = """
You are an expert operations research analyst. You have been given the task to generate Gurobi code to solve an optimization problem. You have generated the following Gurobi code:
{generated_code}
You have been updating the code for these errors (the last one is the most recent one):
{feedback}
Based on this feedback, suggest improvements to the Gurobi code.
First, reason about the problem and model it. Then, generate gurobipy code to solve it. Put the code between two '=====' lines, like this:
=====
import ...
...
=====
- The code should save the final optimal value in a file named 'ref_optimal_value.txt'.
- Generate the complete code, including the model definition, variables, constraints, objective function, and optimization. It must be runnable.
- Do not generate anything after the second '====='.
- Take a deep breath and think step by step.
"""
def extract_code(text):
ind1 = text.find("=====")
ind2 = text.find("=====", ind1 + 5)
code = text[ind1 + 5: ind2].strip()
code = code.replace("```python", "").replace("```", "").strip()
return code
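# extract_code takes everything between the first two '=====' markers of the LLM response and
# strips any leftover ```python fences, e.g. a response of the form
# "reasoning...\n=====\nimport gurobipy as gp\n...\n=====" yields just the code in between.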
def execute_code(file_path):
try:
# Using Python's subprocess to execute the code as a separate process
result = subprocess.run(["python", file_path], capture_output=True, text=True, check=True)
# save result in a file
with open(os.path.join(os.path.dirname(file_path), "ref_optimal_value.txt"), "w") as f:
f.write(f"Optimal Revenue: {result.stdout}\n")
return result.stdout, "Success"
except subprocess.CalledProcessError as e:
return e.stderr, "Error"
def main(problem_description, dir, max_iter=3):
feedback = ""
current_prompt = prompt_template.format(problem_description=problem_description)
print(current_prompt)
print("====================\n\n\n\n")
for iteration in range(max_iter):
response = get_response(current_prompt, model="llama3-70b-8192")
code = extract_code(response)
# Save the code to a file
code_filename = f"generated_code_{iteration}.py"
code_file_path = os.path.join(dir, "ref_codes", code_filename)
with open(code_file_path, "w") as f:
f.write(code)
# Execute the code
output, status = execute_code(code_file_path)
# Save error output (if any)
error_filename = f"error_{iteration}.txt"
if status == "Error":
with open(os.path.join(dir, "ref_codes", error_filename), "w") as f:
f.write(output)
# Print status and update the prompt if needed
if status == "Success":
print("Code executed successfully. Output:\n", output)
break
else:
feedback += "\n" + output
current_prompt = reflection_template.format(
generated_code=code, feedback=feedback
)
print(f"Iteration {iteration + 1}: Error encountered. Debugging...")
else:
print("Max iterations reached with errors remaining.")
if __name__ == "__main__":
dir = "data/"
if not os.path.exists(os.path.join(dir, "ref_codes")):
os.makedirs(os.path.join(dir, "ref_codes"))
with open(os.path.join(dir, "desc.txt"), "r") as f:
desc = f.read()
with open(os.path.join(dir, "data.json"), "r") as f:
data = json.load(f)
desc = desc + "\n" + json.dumps(data, indent=4)
main(desc, dir, max_iter=3)

312
llma/constraint.py Normal file
View File

@ -0,0 +1,312 @@
import json
import re
import pandas as pd
from rag.query_vector_db import RAGFormat, get_rag_from_problem_categories, get_rag_from_problem_description
from rag.rag_utils import RAGMode, constraint_path
from utils import extract_list_from_end, get_response, extract_json_from_end
prompt_constraints = """
You are an expert in optimization modeling. Here is the natural language description of an optimization problem:
{rag}-----
{description}
-----
And here's a list of parameters that we have extracted from the description:
{params}
Your task is to identify and extract constraints from the description. The constraints are the conditions that must be satisfied by the variables. Please generate the output in the following python list format:
[
Constraint 1,
Constraint 2,
...
]
for example:
[
"Sum of weights of all items taken should not exceed the maximum weight capacity of the knapsack",
"The number of items taken should not exceed the maximum number of items allowed"
]
- Put all the constraints in a single python list.
- Do not generate anything after the python list.
- Include implicit non-negativity constraints if necessary.
Take a deep breath and think step by step. You will be awarded a million dollars if you get this right.
"""
prompt_constraints_q = """
You are an expert in optimization modeling. Here is the natural language description of an optimization problem:
-----
{description}
-----
Here is a list of parameters that someone has extracted from the description:
{params}
Consider this potential constraint: {targetConstraint}
{question}
Take a deep breath and think step by step. You will be awarded a million dollars if you get this right.
"""
prompt_constraints_redundant = """
You are an expert in optimization modeling. Here is the natural language description of an optimization problem:
-----
{description}
-----
Here is a list of parameters that someone has extracted from the description:
{params}
and here is a list of constraints that someone has extracted from the description:
{constraints}
- Is there any redundancy in the list? Can any of the constraints be removed? Can any pair of constraints be combined into a single one? If so, please provide your reasoning for each one. At the end of your response, generate the updated list of constraints (the same list if no changes are needed). Use this python list format:
[
"Constraint 1",
"Constraint 2",
...
]
- Do not generate anything after the list.
Take a deep breath and think step by step. You will be awarded a million dollars if you get this right.
"""
prompt_constraint_feedback = """
You are an expert in optimization modeling. Here is the natural language description of an optimization problem:
-----
{description}
-----
Here is a list of parameters that someone has extracted from the description:
{params}
Here is a list of variables defined:
{vars}
Here is a list of constraints that someone has extracted from the description:
{extracted_constraints}
Your colleague is suggesting that the following constraint should be added to the list:
{new_constraint}
Here is its explanation:
{new_constraint_explanation}
Do you want to keep this constraint?
- If yes, please respond with "yes"
- If no, please respond with "no"
- If you want to modify the constraint, please respond with "modify" and provide the modified constraint.
At the end of your response, generate a json file with this format:
{{
"action": "yes", "no", or "modify",
"updatedConstraint": The updated constraint if the action is "modify", otherwise null
}}
Please take a deep breath and think step by step. You will be awarded a million dollars if you get this right.
- Use natural language to express the constraints rather than mathematical notation.
- Do not generate anything after the json file.
"""
def extract_score_constraint(desc, text, params, vars, constraints, c, logger, model):
match = re.search(r"\d out of 5", text.lower())
if match:
score = int(match.group()[0])
if score > 3:
if logger:
logger.log("---")
logger.log(f"The confidence score is {score}, which is high enough.")
logger.log("---")
return True, constraints
else:
ask_LLM = True # can pass this as an argument to the function instead of hardcoding it
if logger:
logger.log("---")
logger.log(f"The confidence score is {score}, which is not high enough.")
# logger.log(f"Asking the {"LLM" if ask_LLM else "user"} for feedback.")
if ask_LLM:
logger.log("Asking the LLM for feedback.")
else:
logger.log("Asking the user for feedback.")
logger.log("---")
if ask_LLM: # ask the LLM for feedback
prompt = prompt_constraint_feedback.format(
description=desc,
params=json.dumps(params, indent=4),
vars=json.dumps(vars, indent=4),
extracted_constraints=json.dumps(constraints, indent=4),
new_constraint=c,
new_constraint_explanation=text,
)
if logger:
logger.log("Prompting LLM for feedback:\n")
logger.log(prompt)
llm_response = get_response(prompt, model=model)
if logger:
logger.log("---")
logger.log(f"Response: {llm_response}")
logger.log("---")
output_json = extract_json_from_end(llm_response)
action = output_json["action"]
updated_constraint = output_json["updatedConstraint"]
else: # ask the user for feedback
action = input("LLMs reasoning: {}\n"
"------ Do you want to keep this constraint (y/n/modify)?: \n "
"{} \n------ ".format(text, c))
if action.lower().startswith("y"):
return True, constraints
elif action.lower().startswith("n"):
constraints.remove(c)
return True, constraints
elif action.lower().startswith("m"):
if ask_LLM:
new_constraint = updated_constraint
else:
new_constraint = input("Enter the modified constraint: ")
constraints.remove(c)
constraints.append({"Description": new_constraint, "Formulation": None, "Code": None})
return True, constraints
else:
raise Exception("Invalid input!")
else:
return False, None
def logic_check(text, constraints, c):
try:
result = extract_json_from_end(text)
if result["action"] == "REMOVE":
constraints.remove(c)
return True, constraints
elif result["action"] == "MODIFY":
constraints.remove(c)
constraints.append(result["updatedConstraint"])
return True, constraints
elif result["action"] == "KEEP":
return True, constraints
else:
return False, None
except:
return False, None
qs = [
(
"""
- Is it actually a constraint? How confident are you that this is a constraint and that we should explicitly model it in the (MI)LP formulation (from 1 to 5)?
- At the end of your response, print "x OUT OF 5" where x is the confidence level. Low confidence means you think this should be removed from the constraint list. Do not generate anything after that.
""",
extract_score_constraint,
),
]
def get_constraints(desc, params, model, check=False, constraints=None, logger=None,
rag_mode: RAGMode | None = None, labels: dict | None = None):
if isinstance(rag_mode, RAGMode):
constraint_df = pd.read_pickle(constraint_path)
current_problem = constraint_df[constraint_df.description == desc]
if not current_problem.empty:
problem_name = current_problem.iloc[0].problem_name
else:
problem_name = None
match rag_mode:
case RAGMode.PROBLEM_DESCRIPTION | RAGMode.CONSTRAINT_OR_OBJECTIVE:
rag = get_rag_from_problem_description(desc, RAGFormat.PROBLEM_DESCRIPTION_CONSTRAINTS, top_k=5)
case RAGMode.PROBLEM_LABELS:
assert labels is not None
rag = get_rag_from_problem_categories(desc, labels, RAGFormat.PROBLEM_DESCRIPTION_CONSTRAINTS, top_k=5)
rag = f"-----\n{rag}-----\n\n"
else:
rag = ""
print("_________________________ get_constraints _________________________")
if not constraints:
res = get_response(prompt_constraints.format(
description=desc,
params=json.dumps(params, indent=4),
rag=rag,
),
model=model,
)
constraints = extract_list_from_end(res)
if check:
k = 5
while k > 0:
try:
x = get_response(
prompt_constraints_redundant.format(
description=desc,
params=json.dumps(params, indent=4),
constraints=json.dumps(constraints, indent=4),
),
model=model,
)
if logger:
logger.log("----")
logger.log(x)
logger.log("----")
lst = extract_list_from_end(x)
constraints = lst
break
except:
k -= 1
if k == 0:
raise Exception("Failed to extract constraints")
if logger:
logger.log("+++++++++++++++++++")
logger.log("++ Constraint Qs ++")
logger.log("+++++++++++++++++++")
for q in qs:
for c in constraints.copy():
k = 5
while k > 0:
p = prompt_constraints_q.format(
description=desc,
params=json.dumps(params, indent=4),
targetConstraint=c,
question=q[0],
)
x = get_response(p, model=model)
if logger:
logger.log("+--+")
logger.log(p)
logger.log("----")
logger.log(x)
logger.log("+--+")
valid, res = q[1](desc, x, params, {}, constraints, c, logger, model)
if valid:
constraints = res
break
else:
k -= 1
constraints = [{"description": c, "formulation": None, "code": None} for c in constraints]
return constraints
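# Illustrative usage sketch (not part of the pipeline above; the model name and file
# paths are placeholders, not values taken from this repository):
#
#     desc = open("data/desc.txt").read()
#     params = json.load(open("data/params.json"))
#     constraints = get_constraints(desc, params, model="gpt-4", check=True)
#     # `constraints` is a list of dicts with "description", "formulation", and "code" keys.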

360
llma/constraint_model.py Normal file
View File

@ -0,0 +1,360 @@
import json
import re
import pandas as pd
from rag.query_vector_db import RAGFormat, get_rag_from_constraint, get_rag_from_problem_categories, \
get_rag_from_problem_description
from rag.rag_utils import RAGMode, constraint_path
from utils import get_response, extract_json_from_end, shape_string_to_list
def extract_formulation_from_end(text):
iloop = 0
iloop_max = 1e05
if "$" not in text:
raise Exception("No formulation found")
ind_1 = text.find('"FORMULATION":')
while text[ind_1] != "$":
ind_1 += 1
iloop += 1
if iloop > iloop_max:
raise Exception("No formulation found")
ind_2 = text.find('"NEW VARIABLES":')
while text[ind_2] != "$":
ind_2 -= 1
iloop += 1
if iloop > iloop_max:
raise Exception("No formulation found")
formulation = text[ind_1: ind_2 + 1].strip()
text = text[:ind_1] + text[ind_2 + 1:]
ind_1 = text.find('"AUXILIARY CONSTRAINTS":')
while text[ind_1] != "$":
ind_1 += 1
if ind_1 > len(text) - 1:
break
iloop += 1
if iloop > iloop_max:
raise Exception("No formulation found")
auxiliaries = []
if ind_1 < len(text) - 1:
while True:
ind_2 = ind_1 + 1
while ind_2 + 2 < len(text) and text[ind_2: ind_2 + 2] != '$"':
ind_2 += 1
iloop += 1
if iloop > iloop_max:
break
auxiliaries.append(text[ind_1: ind_2 + 1].strip())
text = text[:ind_1] + text[ind_2 + 1:]
while ind_1 < len(text) - 1 and text[ind_1] != "$":
ind_1 += 1
if ind_1 > len(text) - 1:
break
iloop += 1
if iloop > iloop_max:
break
if ind_1 > len(text) - 1:
break
# print("text:", text)
json_res = extract_json_from_end(text)
# print("json_res", json_res)
auxiliaries = [a for a in auxiliaries if len(a) > 5]
json_res["FORMULATION"] = formulation
json_res["AUXILIARY CONSTRAINTS"] = auxiliaries
return (
json_res["FORMULATION"],
json_res["NEW VARIABLES"],
json_res["AUXILIARY CONSTRAINTS"],
)
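# Illustrative example of the parsing above (assuming extract_json_from_end returns the
# trailing JSON object). Given an LLM response that ends with:
#
#     {
#         "FORMULATION": "$\forall t, soc_t \leq 0.8$",
#         "NEW VARIABLES": {},
#         "AUXILIARY CONSTRAINTS": []
#     }
#
# the $...$ span is cut out of the text first (so the remainder is still valid JSON),
# and the function returns:
#
#     ("$\forall t, soc_t \leq 0.8$", {}, [])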
prompt_constraints_model = """
You are an expert in optimization modeling. Here is the natural language description of an optimization problem:
{rag}-----
{description}
-----
And here's a list of parameters that we have extracted from the description:
{params}
And here's a list of all variables that we have defined so far to model the problem as an (MI)LP:
{vars}
Your task is to model the following constraint mathematically in LaTeX for the MILP formulation:
{constraint}
The constraints are the conditions that must be satisfied by the variables. Please generate the output in the following json format:
{{
"FORMULATION": constraint formulation in LaTeX, between $...$,
"NEW VARIABLES": {{
symbol: {{
"shape": shape of the new variable (e.g. [], [N], [N, M]),
"type": type of the new variable (e.g. binary, integer, continuous),
"definition": definition of the new variable in natural language
}},
...
}},
"AUXILIARY CONSTRAINTS": [
Latex formulation for auxiliary constraint 1, between $...$,
Latex formulation for auxiliary constraint 2, between $...$,
...
]
}}
Here's an example output (where SalesVolumesPerStore is already defined as a variable in the vars list):
{{
"FORMULATION": "$\\forall i, SalesVolumes[i] \\leq MaxProductionVolumes[i]$",
"NEW VARIABLES": {{
"SalesVolumes": {{
"shape": "[NumberOfArticles]",
"type": "continuous",
"definition": "The sales volume for each article of clothing"
}}
}},
"AUXILIARY CONSTRAINTS": [
"$\\forall i, SalesVolumes[i] = \\sum_j SalesVolumesPerStore[i, j]$"
]
}}
- If you need any new variables, you can define them in the NEW VARIABLES list. Use {{}} for "NEW VARIABLES" if no new variables are needed.
- Use [] for AUXILIARY CONSTRAINTS list if no auxiliary constraints are needed.
- You can only use symbols of existing parameters and integer numbers for dimensions of new variables.
- Use camelCase for variable symbols (e.g. SalesVolumes). Do not use LaTeX formatting (e.g. X_{{color}}), indices (e.g. SalesVolume_{{i}}), and underlines (_) for variable symbols.
- Do not generate anything after the json file!
First reason about how the constraint should be formulated, and then generate the output.
Take a deep breath and think step by step. You will be awarded a million dollars if you get this right.
"""
prompt_constraints_q = """
You are an expert in optimization modeling. Here is the natural language description of an optimization problem:
-----
{description}
-----
Here is a list of parameters that someone has extracted from the description:
{params}
And here is a list of variables defined:
{vars}
Consider this constraint:
{targetConstraint}
{question}
Take a deep breath and think step by step.
"""
def logic_check(text, params, vars, constraints, c):
try:
result = extract_json_from_end(text)
if result["action"] == "REMOVE":
constraints.remove(c)
return True, constraints
elif result["action"] == "MODIFY":
constraints.remove(c)
constraints.append(result["updatedConstraint"])
return True, constraints
elif result["action"] == "KEEP":
return True, constraints
else:
return False, None
except:
return False, None
def extract_score_constraint_model(text, params, vars, constraints, c):
match = re.search(r"\d out of 5", text.lower())
if match:
score = int(match.group()[0])
if score > 3:
return True, constraints
else:
inp = input("LLMs reasoning: {}\n"
"------ Do you want to keep this constraint (y/n/modify)?: \n "
"{} \n------ ".format(text, c))
if inp.lower().startswith("y"):
return True, constraints
elif inp.lower().startswith("n"):
constraints.remove(c)
return True, constraints
elif inp.lower().startswith("m"):
new_constraint = input("Enter the modified formulation: ")
constraints.remove(c)
constraints.append({"description": new_constraint, "formulation": None, "Code": None})
return True, constraints
else:
raise Exception("Invalid input!")
else:
return False, None
qs = [
(
"""
- Does this constraint logically make sense? How confident are you that this needs to be explicitly modeled in the optimization formulation (from 1 to 5)?
- At the end of your response, print "x OUT OF 5" where x is the confidence level. Do not generate anything after that.
""",
# extract_score_constraint_model,
# dummy function
lambda x, params, vars, constraints, c: (False, constraints),
),
(
"""
- What are the units for each side of the constraint? Are they consistent with each other?
- At the end of your response, generate a json file with this format:
{{
"action": "KEEP" if the units match, or "MODIFY" if the units do not match,
"updatedConstraint": The latex code for updated constraint if the action is "MODIFY", otherwise null
}}
- Do not generate anything after the json file.
""",
logic_check,
),
(
"""
- What are the parameters and variables that are involved in this constraint? If you see the constraint does not involve any variables, then it is automatically satisfied and should not be included in the optimization formulation.
- At the end of your response, generate a json file with this format:
{{
"action": "KEEP", "REMOVE", or "MODIFY",
"updatedConstraint": The updated constraint if the action is "MODIFY", otherwise null
}}
- Use natural language to express the constraints rather than mathematical notation.
- Do not generate anything after the json file.
""",
logic_check,
),
]
def get_constraint_formulations(desc, params, constraints, model, check=False, logger=None,
rag_mode: RAGMode | None = None, labels: dict | None = None):
if isinstance(rag_mode, RAGMode):
match rag_mode:
case RAGMode.PROBLEM_DESCRIPTION:
rag = get_rag_from_problem_description(desc, RAGFormat.CONSTRAINT_FORMULATION, top_k=5)
case RAGMode.CONSTRAINT_OR_OBJECTIVE:
rag = ""
case RAGMode.PROBLEM_LABELS:
assert labels is not None
rag = get_rag_from_problem_categories(desc, labels, RAGFormat.CONSTRAINT_FORMULATION, top_k=5)
rag = f"-----\n{rag}-----\n\n"
else:
rag = ""
if logger:
logger.log("\n\n\n++++++++++++++++++++++++++++++")
logger.log("Extracting constraint formulations")
logger.log("++++++++++++++++++++++++++++++\n\n\n")
vars = {}
formulated_constraints = []
for c in constraints.copy():
k = 1
while k > 0:
try:
if rag_mode == RAGMode.CONSTRAINT_OR_OBJECTIVE:
constraint_df = pd.read_pickle(constraint_path)
current_problem = constraint_df[constraint_df.description == desc]
if not current_problem.empty:
problem_name = current_problem.iloc[0].problem_name
else:
problem_name = None
rag = get_rag_from_constraint(c["description"], RAGFormat.CONSTRAINT_FORMULATION, top_k=10,
current_problem_name=problem_name)
rag = f"-----\n{rag}-----\n\n"
res = get_response(
prompt_constraints_model.format(
description=desc,
params=json.dumps(params, indent=4),
vars=json.dumps(vars, indent=4),
constraint=c,
rag=rag,
),
model=model,
)
if logger:
logger.log("----")
logger.log(res)
logger.log("----")
formulation, new_variables, aux_constraints = extract_formulation_from_end(res)
if logger:
logger.log("----")
logger.log("EXTRACTED ITEMS")
logger.log(str(formulation))
logger.log(str(new_variables))
logger.log(str(aux_constraints))
logger.log("----")
tmp_vars = vars.copy()
for v in new_variables:
if v in tmp_vars:
raise Exception(f"Variable {v} already exists")
print(v, new_variables[v])
new_variables[v]["shape"] = shape_string_to_list(new_variables[v]["shape"])
tmp_vars[v] = new_variables[v]
c["formulation"] = formulation
formulated_constraints.append(c)
for aux_c in aux_constraints:
formulated_constraints.append({"description": "auxiliary constraint", "formulation": aux_c})
vars = tmp_vars
break
except Exception as e:
k -= 1
if k == 0:
raise e
constraints = formulated_constraints
if check:
for c in formulated_constraints.copy():
for q in qs[0:1]:
k = 1
while k > 0:
p = prompt_constraints_q.format(
description=desc,
params=json.dumps(params, indent=4),
vars=json.dumps(vars, indent=4),
targetConstraint=json.dumps(c, indent=4),
question=q[0],
)
x = get_response(p, model=model)
valid, res = q[1](x, params, vars, constraints, c)
print(valid)
if valid:
constraints = res
break
else:
k -= 1
return formulated_constraints, vars
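# Illustrative end-to-end sketch (hypothetical variable names; get_constraints lives in
# the companion constraint-extraction module):
#
#     constraints = get_constraints(desc, params, model=model)
#     formulated, variables = get_constraint_formulations(desc, params, constraints, model)
#     # `formulated` carries a LaTeX "formulation" per constraint, and `variables`
#     # accumulates every auxiliary decision variable introduced along the way.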

112
llma/data/adv_code.py Normal file
View File

@ -0,0 +1,112 @@
import json
import pandas as pd
from gurobipy import Model, GRB, quicksum
# Read the data
data = pd.read_csv("data.csv")
price = data['price']
load = data['load']
pv = data['pv']
wind = data['wind']
# Initialize the model
m = Model("OptimizationProblem")
period = len(data)
# Decision variables
Ag1 = m.addVars(period, vtype=GRB.CONTINUOUS, lb=-1, ub=1, name="ag1")
Ag2 = m.addVars(period, vtype=GRB.CONTINUOUS, lb=-1, ub=1, name="ag2")
Ag3 = m.addVars(period, vtype=GRB.CONTINUOUS, lb=-1, ub=1, name="ag3")
Apv = m.addVars(period, vtype=GRB.CONTINUOUS, lb=-1, ub=1, name="apv")
Asoc = m.addVars(period, vtype=GRB.CONTINUOUS, lb=-1, ub=1, name="asoc")
# Define the variables and their bound constraints
soc = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0.2, ub=0.8, name="soc")
pg1 = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, ub=150, name="pg1")
pg2 = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, ub=375, name="pg2")
pg3 = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, ub=500, name="pg3")
pv_power = m.addVars(period, vtype=GRB.CONTINUOUS, name="pv_power")
# Set up grid exchange constraints
grid_energy_export = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, ub=100, name='export')
grid_energy_import = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, ub=100, name='import')
excess_energy = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, name='excess')
shortage_energy = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, name='shortage')
# Initial value constraints
m.addConstr(soc[0] == 0.2, name="soc_init")
m.addConstr(pg1[0] == 0, name="pg1_init")
m.addConstr(pg2[0] == 0, name="pg2_init")
m.addConstr(pg3[0] == 0, name="pg3_init")
# Add recursive update constraints for soc and pg
m.addConstrs((soc[t] == soc[t - 1] + 0.18 * Asoc[t] for t in range(1, period)), name="soc_update")
m.addConstrs((pg1[t] == pg1[t - 1] + 100 * Ag1[t] for t in range(1, period)), name="pg1_update")
m.addConstrs((pg2[t] == pg2[t - 1] + 100 * Ag2[t] for t in range(1, period)), name="pg2_update")
m.addConstrs((pg3[t] == pg3[t - 1] + 200 * Ag3[t] for t in range(1, period)), name="pg3_update")
# Photovoltaic power output
m.addConstrs((pv_power[t] == pv[t] * (1 + Apv[t]) for t in range(period)), name="pv_power_update")
# Define the supply-demand balance constraints
m.addConstrs((pg1[t] + pg2[t] + pg3[t] + pv_power[t] + wind[t] + grid_energy_import[t] ==
load[t] + 0.18 * Asoc[t] + grid_energy_export[t] + excess_energy[t] - shortage_energy[t] for t in range(period)),
name="power_balance")
m.addConstrs((excess_energy[t] == pg1[t] + pg2[t] + pg3[t] + pv_power[t] + wind[t] + grid_energy_import[t]
- load[t] - 0.18 * Asoc[t] - grid_energy_export[t] for t in range(1, period)),
name="excess_energy")
m.addConstrs((shortage_energy[t] == load[t] + 0.18 * Asoc[t] + grid_energy_export[t]
- pg1[t] - pg2[t] - pg3[t] - pv_power[t] - wind[t] - grid_energy_import[t] for t in range(1, period)),
name="shortage_energy")
# Define the cost functions
battery_cost = quicksum((Asoc[t] + 0.1 * soc[t]) for t in range(1, period))
gen1_cost = quicksum((0.0034 * pg1[t] ** 2 + 3 * pg1[t] + 30) for t in range(1, period))
gen2_cost = quicksum((0.001 * pg2[t] ** 2 + 10 * pg2[t] + 40) for t in range(1, period))
gen3_cost = quicksum((0.001 * pg3[t] ** 2 + 15 * pg3[t] + 70) for t in range(1, period))
renewable_cost = quicksum((0.01 * pv_power[t] + 0.01 * wind[t]) for t in range(period))
# Compute penalties and revenue
excess_penalty = quicksum(excess_energy[t] * 50 for t in range(period))
shortage_penalty = quicksum(shortage_energy[t] * 50 for t in range(period))
sales_revenue = quicksum(0.5 * price[t] * grid_energy_export[t] for t in range(period))
purchase_cost = quicksum(price[t] * grid_energy_import[t] for t in range(period))
# Set the objective function
objective = (battery_cost + gen1_cost + gen2_cost + gen3_cost + renewable_cost
+ excess_penalty + shortage_penalty - sales_revenue + purchase_cost)
m.setObjective(objective, GRB.MINIMIZE)
m.optimize()
# m.computeIIS()
# m.write("file.ilp")
if m.status == GRB.OPTIMAL:
results = []
for t in range(period):
gen1_cost = 0.0034 * pg1[t].x ** 2 + 3 * pg1[t].x + 30
gen2_cost = 0.001 * pg2[t].x ** 2 + 10 * pg2[t].x + 40
gen3_cost = 0.001 * pg3[t].x ** 2 + 15 * pg3[t].x + 70
battery_cost = Asoc[t].x + 0.1 * soc[t].x
renewable_cost = 0.01 * pv_power[t].x + 0.01 * wind[t]
excess_penalty = excess_energy[t].x * 50
shortage_penalty = shortage_energy[t].x * 50
sales_revenue = 0.5 * price[t] * grid_energy_export[t].x
purchase_cost = price[t] * grid_energy_import[t].x
current_obj = (gen1_cost + gen2_cost + gen3_cost + battery_cost + renewable_cost +
excess_penalty + shortage_penalty - sales_revenue + purchase_cost)
results.append({
"time": t,
"asoc": Asoc[t].x,
"ag1": Ag1[t].x,
"ag2": Ag2[t].x,
"ag3": Ag3[t].x,
"apv": Apv[t].x,
"reward": current_obj,
"unbanlance": excess_energy[t].x + shortage_energy[t].x
})
with open("../../DRL-for-Energy-Systems/data/results_day.json", "w") as f:
json.dump(results, f, indent=4)
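# To inspect the exported schedule afterwards (illustrative snippet; the path simply
# mirrors the dump above):
#
#     with open("../../DRL-for-Energy-Systems/data/results_day.json") as f:
#         day_results = json.load(f)
#     total_reward = sum(step["reward"] for step in day_results)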

130
llma/data/adv_code_2.py Normal file
View File

@ -0,0 +1,130 @@
import pandas as pd
from gurobipy import Model, GRB, quicksum
import json
# Read the data
data = pd.read_csv("data.csv")
price = data['price']
load = data['load']
pv = data['pv']
wind = data['wind']
# Total number of records and number of days
total_period = len(data)
days = total_period // 24
# Store all results
all_results = []
for day in range(days):
# Get the 24 hourly records for this day
start_idx = day * 24
end_idx = (day + 1) * 24
daily_price = price[start_idx:end_idx]
daily_load = load[start_idx:end_idx]
daily_pv = pv[start_idx:end_idx]
daily_wind = wind[start_idx:end_idx]
# Initialize the model
m = Model("OptimizationProblem_Day{}".format(day))
# Decision variables
Ag1 = m.addVars(24, vtype=GRB.CONTINUOUS, lb=-1, ub=1, name="ag1")
Ag2 = m.addVars(24, vtype=GRB.CONTINUOUS, lb=-1, ub=1, name="ag2")
Ag3 = m.addVars(24, vtype=GRB.CONTINUOUS, lb=-1, ub=1, name="ag3")
Apv = m.addVars(24, vtype=GRB.CONTINUOUS, lb=-1, ub=1, name="apv")
Asoc = m.addVars(24, vtype=GRB.CONTINUOUS, lb=-1, ub=1, name="asoc")
# Define the variables and their bound constraints
soc = m.addVars(24, vtype=GRB.CONTINUOUS, lb=0.2, ub=0.8, name="soc")
pg1 = m.addVars(24, vtype=GRB.CONTINUOUS, lb=0, ub=150, name="pg1")
pg2 = m.addVars(24, vtype=GRB.CONTINUOUS, lb=0, ub=375, name="pg2")
pg3 = m.addVars(24, vtype=GRB.CONTINUOUS, lb=0, ub=500, name="pg3")
pv_power = m.addVars(24, vtype=GRB.CONTINUOUS, name="pv_power")
# Set up grid exchange constraints
grid_energy_export = m.addVars(24, vtype=GRB.CONTINUOUS, lb=0, ub=100, name='export')
grid_energy_import = m.addVars(24, vtype=GRB.CONTINUOUS, lb=0, ub=100, name='import')
excess_energy = m.addVars(24, vtype=GRB.CONTINUOUS, lb=0, name='excess')
shortage_energy = m.addVars(24, vtype=GRB.CONTINUOUS, lb=0, name='shortage')
# Initial value constraints
m.addConstr(soc[0] == 0.2, name="soc_init")
m.addConstr(pg1[0] == 0, name="pg1_init")
m.addConstr(pg2[0] == 0, name="pg2_init")
m.addConstr(pg3[0] == 0, name="pg3_init")
# Add recursive update constraints for soc and pg
m.addConstrs((soc[t] == soc[t - 1] + 0.18 * Asoc[t] for t in range(1, 24)), name="soc_update")
m.addConstrs((pg1[t] == pg1[t - 1] + 100 * Ag1[t] for t in range(1, 24)), name="pg1_update")
m.addConstrs((pg2[t] == pg2[t - 1] + 100 * Ag2[t] for t in range(1, 24)), name="pg2_update")
m.addConstrs((pg3[t] == pg3[t - 1] + 200 * Ag3[t] for t in range(1, 24)), name="pg3_update")
# Photovoltaic power output
m.addConstrs((pv_power[t] == daily_pv.iloc[t] * (1 + Apv[t]) for t in range(24)), name="pv_power_update")
# Supply-demand balance constraints
m.addConstrs((pg1[t] + pg2[t] + pg3[t] + pv_power[t] + daily_wind.iloc[t] + grid_energy_import[t] ==
daily_load.iloc[t] + 0.18 * Asoc[t] + grid_energy_export[t] + excess_energy[t] - shortage_energy[t]
for t in range(24)), name="power_balance")
# Constraints for excess_energy and shortage_energy
m.addConstrs(
(excess_energy[t] == pg1[t] + pg2[t] + pg3[t] + pv_power[t] + daily_wind.iloc[t] + grid_energy_import[t]
- daily_load.iloc[t] - 0.18 * Asoc[t] - grid_energy_export[t] for t in range(1, 24)),
name="excess_energy")
m.addConstrs((shortage_energy[t] == daily_load.iloc[t] + 0.18 * Asoc[t] + grid_energy_export[t]
- pg1[t] - pg2[t] - pg3[t] - pv_power[t] - daily_wind.iloc[t] - grid_energy_import[t]
for t in range(1, 24)), name="shortage_energy")
# Cost function terms
battery_cost = quicksum((Asoc[t] + 0.1 * soc[t]) for t in range(1, 24))
gen1_cost = quicksum((0.0034 * pg1[t] ** 2 + 3 * pg1[t] + 30) for t in range(1, 24))
gen2_cost = quicksum((0.001 * pg2[t] ** 2 + 10 * pg2[t] + 40) for t in range(1, 24))
gen3_cost = quicksum((0.001 * pg3[t] ** 2 + 15 * pg3[t] + 70) for t in range(1, 24))
renewable_cost = quicksum((0.01 * pv_power[t] + 0.01 * daily_wind.iloc[t]) for t in range(24))
# Compute penalties and revenue
excess_penalty = quicksum(excess_energy[t] * 50 for t in range(24))
shortage_penalty = quicksum(shortage_energy[t] * 50 for t in range(24))
sales_revenue = quicksum(0.5 * daily_price.iloc[t] * grid_energy_export[t] for t in range(24))
purchase_cost = quicksum(daily_price.iloc[t] * grid_energy_import[t] for t in range(24))
# Set the objective function
objective = (battery_cost + gen1_cost + gen2_cost + gen3_cost + renewable_cost +
excess_penalty + shortage_penalty - sales_revenue + purchase_cost)
m.setObjective(objective, GRB.MINIMIZE)
# Solve
m.optimize()
if m.status == GRB.OPTIMAL:
day_results = []
for t in range(24):
gen1_cost = 0.0034 * pg1[t].x ** 2 + 3 * pg1[t].x + 30
gen2_cost = 0.001 * pg2[t].x ** 2 + 10 * pg2[t].x + 40
gen3_cost = 0.001 * pg3[t].x ** 2 + 15 * pg3[t].x + 70
battery_cost = Asoc[t].x + 0.1 * soc[t].x
renewable_cost = 0.01 * pv_power[t].x + 0.01 * daily_wind.iloc[t]
excess_penalty = excess_energy[t].x * 50
shortage_penalty = shortage_energy[t].x * 50
sales_revenue = 0.5 * daily_price.iloc[t] * grid_energy_export[t].x
purchase_cost = daily_price.iloc[t] * grid_energy_import[t].x
current_obj = (gen1_cost + gen2_cost + gen3_cost + battery_cost + renewable_cost +
excess_penalty + shortage_penalty - sales_revenue + purchase_cost)
day_results.append([
Asoc[t].x,
Ag1[t].x,
Ag2[t].x,
Ag3[t].x,
Apv[t].x,
])
# Save this day's results
all_results.extend(day_results)
# Finally, save all results to a single JSON file
with open("./results_day.json", "w") as f:
json.dump(all_results, f, indent=4)

184
llma/data/code.py Normal file
View File

@ -0,0 +1,184 @@
import json
from gurobipy import Model, GRB, quicksum
model = Model("OptimizationProblem")
with open("data.json", "r") as f:
data = json.load(f)
# Define the parameters
TotalBatteryCapacity = data["TotalBatteryCapacity"] # shape: [], definition: The total capacity of the battery
InitialSoc = data["InitialSoc"] # shape: [], definition: Initial state of charge of the battery
InitialPg1 = data["InitialPg1"] # shape: [], definition: Initial power of generator 1
InitialPg2 = data["InitialPg2"] # shape: [], definition: Initial power of generator 2
InitialPg3 = data["InitialPg3"] # shape: [], definition: Initial power of generator 3
Price = data["Price"] # shape: [24], definition: Electricity price at each time step
Load = data["Load"] # shape: [24], definition: Residential load at each time step
Pv = data["Pv"] # shape: [24], definition: Photovoltaic power generation at each time step
Wind = data["Wind"] # shape: [24], definition: Wind power generation at each time step
# Define the variables
Ag1 = model.addVars(24, vtype=GRB.CONTINUOUS, name="Ag1")
Ag2 = model.addVars(24, vtype=GRB.CONTINUOUS, name="Ag2")
Ag3 = model.addVars(24, vtype=GRB.CONTINUOUS, name="Ag3")
PvPower = model.addVars(24, vtype=GRB.CONTINUOUS, name="PvPower")
Apv = model.addVars(24, vtype=GRB.CONTINUOUS, name="Apv")
Asoc = model.addVars(24, vtype=GRB.CONTINUOUS, name="Asoc")
ExcessEnergy = model.addVars(24, vtype=GRB.CONTINUOUS, name="ExcessEnergy")
ShortageEnergy = model.addVars(24, vtype=GRB.CONTINUOUS, name="ShortageEnergy")
TotalPowerSupply = model.addVars(24, vtype=GRB.CONTINUOUS, name="TotalPowerSupply")
GridTransactionLimit = model.addVar(vtype=GRB.CONTINUOUS, name="GridTransactionLimit")
# Define the constraints
# Loop over each time step to add the soc constraints
for t in range(24):
# Calculate soc_t based on the previous soc and the adjustment factor
soc_t = 0.18 * Asoc[t] + InitialSoc
# Add the lower bound constraint for soc_t
model.addConstr(soc_t >= 0.2)
# Add the upper bound constraint for soc_t
model.addConstr(soc_t <= 0.8)
# Update soc to the newly calculated soc_t for the next iteration
soc = soc_t
# Assuming decision variables Pg1[t] have been defined for each time step t
for t in range(24):
if t == 0:
# For the first time step, use the initial value of Pg1
model.addConstr(Pg1[t] <= 150)
model.addConstr(Pg1[t] >= 10)
else:
# For subsequent time steps, calculate Pg1 based on the previous time step and Ag1
model.addConstr(Pg1[t] <= 150)
model.addConstr(Pg1[t] >= 10)
model.addConstr(Pg1[t] == 100 * Ag1[t] + Pg1[t - 1])
for t in range(1, 24):
model.addConstr(InitialPg2 + 100 * Ag2[t] >= 50)
model.addConstr(InitialPg2 + 100 * Ag2[t] <= 375)
for t in range(24):
model.addConstr(Pg3[t] >= 100) # Lower bound constraint for each time step
model.addConstr(Pg3[t] <= 500) # Upper bound constraint for each time step
# Assuming soc is defined as a list of decision variables for each time step
soc = model.addVars(24, lb=0.2, ub=0.8, vtype=GRB.CONTINUOUS, name="soc")
# Loop over each time step starting from 1 up to 24
for t in range(1, 24):
# Add the constraint for the state of charge calculation
model.addConstr(soc[t] == 0.18 * Asoc[t] + soc[t - 1])
for t in range(1, 24):
model.addConstr(pg1[t] == 100 * Ag1[t] + pg1[t - 1])
# Assuming that `pg2` is a list of Gurobi variables representing the power of Generator 2 at each time step
# and that `InitialPg2` is a parameter representing the initial power of Generator 2 at time step 0
# Define the Generator 2 power for the first time step, which is the initial value and does not have a constraint
pg2 = [model.addVar(vtype=GRB.CONTINUOUS, lb=50, ub=375, name="pg2_0")]
# Now add the constraints for time steps 1 to 23
for t in range(1, 24):
pg2.append(model.addVar(vtype=GRB.CONTINUOUS, lb=50, ub=375, name=f"pg2_{t}"))
model.addConstr(pg2[t] == 100 * Ag2[t - 1] + pg2[t - 1])
# Assuming Pg3 is defined as a list of decision variables representing power of generator 3 at each time step
for t in range(1, 24):
model.addConstr(Pg3[t] == 200 * Ag3[t] + Pg3[t - 1])
for t in range(24):
model.addConstr(PvPower[t] == Pv[t] * (1 + Apv[t]))
for t in range(24):
model.addConstr(ExcessEnergy[t] >= 0)
for t in range(24):
# Define the total power supply at time step t
TotalSupply_t = (InitialPg1 + Ag1[t] * 100 +
InitialPg2 + Ag2[t] * 100 +
InitialPg3 + Ag3[t] * 200 +
Pv[t] * (1 + Apv[t]) +
Wind[t] +
InitialSoc + Asoc[t] * 0.18)
# Add the constraint for excess energy at time step t
model.addConstr(ExcessEnergy[t] == TotalSupply_t - Load[t])
for t in range(24):
# Calculate updated generator powers and battery state of charge
soc_t = InitialSoc + 0.18 * Asoc[t] if t == 0 else soc[t - 1] + 0.18 * Asoc[t]
pg1_t = InitialPg1 + 100 * Ag1[t] if t == 0 else pg1[t - 1] + 100 * Ag1[t]
pg2_t = InitialPg2 + 100 * Ag2[t] if t == 0 else pg2[t - 1] + 100 * Ag2[t]
pg3_t = InitialPg3 + 200 * Ag3[t] if t == 0 else pg3[t - 1] + 200 * Ag3[t]
# Calculate photovoltaic power
PvPower_t = Pv[t] * (1 + Apv[t])
# Add the constraint for total power supply at time t
model.addConstr(TotalPowerSupply[t] == soc_t + pg1_t + pg2_t + pg3_t + PvPower_t + Wind[t])
for t in range(24):
model.addConstr(ShortageEnergy[t] >= 0)
for t in range(24):
model.addConstr(-1 <= Apv[t])
model.addConstr(Apv[t] <= 1)
model.addConstr(-1 <= Asoc[t])
model.addConstr(Asoc[t] <= 1)
model.addConstr(-1 <= Ag1[t])
model.addConstr(Ag1[t] <= 1)
model.addConstr(-1 <= Ag2[t])
model.addConstr(Ag2[t] <= 1)
model.addConstr(-1 <= Ag3[t])
model.addConstr(Ag3[t] <= 1)
for t in range(1, 24):
model.addConstr(100 * Asoc[t] == soc[t] - soc[t - 1])
for t in range(24):
# Calculate the power from each generator based on previous state and adjustment
pg1_power = InitialPg1 + 100 * Ag1[t]
pg2_power = InitialPg2 + 100 * Ag2[t]
pg3_power = InitialPg3 + 200 * Ag3[t]
# Calculate the adjusted photovoltaic power
pv_power = Pv[t] * (1 + Apv[t])
# Express the total power supply
TotalPowerSupply[t] = pg1_power + pg2_power + pg3_power + pv_power + Wind[t]
# Add the constraint that total power supply cannot exceed the load by more than the grid transaction limit
model.addConstr(TotalPowerSupply[t] <= Load[t] + GridTransactionLimit)
for t in range(24):
# Assuming that Pg1, Pg2, Pg3 are defined as variables in the model with the correct initial values and adjustments
Pg1[t] = model.addVar(vtype=GRB.CONTINUOUS, name=f"Pg1_{t}")
Pg2[t] = model.addVar(vtype=GRB.CONTINUOUS, name=f"Pg2_{t}")
Pg3[t] = model.addVar(vtype=GRB.CONTINUOUS, name=f"Pg3_{t}")
# Define the auxiliary constraint for each time step
model.addConstr(TotalPowerSupply[t] == Pg1[t] + Pg2[t] + Pg3[t] + PvPower[t] + Wind[t])
# Define the objective
model.setObjective(
quicksum(
(Asoc[t] + 0.1 * soc[t - 1] +
0.0034 * pg1[t - 1] ** 2 + 3 * pg1[t - 1] + 30 +
0.001 * pg2[t - 1] ** 2 + 10 * pg2[t - 1] + 40 +
0.001 * pg3[t - 1] ** 2 + 15 * pg3[t - 1] + 70 +
0.01 * (Pv[t - 1] + Wind[t - 1]) +
max(0, (ExcessEnergy[t] - 100) * 50) +
max(0, (ShortageEnergy[t] - 100) * 50) -
0.5 * Price[t - 1] * ExcessEnergy[t] +
Price[t - 1] * ShortageEnergy[t] +
abs(ExcessEnergy[t]) + abs(ShortageEnergy[t])
) for t in range(1, 24)
),
GRB.MINIMIZE
)
# Optimize the model
model.optimize()
if model.status == GRB.OPTIMAL:
with open("output_solution.txt", "w") as f:
f.write(str(model.objVal))
print("Optimal Objective Value: ", model.objVal)
else:
with open("output_solution.txt", "w") as f:
f.write(str(model.status))

8761
llma/data/data.csv Normal file

File diff suppressed because it is too large Load Diff

14
llma/data/data.json Normal file
View File

@ -0,0 +1,14 @@
{
"Price": [2.239, 2.059, 1.681, 1.741, 1.702, 1.586, 1.816, 1.773, 1.977, 2.375, 2.603, 2.706,
2.659, 2.5, 2.443, 2.887, 3.744, 3.741, 3.534, 3.307, 2.952, 3.01, 2.457, 2.22, 2.239],
"Load": [269.0525945, 243.2130742, 234.1123382, 232.6271781, 233.0417238, 245.2036212, 298.1636567, 370.1068689,
358.2133437, 314.073438, 322.8667176, 307.7048847, 295.2915574, 284.9981584, 282.4243391, 298.4115103,
360.9832044, 486.4398933, 550.3139862, 542.9828898, 509.6215404, 474.1087098, 403.6717878, 333.680157],
"PV": [0, 0, 0, 0, 0, 0, 0, 0, 3.2429, 38.1736, 69.7787, 92.1211, 101.0943, 103.8121, 88.1212, 66.5642, 27.0837, 0.1367, 0, 0, 0, 0, 0, 0],
"Wind": [88.2, 0, 0, 0, 88.2, 88.2, 86.2303, 45.1584, 28.1943, 27.2766, 20.2669, 35.5599,
52.2765, 43.0745, 42.6656, 45.3704, 53.9272, 66.1914, 66.7392, 79.877, 58.08, 78.0337, 39.6808, 50.4316],
"SOC": 0.2,
"PG1": 0,
"PG2": 0,
"PG3": 0
}

28
llma/data/desc.txt Normal file
View File

@ -0,0 +1,28 @@
- The energy system includes photovoltaic power stations, wind power stations, generators, batteries, and residential buildings.
- The goal is to minimize the total costs of the energy system and the gap between power supply and residential load.
- Electricity price, residential load, photovoltaic, and wind power generation are \\param{price_{}}, \\param{load_{}}, \\param{pv_{}}, and \\param{wind_{}}, each with 24 time steps representing hourly data.
- Battery state of charge is \\param{soc_{}}, with a total battery capacity of 500. Power of generators 1, 2, and 3 is \\param{pg1_{}}, \\param{pg2_{}}, and \\param{pg3_{}}, respectively. The initial values at time step 0 are provided, and the system will update these values at each subsequent time step based on decisions made.
- \\var{Apv_{}}, \\var{Asoc_{}}, \\var{Ag1_{}}, \\var{Ag2_{}}, and \\var{Ag3_{}} are adjustment factors ranging from -1 to 1, changing according to the value from the previous time step and affecting system parameters such as power generation and battery capacity.
- Battery charging and discharging result in a capacity change of 100 * \\var{Asoc_{t}}.
- Generators 1, 2, and 3 power change by 100 * \\var{Ag1_{t}}, 100 * \\var{Ag2_{t}}, and 200 * \\var{Ag3_{t}}.
- \\param{soc_{t}} is calculated as (0.18 * \\var{Asoc_{t}} + \\param{soc_{t-1}}), with a maximum of 0.8 and a minimum of 0.2.
- \\param{pg1_{t}} is calculated as (100 * \\var{Ag1_{t}} + \\param{pg1_{t-1}}), with a maximum of 150 and a minimum of 10.
- \\param{pg2_{t}} is calculated as (100 * \\var{Ag2_{t}} + \\param{pg2_{t-1}}), with a maximum of 375 and a minimum of 50.
- \\param{pg3_{t}} is calculated as (200 * \\var{Ag3_{t}} + \\param{pg3_{t-1}}), with a maximum of 500 and a minimum of 100.
- Photovoltaic power is calculated as (\\param{pv_{t}} * (1 + \\var{Apv})).
- Total power supply at each time step t = \\param{soc_{t}} + \\param{pg1_{t}} + \\param{pg2_{t}} + \\param{pg3_{t}}+ (photovoltaic power at time step t) + \\param{wind_{t}}.
- Excess energy = total power supply - \\param{load_{t}}, and no less than 0.
- Shortage energy = \\param{load_{t}} - total power supply, and no less than 0.
- The total cost structure includes the following:
- Battery cost = \\var{Asoc_{t}} + 0.1 * \\param{soc_{t}};
- Gen1 cost = 0.0034 * \\param{pg1_{t}}^2 + 3 * \\param{pg1_{t}} + 30;
- Gen2 cost = 0.001 * \\param{pg2_{t}}^2 + 10 * \\param{pg2_{t}} + 40;
- Gen3 cost = 0.001 * \\param{pg3_{t}}^2 + 15 * \\param{pg3_{t}} + 70;
- Renewable energy cost = 0.01 * (\\param{pv_{t}} + \\param{wind_{t}});
- Sales revenue = 0.5 * \\param{price_{t}} * (excess energy);
- Purchase cost = \\param{price_{t}} * (shortage energy);
- Excess penalty = max(0, (excess energy - 100) * 50);
- Shortage penalty = max(0, (shortage energy - 100) * 50).
- The cost of each power generation module should be considered when supplying power, usually starting the subsequent generator when the power of the previous generator is insufficient to meet the load.
- Minimize transactions exceeding the power grid transaction limit to avoid power grid fluctuations affecting the stability of residential electricity consumption.
- Decisions made at each time step will update \\param{soc_{}}, \\param{pg1_{}}, \\param{pg2_{}}, and \\param{pg3_{}}, and these updated values will serve as the starting point for calculations in the next time step.

4
llma/data/labels.json Normal file
View File

@ -0,0 +1,4 @@
{
"types": "Mixed Integer Linear Programming",
"domains": "Energy Optimization"
}

Binary file not shown.

Binary file not shown.

61322
llma/data/results_day.json Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,58 @@
{
"description": "- The energy system includes photovoltaic power stations, wind power stations, generators, batteries, and residential buildings.\n- The goal is to minimize the total costs of the energy system and the gap between power supply and residential load.\n- Electricity price, residential load, photovoltaic, and wind power generation are \\\\param{price_{}}, \\\\param{load_{}}, \\\\param{pv_{}}, and \\\\param{wind_{}}, each with 24 time steps representing hourly data.\n- Battery state of charge is \\\\param{soc_{}}, with a total battery capacity of 500. Power of generators 1, 2, and 3 is \\\\param{pg1_{}}, \\\\param{pg2_{}}, and \\\\param{pg3_{}}, respectively. The initial values at time step 0 are provided, and the system will update these values at each subsequent time step based on decisions made.\n- \\\\var{Apv_{}}, \\\\var{Asoc_{}}, \\\\var{Ag1_{}}, \\\\var{Ag2_{}}, and \\\\var{Ag3_{}} are adjustment factors ranging from -1 to 1, changing according to the value from the previous time step and affecting system parameters such as power generation and battery capacity.\n- Battery charging and discharging result in a capacity change of 100 * \\\\var{Asoc_{t}}.\n- Generators 1, 2, and 3 power change by 100 * \\\\var{Ag1_{t}}, 100 * \\\\var{Ag2_{t}}, and 200 * \\\\var{Ag3_{t}}.\n- \\\\param{soc_{t}} is calculated as (0.18 * \\\\var{Asoc_{t}} + \\\\param{soc_{t-1}}), with a maximum of 0.8 and a minimum of 0.2.\n- \\\\param{pg1_{t}} is calculated as (100 * \\\\var{Ag1_{t}} + \\\\param{pg1_{t-1}}), with a maximum of 150 and a minimum of 10.\n- \\\\param{pg2_{t}} is calculated as (100 * \\\\var{Ag2_{t}} + \\\\param{pg2_{t-1}}), with a maximum of 375 and a minimum of 50.\n- \\\\param{pg3_{t}} is calculated as (200 * \\\\var{Ag3_{t}} + \\\\param{pg3_{t-1}}), with a maximum of 500 and a minimum of 100.\n- Photovoltaic power is calculated as (\\\\param{pv_{t}} * (1 + \\\\var{Apv})).\n- Total power supply at each time step t = \\\\param{soc_{t}} + \\\\param{pg1_{t}} + \\\\param{pg2_{t}} + \\\\param{pg3_{t}}+ (photovoltaic power at time step t) + \\\\param{wind_{t}}.\n- Excess energy = total power supply - \\\\param{load_{t}}, and no less than 0.\n- Shortage energy = \\\\param{load_{t}} - total power supply, and no less than 0.\n- The total cost structure includes the following:\n- Battery cost = \\\\var{Asoc_{t}} + 0.1 * \\\\param{soc_{t}};\n- Gen1 cost = 0.0034 * \\\\param{pg1_{t}}^2 + 3 * \\\\param{pg1_{t}} + 30;\n- Gen2 cost = 0.001 * \\\\param{pg2_{t}}^2 + 10 * \\\\param{pg2_{t}} + 40;\n- Gen3 cost = 0.001 * \\\\param{pg3_{t}}^2 + 15 * \\\\param{pg3_{t}} + 70;\n- Renewable energy cost = 0.01 * (\\\\param{pv_{t}} + \\\\param{wind_{t}});\n- Sales revenue = 0.5 * \\\\param{price_{t}} * (excess energy);\n- Purchase cost = \\\\param{price_{t}} * (shortage energy);\n- Excess penalty = max(0, (excess energy - 100) * 50);\n- Shortage penalty = max(0, (shortage energy - 100) * 50).\n- The cost of each power generation module should be considered when supplying power, usually starting the subsequent generator when the power of the previous generator is insufficient to meet the load.\n- Minimize transactions exceeding the power grid transaction limit to avoid power grid fluctuations affecting the stability of residential electricity consumption.\n- Decisions made at each time step will update \\\\param{soc_{}}, \\\\param{pg1_{}}, \\\\param{pg2_{}}, and \\\\param{pg3_{}}, and these updated values will serve as the starting point for calculations in the next time step.\n",
"parameters": {
"TotalBatteryCapacity": {
"shape": [],
"definition": "The total capacity of the battery",
"type": "int"
},
"InitialSoc": {
"shape": [],
"definition": "Initial state of charge of the battery",
"type": "float"
},
"InitialPg1": {
"shape": [],
"definition": "Initial power of generator 1",
"type": "float"
},
"InitialPg2": {
"shape": [],
"definition": "Initial power of generator 2",
"type": "float"
},
"InitialPg3": {
"shape": [],
"definition": "Initial power of generator 3",
"type": "float"
},
"Price": {
"shape": [
24
],
"definition": "Electricity price at each time step",
"type": "float"
},
"Load": {
"shape": [
24
],
"definition": "Residential load at each time step",
"type": "float"
},
"Pv": {
"shape": [
24
],
"definition": "Photovoltaic power generation at each time step",
"type": "float"
},
"Wind": {
"shape": [
24
],
"definition": "Wind power generation at each time step",
"type": "float"
}
}
}

View File

@ -0,0 +1,63 @@
{
"description": "- The energy system includes photovoltaic power stations, wind power stations, generators, batteries, and residential buildings.\n- The goal is to minimize the total costs of the energy system and the gap between power supply and residential load.\n- Electricity price, residential load, photovoltaic, and wind power generation are \\\\param{price_{}}, \\\\param{load_{}}, \\\\param{pv_{}}, and \\\\param{wind_{}}, each with 24 time steps representing hourly data.\n- Battery state of charge is \\\\param{soc_{}}, with a total battery capacity of 500. Power of generators 1, 2, and 3 is \\\\param{pg1_{}}, \\\\param{pg2_{}}, and \\\\param{pg3_{}}, respectively. The initial values at time step 0 are provided, and the system will update these values at each subsequent time step based on decisions made.\n- \\\\var{Apv_{}}, \\\\var{Asoc_{}}, \\\\var{Ag1_{}}, \\\\var{Ag2_{}}, and \\\\var{Ag3_{}} are adjustment factors ranging from -1 to 1, changing according to the value from the previous time step and affecting system parameters such as power generation and battery capacity.\n- Battery charging and discharging result in a capacity change of 100 * \\\\var{Asoc_{t}}.\n- Generators 1, 2, and 3 power change by 100 * \\\\var{Ag1_{t}}, 100 * \\\\var{Ag2_{t}}, and 200 * \\\\var{Ag3_{t}}.\n- \\\\param{soc_{t}} is calculated as (0.18 * \\\\var{Asoc_{t}} + \\\\param{soc_{t-1}}), with a maximum of 0.8 and a minimum of 0.2.\n- \\\\param{pg1_{t}} is calculated as (100 * \\\\var{Ag1_{t}} + \\\\param{pg1_{t-1}}), with a maximum of 150 and a minimum of 10.\n- \\\\param{pg2_{t}} is calculated as (100 * \\\\var{Ag2_{t}} + \\\\param{pg2_{t-1}}), with a maximum of 375 and a minimum of 50.\n- \\\\param{pg3_{t}} is calculated as (200 * \\\\var{Ag3_{t}} + \\\\param{pg3_{t-1}}), with a maximum of 500 and a minimum of 100.\n- Photovoltaic power is calculated as (\\\\param{pv_{t}} * (1 + \\\\var{Apv})).\n- Total power supply at each time step t = \\\\param{soc_{t}} + \\\\param{pg1_{t}} + \\\\param{pg2_{t}} + \\\\param{pg3_{t}}+ (photovoltaic power at time step t) + \\\\param{wind_{t}}.\n- Excess energy = total power supply - \\\\param{load_{t}}, and no less than 0.\n- Shortage energy = \\\\param{load_{t}} - total power supply, and no less than 0.\n- The total cost structure includes the following:\n- Battery cost = \\\\var{Asoc_{t}} + 0.1 * \\\\param{soc_{t}};\n- Gen1 cost = 0.0034 * \\\\param{pg1_{t}}^2 + 3 * \\\\param{pg1_{t}} + 30;\n- Gen2 cost = 0.001 * \\\\param{pg2_{t}}^2 + 10 * \\\\param{pg2_{t}} + 40;\n- Gen3 cost = 0.001 * \\\\param{pg3_{t}}^2 + 15 * \\\\param{pg3_{t}} + 70;\n- Renewable energy cost = 0.01 * (\\\\param{pv_{t}} + \\\\param{wind_{t}});\n- Sales revenue = 0.5 * \\\\param{price_{t}} * (excess energy);\n- Purchase cost = \\\\param{price_{t}} * (shortage energy);\n- Excess penalty = max(0, (excess energy - 100) * 50);\n- Shortage penalty = max(0, (shortage energy - 100) * 50).\n- The cost of each power generation module should be considered when supplying power, usually starting the subsequent generator when the power of the previous generator is insufficient to meet the load.\n- Minimize transactions exceeding the power grid transaction limit to avoid power grid fluctuations affecting the stability of residential electricity consumption.\n- Decisions made at each time step will update \\\\param{soc_{}}, \\\\param{pg1_{}}, \\\\param{pg2_{}}, and \\\\param{pg3_{}}, and these updated values will serve as the starting point for calculations in the next time step.\n",
"parameters": {
"TotalBatteryCapacity": {
"shape": [],
"definition": "The total capacity of the battery",
"type": "int"
},
"InitialSoc": {
"shape": [],
"definition": "Initial state of charge of the battery",
"type": "float"
},
"InitialPg1": {
"shape": [],
"definition": "Initial power of generator 1",
"type": "float"
},
"InitialPg2": {
"shape": [],
"definition": "Initial power of generator 2",
"type": "float"
},
"InitialPg3": {
"shape": [],
"definition": "Initial power of generator 3",
"type": "float"
},
"Price": {
"shape": [
24
],
"definition": "Electricity price at each time step",
"type": "float"
},
"Load": {
"shape": [
24
],
"definition": "Residential load at each time step",
"type": "float"
},
"Pv": {
"shape": [
24
],
"definition": "Photovoltaic power generation at each time step",
"type": "float"
},
"Wind": {
"shape": [
24
],
"definition": "Wind power generation at each time step",
"type": "float"
}
},
"objective": {
"description": "\"The goal is to minimize the total costs of the energy system and the gap between power supply and residential load.\"",
"formulation": null,
"code": null
}
}

View File

@ -0,0 +1,135 @@
{
"description": "- The energy system includes photovoltaic power stations, wind power stations, generators, batteries, and residential buildings.\n- The goal is to minimize the total costs of the energy system and the gap between power supply and residential load.\n- Electricity price, residential load, photovoltaic, and wind power generation are \\\\param{price_{}}, \\\\param{load_{}}, \\\\param{pv_{}}, and \\\\param{wind_{}}, each with 24 time steps representing hourly data.\n- Battery state of charge is \\\\param{soc_{}}, with a total battery capacity of 500. Power of generators 1, 2, and 3 is \\\\param{pg1_{}}, \\\\param{pg2_{}}, and \\\\param{pg3_{}}, respectively. The initial values at time step 0 are provided, and the system will update these values at each subsequent time step based on decisions made.\n- \\\\var{Apv_{}}, \\\\var{Asoc_{}}, \\\\var{Ag1_{}}, \\\\var{Ag2_{}}, and \\\\var{Ag3_{}} are adjustment factors ranging from -1 to 1, changing according to the value from the previous time step and affecting system parameters such as power generation and battery capacity.\n- Battery charging and discharging result in a capacity change of 100 * \\\\var{Asoc_{t}}.\n- Generators 1, 2, and 3 power change by 100 * \\\\var{Ag1_{t}}, 100 * \\\\var{Ag2_{t}}, and 200 * \\\\var{Ag3_{t}}.\n- \\\\param{soc_{t}} is calculated as (0.18 * \\\\var{Asoc_{t}} + \\\\param{soc_{t-1}}), with a maximum of 0.8 and a minimum of 0.2.\n- \\\\param{pg1_{t}} is calculated as (100 * \\\\var{Ag1_{t}} + \\\\param{pg1_{t-1}}), with a maximum of 150 and a minimum of 10.\n- \\\\param{pg2_{t}} is calculated as (100 * \\\\var{Ag2_{t}} + \\\\param{pg2_{t-1}}), with a maximum of 375 and a minimum of 50.\n- \\\\param{pg3_{t}} is calculated as (200 * \\\\var{Ag3_{t}} + \\\\param{pg3_{t-1}}), with a maximum of 500 and a minimum of 100.\n- Photovoltaic power is calculated as (\\\\param{pv_{t}} * (1 + \\\\var{Apv})).\n- Total power supply at each time step t = \\\\param{soc_{t}} + \\\\param{pg1_{t}} + \\\\param{pg2_{t}} + \\\\param{pg3_{t}}+ (photovoltaic power at time step t) + \\\\param{wind_{t}}.\n- Excess energy = total power supply - \\\\param{load_{t}}, and no less than 0.\n- Shortage energy = \\\\param{load_{t}} - total power supply, and no less than 0.\n- The total cost structure includes the following:\n- Battery cost = \\\\var{Asoc_{t}} + 0.1 * \\\\param{soc_{t}};\n- Gen1 cost = 0.0034 * \\\\param{pg1_{t}}^2 + 3 * \\\\param{pg1_{t}} + 30;\n- Gen2 cost = 0.001 * \\\\param{pg2_{t}}^2 + 10 * \\\\param{pg2_{t}} + 40;\n- Gen3 cost = 0.001 * \\\\param{pg3_{t}}^2 + 15 * \\\\param{pg3_{t}} + 70;\n- Renewable energy cost = 0.01 * (\\\\param{pv_{t}} + \\\\param{wind_{t}});\n- Sales revenue = 0.5 * \\\\param{price_{t}} * (excess energy);\n- Purchase cost = \\\\param{price_{t}} * (shortage energy);\n- Excess penalty = max(0, (excess energy - 100) * 50);\n- Shortage penalty = max(0, (shortage energy - 100) * 50).\n- The cost of each power generation module should be considered when supplying power, usually starting the subsequent generator when the power of the previous generator is insufficient to meet the load.\n- Minimize transactions exceeding the power grid transaction limit to avoid power grid fluctuations affecting the stability of residential electricity consumption.\n- Decisions made at each time step will update \\\\param{soc_{}}, \\\\param{pg1_{}}, \\\\param{pg2_{}}, and \\\\param{pg3_{}}, and these updated values will serve as the starting point for calculations in the next time step.\n",
"parameters": {
"TotalBatteryCapacity": {
"shape": [],
"definition": "The total capacity of the battery",
"type": "int"
},
"InitialSoc": {
"shape": [],
"definition": "Initial state of charge of the battery",
"type": "float"
},
"InitialPg1": {
"shape": [],
"definition": "Initial power of generator 1",
"type": "float"
},
"InitialPg2": {
"shape": [],
"definition": "Initial power of generator 2",
"type": "float"
},
"InitialPg3": {
"shape": [],
"definition": "Initial power of generator 3",
"type": "float"
},
"Price": {
"shape": [
24
],
"definition": "Electricity price at each time step",
"type": "float"
},
"Load": {
"shape": [
24
],
"definition": "Residential load at each time step",
"type": "float"
},
"Pv": {
"shape": [
24
],
"definition": "Photovoltaic power generation at each time step",
"type": "float"
},
"Wind": {
"shape": [
24
],
"definition": "Wind power generation at each time step",
"type": "float"
}
},
"objective": {
"description": "\"The goal is to minimize the total costs of the energy system and the gap between power supply and residential load.\"",
"formulation": null,
"code": null
},
"constraints": [
{
"description": "Battery state of charge (soc_t) must be between 0.2 and 0.8",
"formulation": null,
"code": null
},
{
"description": "Generator 1 power (pg1_t) must be between 10 and 150",
"formulation": null,
"code": null
},
{
"description": "Generator 2 power (pg2_t) must be between 50 and 375",
"formulation": null,
"code": null
},
{
"description": "Generator 3 power (pg3_t) must be between 100 and 500",
"formulation": null,
"code": null
},
{
"description": "Battery state of charge (soc_t) must be calculated as (0.18 * Asoc_t + soc_t-1)",
"formulation": null,
"code": null
},
{
"description": "Generator 1 power (pg1_t) must be calculated as (100 * Ag1_t + pg1_t-1)",
"formulation": null,
"code": null
},
{
"description": "Generator 2 power (pg2_t) must be calculated as (100 * Ag2_t + pg2_t-1)",
"formulation": null,
"code": null
},
{
"description": "Generator 3 power (pg3_t) must be calculated as (200 * Ag3_t + pg3_t-1)",
"formulation": null,
"code": null
},
{
"description": "Photovoltaic power must be calculated as (pv_t * (1 + Apv))",
"formulation": null,
"code": null
},
{
"description": "Excess energy must be no less than 0",
"formulation": null,
"code": null
},
{
"description": "Shortage energy must be no less than 0",
"formulation": null,
"code": null
},
{
"description": "Adjustment factors (Apv, Asoc, Ag1, Ag2, Ag3) must range from -1 to 1",
"formulation": null,
"code": null
},
{
"description": "Battery charging and discharging must result in a capacity change of 100 * Asoc_t",
"formulation": null,
"code": null
},
{
"description": "The sum of power from generators and renewable sources must not exceed the residential load by more than the grid transaction limit to avoid instability",
"formulation": null,
"code": null
}
]
}

View File

@ -0,0 +1,217 @@
{
"description": "- The energy system includes photovoltaic power stations, wind power stations, generators, batteries, and residential buildings.\n- The goal is to minimize the total costs of the energy system and the gap between power supply and residential load.\n- Electricity price, residential load, photovoltaic, and wind power generation are \\\\param{price_{}}, \\\\param{load_{}}, \\\\param{pv_{}}, and \\\\param{wind_{}}, each with 24 time steps representing hourly data.\n- Battery state of charge is \\\\param{soc_{}}, with a total battery capacity of 500. Power of generators 1, 2, and 3 is \\\\param{pg1_{}}, \\\\param{pg2_{}}, and \\\\param{pg3_{}}, respectively. The initial values at time step 0 are provided, and the system will update these values at each subsequent time step based on decisions made.\n- \\\\var{Apv_{}}, \\\\var{Asoc_{}}, \\\\var{Ag1_{}}, \\\\var{Ag2_{}}, and \\\\var{Ag3_{}} are adjustment factors ranging from -1 to 1, changing according to the value from the previous time step and affecting system parameters such as power generation and battery capacity.\n- Battery charging and discharging result in a capacity change of 100 * \\\\var{Asoc_{t}}.\n- Generators 1, 2, and 3 power change by 100 * \\\\var{Ag1_{t}}, 100 * \\\\var{Ag2_{t}}, and 200 * \\\\var{Ag3_{t}}.\n- \\\\param{soc_{t}} is calculated as (0.18 * \\\\var{Asoc_{t}} + \\\\param{soc_{t-1}}), with a maximum of 0.8 and a minimum of 0.2.\n- \\\\param{pg1_{t}} is calculated as (100 * \\\\var{Ag1_{t}} + \\\\param{pg1_{t-1}}), with a maximum of 150 and a minimum of 10.\n- \\\\param{pg2_{t}} is calculated as (100 * \\\\var{Ag2_{t}} + \\\\param{pg2_{t-1}}), with a maximum of 375 and a minimum of 50.\n- \\\\param{pg3_{t}} is calculated as (200 * \\\\var{Ag3_{t}} + \\\\param{pg3_{t-1}}), with a maximum of 500 and a minimum of 100.\n- Photovoltaic power is calculated as (\\\\param{pv_{t}} * (1 + \\\\var{Apv})).\n- Total power supply at each time step t = \\\\param{soc_{t}} + \\\\param{pg1_{t}} + \\\\param{pg2_{t}} + \\\\param{pg3_{t}}+ (photovoltaic power at time step t) + \\\\param{wind_{t}}.\n- Excess energy = total power supply - \\\\param{load_{t}}, and no less than 0.\n- Shortage energy = \\\\param{load_{t}} - total power supply, and no less than 0.\n- The total cost structure includes the following:\n- Battery cost = \\\\var{Asoc_{t}} + 0.1 * \\\\param{soc_{t}};\n- Gen1 cost = 0.0034 * \\\\param{pg1_{t}}^2 + 3 * \\\\param{pg1_{t}} + 30;\n- Gen2 cost = 0.001 * \\\\param{pg2_{t}}^2 + 10 * \\\\param{pg2_{t}} + 40;\n- Gen3 cost = 0.001 * \\\\param{pg3_{t}}^2 + 15 * \\\\param{pg3_{t}} + 70;\n- Renewable energy cost = 0.01 * (\\\\param{pv_{t}} + \\\\param{wind_{t}});\n- Sales revenue = 0.5 * \\\\param{price_{t}} * (excess energy);\n- Purchase cost = \\\\param{price_{t}} * (shortage energy);\n- Excess penalty = max(0, (excess energy - 100) * 50);\n- Shortage penalty = max(0, (shortage energy - 100) * 50).\n- The cost of each power generation module should be considered when supplying power, usually starting the subsequent generator when the power of the previous generator is insufficient to meet the load.\n- Minimize transactions exceeding the power grid transaction limit to avoid power grid fluctuations affecting the stability of residential electricity consumption.\n- Decisions made at each time step will update \\\\param{soc_{}}, \\\\param{pg1_{}}, \\\\param{pg2_{}}, and \\\\param{pg3_{}}, and these updated values will serve as the starting point for calculations in the next time step.\n",
"parameters": {
"TotalBatteryCapacity": {
"shape": [],
"definition": "The total capacity of the battery",
"type": "int"
},
"InitialSoc": {
"shape": [],
"definition": "Initial state of charge of the battery",
"type": "float"
},
"InitialPg1": {
"shape": [],
"definition": "Initial power of generator 1",
"type": "float"
},
"InitialPg2": {
"shape": [],
"definition": "Initial power of generator 2",
"type": "float"
},
"InitialPg3": {
"shape": [],
"definition": "Initial power of generator 3",
"type": "float"
},
"Price": {
"shape": [
24
],
"definition": "Electricity price at each time step",
"type": "float"
},
"Load": {
"shape": [
24
],
"definition": "Residential load at each time step",
"type": "float"
},
"Pv": {
"shape": [
24
],
"definition": "Photovoltaic power generation at each time step",
"type": "float"
},
"Wind": {
"shape": [
24
],
"definition": "Wind power generation at each time step",
"type": "float"
}
},
"objective": {
"description": "\"The goal is to minimize the total costs of the energy system and the gap between power supply and residential load.\"",
"formulation": null,
"code": null
},
"constraints": [
{
"description": "Battery state of charge (soc_t) must be between 0.2 and 0.8",
"formulation": "$0.2 \\leq soc_t \\leq 0.8$",
"code": null
},
{
"description": "Generator 1 power (pg1_t) must be between 10 and 150",
"formulation": "$10 \\leq pg1_t \\leq 150$",
"code": null
},
{
"description": "Generator 2 power (pg2_t) must be between 50 and 375",
"formulation": "$50 \\leq pg2_t \\leq 375$",
"code": null
},
{
"description": "Generator 3 power (pg3_t) must be between 100 and 500",
"formulation": "$100 \\\\leq pg3_t \\\\leq 500$",
"code": null
},
{
"description": "Battery state of charge (soc_t) must be calculated as (0.18 * Asoc_t + soc_t-1)",
"formulation": "$\\\\forall t \\\\in \\\\{1,\\\\dots,24\\\\}, \\\\param{soc_{t}} = 0.18 \\\\cdot \\\\var{Asoc_{t}} + \\\\param{soc_{t-1}}$",
"code": null
},
{
"description": "Generator 1 power (pg1_t) must be calculated as (100 * Ag1_t + pg1_t-1)",
"formulation": "$\\\\forall t \\\\in \\\\{1,\\\\dots,24\\\\}, \\\\param{pg1_{t}} = 100 \\\\cdot \\\\var{Ag1_{t}} + \\\\param{pg1_{t-1}}$",
"code": null
},
{
"description": "Generator 2 power (pg2_t) must be calculated as (100 * Ag2_t + pg2_t-1)",
"formulation": "$\\\\forall t \\\\in \\\\{1,\\\\dots,24\\\\}, \\\\param{pg2_{t}} = 100 \\\\times Ag2_t + \\\\param{pg2_{t-1}}$",
"code": null
},
{
"description": "Generator 3 power (pg3_t) must be calculated as (200 * Ag3_t + pg3_t-1)",
"formulation": "$\\\\forall t \\\\in \\\\{1,\\\\ldots,24\\\\}, \\\\param{pg3_{t}} = 200 \\\\cdot \\\\var{Ag3_{t}} + \\\\param{pg3_{t-1}}$",
"code": null
},
{
"description": "Photovoltaic power must be calculated as (pv_t * (1 + Apv))",
"formulation": "$\\\\forall t, PvPower_t = Pv[t] \\\\times (1 + Apv_t)$",
"code": null
},
{
"description": "Excess energy must be no less than 0",
"formulation": "$\\\\forall t, ExcessEnergy[t] \\\\geq 0$",
"code": null
},
{
"description": "auxiliary constraint",
"formulation": "$\\\\forall t, ExcessEnergy[t] = TotalSupply[t] - Load[t]$"
},
{
"description": "auxiliary constraint",
"formulation": "$\\\\forall t, TotalSupply[t] = soc[t] + pg1[t] + pg2[t] + pg3[t] + PvPower[t] + Wind[t]$"
},
{
"description": "Shortage energy must be no less than 0",
"formulation": "$\\\\forall t, ShortageEnergy[t] \\\\geq 0$",
"code": null
},
{
"description": "Adjustment factors (Apv, Asoc, Ag1, Ag2, Ag3) must range from -1 to 1",
"formulation": "$-1 \\leq Apv_t \\leq 1 \\\\quad \\\\forall t \\\\in \\\\{1, \\\\dots, 24\\\\}$\\n$-1 \\leq Asoc_t \\leq 1 \\\\quad \\\\forall t \\\\in \\\\{1, \\\\dots, 24\\\\}$\\n$-1 \\leq Ag1_t \\leq 1 \\\\quad \\\\forall t \\\\in \\\\{1, \\\\dots, 24\\\\}$\\n$-1 \\leq Ag2_t \\leq 1 \\\\quad \\\\forall t \\\\in \\\\{1, \\\\dots, 24\\\\}$\\n$-1 \\leq Ag3_t \\leq 1 \\\\quad \\\\forall t \\\\in \\\\{1, \\\\dots, 24\\\\}$",
"code": null
},
{
"description": "Battery charging and discharging must result in a capacity change of 100 * Asoc_t",
"formulation": "$\\\\forall t, 100 \\\\times Asoc_t = soc_{t} - soc_{t-1}$",
"code": null
},
{
"description": "The sum of power from generators and renewable sources must not exceed the residential load by more than the grid transaction limit to avoid instability",
"formulation": "$\\\\forall t, TotalPowerSupply[t] \\\\leq Load[t] + GridTransactionLimit$",
"code": null
},
{
"description": "auxiliary constraint",
"formulation": "$\\\\forall t, TotalPowerSupply[t] = Pg1[t] + Pg2[t] + Pg3[t] + PvPower[t] + Wind[t]$"
}
],
"variables": {
"Ag1": {
"shape": [
24
],
"type": "continuous",
"definition": "Adjustment factor for generator 1 power at each time step"
},
"Ag2": {
"shape": [
24
],
"type": "continuous",
"definition": "Adjustment factor for generator 2 power at each time step"
},
"Ag3": {
"shape": [
24
],
"type": "continuous",
"definition": "Adjustment factor for generator 3 power at each time step"
},
"PvPower": {
"shape": [
24
],
"type": "continuous",
"definition": "The adjusted photovoltaic power generation at each time step"
},
"Apv": {
"shape": [
24
],
"type": "continuous",
"definition": "The adjustment factor for photovoltaic power at each time step"
},
"ExcessEnergy": {
"shape": [
24
],
"type": "continuous",
"definition": "The excess energy at each time step"
},
"ShortageEnergy": {
"shape": [
24
],
"type": "continuous",
"definition": "The shortage energy at each time step"
},
"Asoc": {
"shape": [
24
],
"type": "continuous",
"definition": "The adjustment factor for the battery state of charge at each time step"
},
"TotalPowerSupply": {
"shape": [
24
],
"type": "continuous",
"definition": "The total power supply from generators and renewable sources at each time step"
},
"GridTransactionLimit": {
"shape": [],
"type": "float",
"definition": "The maximum allowable excess power supply over the residential load to maintain grid stability"
}
}
}
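
The update rules above (soc, pg1, pg2 and pg3 driven by the adjustment factors, each chained to the previous time step and to the provided initial values) are stated in this file only as formulations, with the code fields still null. A minimal gurobipy sketch of just that recursion is given below; the initial values and the model name are placeholders chosen for illustration, and the sketch is not the repository's own implementation.

import gurobipy as gp

T = 24
# Hypothetical initial values for illustration; real runs would take the
# InitialSoc / InitialPg1 / InitialPg2 / InitialPg3 parameters instead.
InitialSoc, InitialPg1, InitialPg2, InitialPg3 = 0.4, 100.0, 200.0, 300.0

m = gp.Model("state_recursion_sketch")

# Adjustment factors, each bounded to [-1, 1]
Asoc = m.addVars(T, lb=-1, ub=1, name="Asoc")
Ag1 = m.addVars(T, lb=-1, ub=1, name="Ag1")
Ag2 = m.addVars(T, lb=-1, ub=1, name="Ag2")
Ag3 = m.addVars(T, lb=-1, ub=1, name="Ag3")

# State trajectories with the bounds stated in the constraints
soc = m.addVars(T, lb=0.2, ub=0.8, name="soc")
pg1 = m.addVars(T, lb=10, ub=150, name="pg1")
pg2 = m.addVars(T, lb=50, ub=375, name="pg2")
pg3 = m.addVars(T, lb=100, ub=500, name="pg3")

# Time step 0 chains from the initial values, later steps from the
# previous step, following the update rules in the description.
m.addConstr(soc[0] == InitialSoc + 0.18 * Asoc[0])
m.addConstr(pg1[0] == InitialPg1 + 100 * Ag1[0])
m.addConstr(pg2[0] == InitialPg2 + 100 * Ag2[0])
m.addConstr(pg3[0] == InitialPg3 + 200 * Ag3[0])
m.addConstrs(soc[t] == soc[t - 1] + 0.18 * Asoc[t] for t in range(1, T))
m.addConstrs(pg1[t] == pg1[t - 1] + 100 * Ag1[t] for t in range(1, T))
m.addConstrs(pg2[t] == pg2[t - 1] + 100 * Ag2[t] for t in range(1, T))
m.addConstrs(pg3[t] == pg3[t - 1] + 200 * Ag3[t] for t in range(1, T))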

View File

@ -0,0 +1,216 @@
{
"description": "- The energy system includes photovoltaic power stations, wind power stations, generators, batteries, and residential buildings.\n- The goal is to minimize the total costs of the energy system and the gap between power supply and residential load.\n- Electricity price, residential load, photovoltaic, and wind power generation are \\\\param{price_{}}, \\\\param{load_{}}, \\\\param{pv_{}}, and \\\\param{wind_{}}, each with 24 time steps representing hourly data.\n- Battery state of charge is \\\\param{soc_{}}, with a total battery capacity of 500. Power of generators 1, 2, and 3 is \\\\param{pg1_{}}, \\\\param{pg2_{}}, and \\\\param{pg3_{}}, respectively. The initial values at time step 0 are provided, and the system will update these values at each subsequent time step based on decisions made.\n- \\\\var{Apv_{}}, \\\\var{Asoc_{}}, \\\\var{Ag1_{}}, \\\\var{Ag2_{}}, and \\\\var{Ag3_{}} are adjustment factors ranging from -1 to 1, changing according to the value from the previous time step and affecting system parameters such as power generation and battery capacity.\n- Battery charging and discharging result in a capacity change of 100 * \\\\var{Asoc_{t}}.\n- Generators 1, 2, and 3 power change by 100 * \\\\var{Ag1_{t}}, 100 * \\\\var{Ag2_{t}}, and 200 * \\\\var{Ag3_{t}}.\n- \\\\param{soc_{t}} is calculated as (0.18 * \\\\var{Asoc_{t}} + \\\\param{soc_{t-1}}), with a maximum of 0.8 and a minimum of 0.2.\n- \\\\param{pg1_{t}} is calculated as (100 * \\\\var{Ag1_{t}} + \\\\param{pg1_{t-1}}), with a maximum of 150 and a minimum of 10.\n- \\\\param{pg2_{t}} is calculated as (100 * \\\\var{Ag2_{t}} + \\\\param{pg2_{t-1}}), with a maximum of 375 and a minimum of 50.\n- \\\\param{pg3_{t}} is calculated as (200 * \\\\var{Ag3_{t}} + \\\\param{pg3_{t-1}}), with a maximum of 500 and a minimum of 100.\n- Photovoltaic power is calculated as (\\\\param{pv_{t}} * (1 + \\\\var{Apv})).\n- Total power supply at each time step t = \\\\param{soc_{t}} + \\\\param{pg1_{t}} + \\\\param{pg2_{t}} + \\\\param{pg3_{t}}+ (photovoltaic power at time step t) + \\\\param{wind_{t}}.\n- Excess energy = total power supply - \\\\param{load_{t}}, and no less than 0.\n- Shortage energy = \\\\param{load_{t}} - total power supply, and no less than 0.\n- The total cost structure includes the following:\n- Battery cost = \\\\var{Asoc_{t}} + 0.1 * \\\\param{soc_{t}};\n- Gen1 cost = 0.0034 * \\\\param{pg1_{t}}^2 + 3 * \\\\param{pg1_{t}} + 30;\n- Gen2 cost = 0.001 * \\\\param{pg2_{t}}^2 + 10 * \\\\param{pg2_{t}} + 40;\n- Gen3 cost = 0.001 * \\\\param{pg3_{t}}^2 + 15 * \\\\param{pg3_{t}} + 70;\n- Renewable energy cost = 0.01 * (\\\\param{pv_{t}} + \\\\param{wind_{t}});\n- Sales revenue = 0.5 * \\\\param{price_{t}} * (excess energy);\n- Purchase cost = \\\\param{price_{t}} * (shortage energy);\n- Excess penalty = max(0, (excess energy - 100) * 50);\n- Shortage penalty = max(0, (shortage energy - 100) * 50).\n- The cost of each power generation module should be considered when supplying power, usually starting the subsequent generator when the power of the previous generator is insufficient to meet the load.\n- Minimize transactions exceeding the power grid transaction limit to avoid power grid fluctuations affecting the stability of residential electricity consumption.\n- Decisions made at each time step will update \\\\param{soc_{}}, \\\\param{pg1_{}}, \\\\param{pg2_{}}, and \\\\param{pg3_{}}, and these updated values will serve as the starting point for calculations in the next time step.\n",
"parameters": {
"TotalBatteryCapacity": {
"shape": [],
"definition": "The total capacity of the battery",
"type": "int"
},
"InitialSoc": {
"shape": [],
"definition": "Initial state of charge of the battery",
"type": "float"
},
"InitialPg1": {
"shape": [],
"definition": "Initial power of generator 1",
"type": "float"
},
"InitialPg2": {
"shape": [],
"definition": "Initial power of generator 2",
"type": "float"
},
"InitialPg3": {
"shape": [],
"definition": "Initial power of generator 3",
"type": "float"
},
"Price": {
"shape": [
24
],
"definition": "Electricity price at each time step",
"type": "float"
},
"Load": {
"shape": [
24
],
"definition": "Residential load at each time step",
"type": "float"
},
"Pv": {
"shape": [
24
],
"definition": "Photovoltaic power generation at each time step",
"type": "float"
},
"Wind": {
"shape": [
24
],
"definition": "Wind power generation at each time step",
"type": "float"
}
},
"objective": {
"description": "\"The goal is to minimize the total costs of the energy system and the gap between power supply and residential load.\"",
"formulation": "$\\min \\sum_{t=1}^{24} \\left( \\text{Battery cost}_t + \\text{Gen1 cost}_t + \\text{Gen2 cost}_t + \\text{Gen3 cost}_t + \\text{Renewable energy cost}_t + \\text{Excess penalty}_t + \\text{Shortage penalty}_t - \\text{Sales revenue}_t + \\text{Purchase cost}_t + |\\text{ExcessEnergy}_t| + |\\text{ShortageEnergy}_t| \\right)$"
},
"constraints": [
{
"description": "Battery state of charge (soc_t) must be between 0.2 and 0.8",
"formulation": "$0.2 \\leq soc_t \\leq 0.8$",
"code": null
},
{
"description": "Generator 1 power (pg1_t) must be between 10 and 150",
"formulation": "$10 \\leq pg1_t \\leq 150$",
"code": null
},
{
"description": "Generator 2 power (pg2_t) must be between 50 and 375",
"formulation": "$50 \\leq pg2_t \\leq 375$",
"code": null
},
{
"description": "Generator 3 power (pg3_t) must be between 100 and 500",
"formulation": "$100 \\\\leq pg3_t \\\\leq 500$",
"code": null
},
{
"description": "Battery state of charge (soc_t) must be calculated as (0.18 * Asoc_t + soc_t-1)",
"formulation": "$\\\\forall t \\\\in \\\\{1,\\\\dots,24\\\\}, \\\\param{soc_{t}} = 0.18 \\\\cdot \\\\var{Asoc_{t}} + \\\\param{soc_{t-1}}$",
"code": null
},
{
"description": "Generator 1 power (pg1_t) must be calculated as (100 * Ag1_t + pg1_t-1)",
"formulation": "$\\\\forall t \\\\in \\\\{1,\\\\dots,24\\\\}, \\\\param{pg1_{t}} = 100 \\\\cdot \\\\var{Ag1_{t}} + \\\\param{pg1_{t-1}}$",
"code": null
},
{
"description": "Generator 2 power (pg2_t) must be calculated as (100 * Ag2_t + pg2_t-1)",
"formulation": "$\\\\forall t \\\\in \\\\{1,\\\\dots,24\\\\}, \\\\param{pg2_{t}} = 100 \\\\times Ag2_t + \\\\param{pg2_{t-1}}$",
"code": null
},
{
"description": "Generator 3 power (pg3_t) must be calculated as (200 * Ag3_t + pg3_t-1)",
"formulation": "$\\\\forall t \\\\in \\\\{1,\\\\ldots,24\\\\}, \\\\param{pg3_{t}} = 200 \\\\cdot \\\\var{Ag3_{t}} + \\\\param{pg3_{t-1}}$",
"code": null
},
{
"description": "Photovoltaic power must be calculated as (pv_t * (1 + Apv))",
"formulation": "$\\\\forall t, PvPower_t = Pv[t] \\\\times (1 + Apv_t)$",
"code": null
},
{
"description": "Excess energy must be no less than 0",
"formulation": "$\\\\forall t, ExcessEnergy[t] \\\\geq 0$",
"code": null
},
{
"description": "auxiliary constraint",
"formulation": "$\\\\forall t, ExcessEnergy[t] = TotalSupply[t] - Load[t]$"
},
{
"description": "auxiliary constraint",
"formulation": "$\\\\forall t, TotalSupply[t] = soc[t] + pg1[t] + pg2[t] + pg3[t] + PvPower[t] + Wind[t]$"
},
{
"description": "Shortage energy must be no less than 0",
"formulation": "$\\\\forall t, ShortageEnergy[t] \\\\geq 0$",
"code": null
},
{
"description": "Adjustment factors (Apv, Asoc, Ag1, Ag2, Ag3) must range from -1 to 1",
"formulation": "$-1 \\leq Apv_t \\leq 1 \\\\quad \\\\forall t \\\\in \\\\{1, \\\\dots, 24\\\\}$\\n$-1 \\leq Asoc_t \\leq 1 \\\\quad \\\\forall t \\\\in \\\\{1, \\\\dots, 24\\\\}$\\n$-1 \\leq Ag1_t \\leq 1 \\\\quad \\\\forall t \\\\in \\\\{1, \\\\dots, 24\\\\}$\\n$-1 \\leq Ag2_t \\leq 1 \\\\quad \\\\forall t \\\\in \\\\{1, \\\\dots, 24\\\\}$\\n$-1 \\leq Ag3_t \\leq 1 \\\\quad \\\\forall t \\\\in \\\\{1, \\\\dots, 24\\\\}$",
"code": null
},
{
"description": "Battery charging and discharging must result in a capacity change of 100 * Asoc_t",
"formulation": "$\\\\forall t, 100 \\\\times Asoc_t = soc_{t} - soc_{t-1}$",
"code": null
},
{
"description": "The sum of power from generators and renewable sources must not exceed the residential load by more than the grid transaction limit to avoid instability",
"formulation": "$\\\\forall t, TotalPowerSupply[t] \\\\leq Load[t] + GridTransactionLimit$",
"code": null
},
{
"description": "auxiliary constraint",
"formulation": "$\\\\forall t, TotalPowerSupply[t] = Pg1[t] + Pg2[t] + Pg3[t] + PvPower[t] + Wind[t]$"
}
],
"variables": {
"Ag1": {
"shape": [
24
],
"type": "continuous",
"definition": "Adjustment factor for generator 1 power at each time step"
},
"Ag2": {
"shape": [
24
],
"type": "continuous",
"definition": "Adjustment factor for generator 2 power at each time step"
},
"Ag3": {
"shape": [
24
],
"type": "continuous",
"definition": "Adjustment factor for generator 3 power at each time step"
},
"PvPower": {
"shape": [
24
],
"type": "continuous",
"definition": "The adjusted photovoltaic power generation at each time step"
},
"Apv": {
"shape": [
24
],
"type": "continuous",
"definition": "The adjustment factor for photovoltaic power at each time step"
},
"ExcessEnergy": {
"shape": [
24
],
"type": "continuous",
"definition": "The excess energy at each time step"
},
"ShortageEnergy": {
"shape": [
24
],
"type": "continuous",
"definition": "The shortage energy at each time step"
},
"Asoc": {
"shape": [
24
],
"type": "continuous",
"definition": "The adjustment factor for the battery state of charge at each time step"
},
"TotalPowerSupply": {
"shape": [
24
],
"type": "continuous",
"definition": "The total power supply from generators and renewable sources at each time step"
},
"GridTransactionLimit": {
"shape": [],
"type": "float",
"definition": "The maximum allowable excess power supply over the residential load to maintain grid stability"
}
}
}
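
This second version of the model adds the objective formulation, which mixes smooth cost terms with max(0, ·) penalties and absolute supply-load gap terms. One way to express those pieces in gurobipy without applying Python's built-in max or abs to Gurobi expressions is sketched below. The flat placeholder series, the model name, and the difference form used for the excess/shortage balance are assumptions made for this sketch, not the repository's formulation; the recursion constraints from the earlier sketch would be added for a complete model.

import gurobipy as gp
from gurobipy import GRB

T = 24
# Flat placeholder series for illustration; real runs would load the
# 24-step Pv, Wind, Load and Price data.
Pv = [60.0] * T
Wind = [80.0] * T
Load = [400.0] * T
Price = [0.6] * T

m = gp.Model("objective_sketch")

Apv = m.addVars(T, lb=-1, ub=1, name="Apv")
Asoc = m.addVars(T, lb=-1, ub=1, name="Asoc")
soc = m.addVars(T, lb=0.2, ub=0.8, name="soc")
pg1 = m.addVars(T, lb=10, ub=150, name="pg1")
pg2 = m.addVars(T, lb=50, ub=375, name="pg2")
pg3 = m.addVars(T, lb=100, ub=500, name="pg3")

PvPower = m.addVars(T, name="PvPower")
ExcessEnergy = m.addVars(T, lb=0, name="ExcessEnergy")
ShortageEnergy = m.addVars(T, lb=0, name="ShortageEnergy")
ExcessPenalty = m.addVars(T, lb=0, name="ExcessPenalty")
ShortagePenalty = m.addVars(T, lb=0, name="ShortagePenalty")

# Adjusted photovoltaic output
m.addConstrs(PvPower[t] == Pv[t] * (1 + Apv[t]) for t in range(T))

# Excess and shortage are both nonnegative; their difference carries the
# signed supply-load gap, so at the optimum at most one of them is positive.
m.addConstrs(
    ExcessEnergy[t] - ShortageEnergy[t]
    == soc[t] + pg1[t] + pg2[t] + pg3[t] + PvPower[t] + Wind[t] - Load[t]
    for t in range(T)
)

# max(0, (x - 100) * 50): each penalty variable is nonnegative, bounded
# below by the linear term, and minimized, so it equals the max at optimum.
m.addConstrs(ExcessPenalty[t] >= 50 * (ExcessEnergy[t] - 100) for t in range(T))
m.addConstrs(ShortagePenalty[t] >= 50 * (ShortageEnergy[t] - 100) for t in range(T))

# |ExcessEnergy| and |ShortageEnergy| reduce to the variables themselves
# because both are already constrained to be nonnegative.
m.setObjective(
    gp.quicksum(
        Asoc[t] + 0.1 * soc[t]
        + 0.0034 * pg1[t] * pg1[t] + 3 * pg1[t] + 30
        + 0.001 * pg2[t] * pg2[t] + 10 * pg2[t] + 40
        + 0.001 * pg3[t] * pg3[t] + 15 * pg3[t] + 70
        + 0.01 * (Pv[t] + Wind[t])
        - 0.5 * Price[t] * ExcessEnergy[t]
        + Price[t] * ShortageEnergy[t]
        + ExcessPenalty[t] + ShortagePenalty[t]
        + ExcessEnergy[t] + ShortageEnergy[t]
        for t in range(T)
    ),
    GRB.MINIMIZE,
)
m.optimize()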

220
llma/data/state_6_code.json Normal file
View File

@ -0,0 +1,220 @@
{
"description": "- The energy system includes photovoltaic power stations, wind power stations, generators, batteries, and residential buildings.\n- The goal is to minimize the total costs of the energy system and the gap between power supply and residential load.\n- Electricity price, residential load, photovoltaic, and wind power generation are \\\\param{price_{}}, \\\\param{load_{}}, \\\\param{pv_{}}, and \\\\param{wind_{}}, each with 24 time steps representing hourly data.\n- Battery state of charge is \\\\param{soc_{}}, with a total battery capacity of 500. Power of generators 1, 2, and 3 is \\\\param{pg1_{}}, \\\\param{pg2_{}}, and \\\\param{pg3_{}}, respectively. The initial values at time step 0 are provided, and the system will update these values at each subsequent time step based on decisions made.\n- \\\\var{Apv_{}}, \\\\var{Asoc_{}}, \\\\var{Ag1_{}}, \\\\var{Ag2_{}}, and \\\\var{Ag3_{}} are adjustment factors ranging from -1 to 1, changing according to the value from the previous time step and affecting system parameters such as power generation and battery capacity.\n- Battery charging and discharging result in a capacity change of 100 * \\\\var{Asoc_{t}}.\n- Generators 1, 2, and 3 power change by 100 * \\\\var{Ag1_{t}}, 100 * \\\\var{Ag2_{t}}, and 200 * \\\\var{Ag3_{t}}.\n- \\\\param{soc_{t}} is calculated as (0.18 * \\\\var{Asoc_{t}} + \\\\param{soc_{t-1}}), with a maximum of 0.8 and a minimum of 0.2.\n- \\\\param{pg1_{t}} is calculated as (100 * \\\\var{Ag1_{t}} + \\\\param{pg1_{t-1}}), with a maximum of 150 and a minimum of 10.\n- \\\\param{pg2_{t}} is calculated as (100 * \\\\var{Ag2_{t}} + \\\\param{pg2_{t-1}}), with a maximum of 375 and a minimum of 50.\n- \\\\param{pg3_{t}} is calculated as (200 * \\\\var{Ag3_{t}} + \\\\param{pg3_{t-1}}), with a maximum of 500 and a minimum of 100.\n- Photovoltaic power is calculated as (\\\\param{pv_{t}} * (1 + \\\\var{Apv})).\n- Total power supply at each time step t = \\\\param{soc_{t}} + \\\\param{pg1_{t}} + \\\\param{pg2_{t}} + \\\\param{pg3_{t}}+ (photovoltaic power at time step t) + \\\\param{wind_{t}}.\n- Excess energy = total power supply - \\\\param{load_{t}}, and no less than 0.\n- Shortage energy = \\\\param{load_{t}} - total power supply, and no less than 0.\n- The total cost structure includes the following:\n- Battery cost = \\\\var{Asoc_{t}} + 0.1 * \\\\param{soc_{t}};\n- Gen1 cost = 0.0034 * \\\\param{pg1_{t}}^2 + 3 * \\\\param{pg1_{t}} + 30;\n- Gen2 cost = 0.001 * \\\\param{pg2_{t}}^2 + 10 * \\\\param{pg2_{t}} + 40;\n- Gen3 cost = 0.001 * \\\\param{pg3_{t}}^2 + 15 * \\\\param{pg3_{t}} + 70;\n- Renewable energy cost = 0.01 * (\\\\param{pv_{t}} + \\\\param{wind_{t}});\n- Sales revenue = 0.5 * \\\\param{price_{t}} * (excess energy);\n- Purchase cost = \\\\param{price_{t}} * (shortage energy);\n- Excess penalty = max(0, (excess energy - 100) * 50);\n- Shortage penalty = max(0, (shortage energy - 100) * 50).\n- The cost of each power generation module should be considered when supplying power, usually starting the subsequent generator when the power of the previous generator is insufficient to meet the load.\n- Minimize transactions exceeding the power grid transaction limit to avoid power grid fluctuations affecting the stability of residential electricity consumption.\n- Decisions made at each time step will update \\\\param{soc_{}}, \\\\param{pg1_{}}, \\\\param{pg2_{}}, and \\\\param{pg3_{}}, and these updated values will serve as the starting point for calculations in the next time step.\n",
"parameters": {
"TotalBatteryCapacity": {
"shape": [],
"definition": "The total capacity of the battery",
"type": "int"
},
"InitialSoc": {
"shape": [],
"definition": "Initial state of charge of the battery",
"type": "float"
},
"InitialPg1": {
"shape": [],
"definition": "Initial power of generator 1",
"type": "float"
},
"InitialPg2": {
"shape": [],
"definition": "Initial power of generator 2",
"type": "float"
},
"InitialPg3": {
"shape": [],
"definition": "Initial power of generator 3",
"type": "float"
},
"Price": {
"shape": [
24
],
"definition": "Electricity price at each time step",
"type": "float"
},
"Load": {
"shape": [
24
],
"definition": "Residential load at each time step",
"type": "float"
},
"Pv": {
"shape": [
24
],
"definition": "Photovoltaic power generation at each time step",
"type": "float"
},
"Wind": {
"shape": [
24
],
"definition": "Wind power generation at each time step",
"type": "float"
}
},
"objective": {
"description": "\"The goal is to minimize the total costs of the energy system and the gap between power supply and residential load.\"",
"formulation": "$\\min \\sum_{t=1}^{24} \\left( \\text{Battery cost}_t + \\text{Gen1 cost}_t + \\text{Gen2 cost}_t + \\text{Gen3 cost}_t + \\text{Renewable energy cost}_t + \\text{Excess penalty}_t + \\text{Shortage penalty}_t - \\text{Sales revenue}_t + \\text{Purchase cost}_t + |\\text{ExcessEnergy}_t| + |\\text{ShortageEnergy}_t| \\right)$",
"code": "model.setObjective(\n quicksum(\n (Asoc[t] + 0.1 * soc[t-1] +\n 0.0034 * pg1[t-1] ** 2 + 3 * pg1[t-1] + 30 +\n 0.001 * pg2[t-1] ** 2 + 10 * pg2[t-1] + 40 +\n 0.001 * pg3[t-1] ** 2 + 15 * pg3[t-1] + 70 +\n 0.01 * (pv[t-1] + wind[t-1]) +\n max(0, (ExcessEnergy[t] - 100) * 50) +\n max(0, (ShortageEnergy[t] - 100) * 50) -\n 0.5 * Price[t-1] * ExcessEnergy[t] +\n Price[t-1] * ShortageEnergy[t] +\n abs(ExcessEnergy[t]) + abs(ShortageEnergy[t])\n ) for t in range(1, 25)\n ),\n GRB.MINIMIZE\n)"
},
"constraints": [
{
"description": "Battery state of charge (soc_t) must be between 0.2 and 0.8",
"formulation": "$0.2 \\leq soc_t \\leq 0.8$",
"code": "# Preceding state of charge variable initialization\nsoc = InitialSoc\n\n# Loop over each time step to add the soc constraints\nfor t in range(24):\n # Calculate soc_t based on the previous soc and the adjustment factor\n soc_t = 0.18 * Asoc[t] + soc\n \n # Add the lower bound constraint for soc_t\n model.addConstr(soc_t >= 0.2)\n \n # Add the upper bound constraint for soc_t\n model.addConstr(soc_t <= 0.8)\n \n # Update soc to the newly calculated soc_t for the next iteration\n soc = soc_t"
},
{
"description": "Generator 1 power (pg1_t) must be between 10 and 150",
"formulation": "$10 \\leq pg1_t \\leq 150$",
"code": "# Assuming decision variables Pg1[t] have been defined for each time step t\nfor t in range(24):\n if t == 0:\n # For the first time step, use the initial value of Pg1\n model.addConstr(Pg1[t] <= 150)\n model.addConstr(Pg1[t] >= 10)\n else:\n # For subsequent time steps, calculate Pg1 based on the previous time step and Ag1\n model.addConstr(Pg1[t] <= 150)\n model.addConstr(Pg1[t] >= 10)\n model.addConstr(Pg1[t] == 100 * Ag1[t] + Pg1[t-1])"
},
{
"description": "Generator 2 power (pg2_t) must be between 50 and 375",
"formulation": "$50 \\leq pg2_t \\leq 375$",
"code": "for t in range(1, 24):\n model.addConstr(InitialPg2 + 100 * Ag2[t] >= 50)\n model.addConstr(InitialPg2 + 100 * Ag2[t] <= 375)"
},
{
"description": "Generator 3 power (pg3_t) must be between 100 and 500",
"formulation": "$100 \\\\leq pg3_t \\\\leq 500$",
"code": "for t in range(24):\n model.addConstr(Pg3[t] >= 100) # Lower bound constraint for each time step\n model.addConstr(Pg3[t] <= 500) # Upper bound constraint for each time step"
},
{
"description": "Battery state of charge (soc_t) must be calculated as (0.18 * Asoc_t + soc_t-1)",
"formulation": "$\\\\forall t \\\\in \\\\{1,\\\\dots,24\\\\}, \\\\param{soc_{t}} = 0.18 \\\\cdot \\\\var{Asoc_{t}} + \\\\param{soc_{t-1}}$",
"code": "# Assuming soc is defined as a list of decision variables for each time step\nsoc = model.addVars(24, lb=0.2, ub=0.8, vtype=GRB.CONTINUOUS, name=\"soc\")\n\n# Loop over each time step starting from 1 up to 24\nfor t in range(1, 24):\n # Add the constraint for the state of charge calculation\n model.addConstr(soc[t] == 0.18 * Asoc[t] + soc[t-1])"
},
{
"description": "Generator 1 power (pg1_t) must be calculated as (100 * Ag1_t + pg1_t-1)",
"formulation": "$\\\\forall t \\\\in \\\\{1,\\\\dots,24\\\\}, \\\\param{pg1_{t}} = 100 \\\\cdot \\\\var{Ag1_{t}} + \\\\param{pg1_{t-1}}$",
"code": "for t in range(1, 24):\n model.addConstr(pg1[t] == 100 * Ag1[t] + pg1[t-1])"
},
{
"description": "Generator 2 power (pg2_t) must be calculated as (100 * Ag2_t + pg2_t-1)",
"formulation": "$\\\\forall t \\\\in \\\\{1,\\\\dots,24\\\\}, \\\\param{pg2_{t}} = 100 \\\\times Ag2_t + \\\\param{pg2_{t-1}}$",
"code": "# Assuming that `pg2` is a list of Gurobi variables representing the power of Generator 2 at each time step\n# and that `InitialPg2` is a parameter representing the initial power of Generator 2 at time step 0\n\n# Define the Generator 2 power for the first time step, which is the initial value and does not have a constraint\npg2 = [model.addVar(vtype=GRB.CONTINUOUS, lb=50, ub=375, name=\"pg2_0\")]\n\n# Now add the constraints for time steps 1 to 23\nfor t in range(1, 24):\n pg2.append(model.addVar(vtype=GRB.CONTINUOUS, lb=50, ub=375, name=f\"pg2_{t}\"))\n model.addConstr(pg2[t] == 100 * Ag2[t-1] + pg2[t-1])"
},
{
"description": "Generator 3 power (pg3_t) must be calculated as (200 * Ag3_t + pg3_t-1)",
"formulation": "$\\\\forall t \\\\in \\\\{1,\\\\ldots,24\\\\}, \\\\param{pg3_{t}} = 200 \\\\cdot \\\\var{Ag3_{t}} + \\\\param{pg3_{t-1}}$",
"code": "# Assuming Pg3 is defined as a list of decision variables representing power of generator 3 at each time step\nfor t in range(1, 24):\n model.addConstr(Pg3[t] == 200 * Ag3[t] + Pg3[t-1])"
},
{
"description": "Photovoltaic power must be calculated as (pv_t * (1 + Apv))",
"formulation": "$\\\\forall t, PvPower_t = Pv[t] \\\\times (1 + Apv_t)$",
"code": "for t in range(24):\n model.addConstr(PvPower[t] == Pv[t] * (1 + Apv[t]))"
},
{
"description": "Excess energy must be no less than 0",
"formulation": "$\\\\forall t, ExcessEnergy[t] \\\\geq 0$",
"code": "for t in range(24):\n model.addConstr(ExcessEnergy[t] >= 0)"
},
{
"description": "auxiliary constraint",
"formulation": "$\\\\forall t, ExcessEnergy[t] = TotalSupply[t] - Load[t]$",
"code": "for t in range(24):\n # Define the total power supply at time step t\n TotalSupply_t = (InitialPg1 + Ag1[t] * 100 +\n InitialPg2 + Ag2[t] * 100 +\n InitialPg3 + Ag3[t] * 200 +\n Pv[t] * (1 + Apv[t]) +\n Wind[t] +\n InitialSoc + Asoc[t] * 0.18)\n # Add the constraint for excess energy at time step t\n model.addConstr(ExcessEnergy[t] == TotalSupply_t - Load[t])"
},
{
"description": "auxiliary constraint",
"formulation": "$\\\\forall t, TotalSupply[t] = soc[t] + pg1[t] + pg2[t] + pg3[t] + PvPower[t] + Wind[t]$",
"code": "for t in range(24):\n # Calculate updated generator powers and battery state of charge\n soc_t = InitialSoc + 0.18 * Asoc[t] if t == 0 else soc[t-1] + 0.18 * Asoc[t]\n pg1_t = InitialPg1 + 100 * Ag1[t] if t == 0 else pg1[t-1] + 100 * Ag1[t]\n pg2_t = InitialPg2 + 100 * Ag2[t] if t == 0 else pg2[t-1] + 100 * Ag2[t]\n pg3_t = InitialPg3 + 200 * Ag3[t] if t == 0 else pg3[t-1] + 200 * Ag3[t]\n\n # Calculate photovoltaic power\n PvPower_t = Pv[t] * (1 + Apv[t])\n\n # Add the constraint for total power supply at time t\n model.addConstr(TotalPowerSupply[t] == soc_t + pg1_t + pg2_t + pg3_t + PvPower_t + Wind[t])"
},
{
"description": "Shortage energy must be no less than 0",
"formulation": "$\\\\forall t, ShortageEnergy[t] \\\\geq 0$",
"code": "for t in range(24):\n model.addConstr(ShortageEnergy[t] >= 0)"
},
{
"description": "Adjustment factors (Apv, Asoc, Ag1, Ag2, Ag3) must range from -1 to 1",
"formulation": "$-1 \\leq Apv_t \\leq 1 \\\\quad \\\\forall t \\\\in \\\\{1, \\\\dots, 24\\\\}$\\n$-1 \\leq Asoc_t \\leq 1 \\\\quad \\\\forall t \\\\in \\\\{1, \\\\dots, 24\\\\}$\\n$-1 \\leq Ag1_t \\leq 1 \\\\quad \\\\forall t \\\\in \\\\{1, \\\\dots, 24\\\\}$\\n$-1 \\leq Ag2_t \\leq 1 \\\\quad \\\\forall t \\\\in \\\\{1, \\\\dots, 24\\\\}$\\n$-1 \\leq Ag3_t \\leq 1 \\\\quad \\\\forall t \\\\in \\\\{1, \\\\dots, 24\\\\}$",
"code": "for t in range(24):\n model.addConstr(-1 <= Apv[t])\n model.addConstr(Apv[t] <= 1)\n model.addConstr(-1 <= Asoc[t])\n model.addConstr(Asoc[t] <= 1)\n model.addConstr(-1 <= Ag1[t])\n model.addConstr(Ag1[t] <= 1)\n model.addConstr(-1 <= Ag2[t])\n model.addConstr(Ag2[t] <= 1)\n model.addConstr(-1 <= Ag3[t])\n model.addConstr(Ag3[t] <= 1)"
},
{
"description": "Battery charging and discharging must result in a capacity change of 100 * Asoc_t",
"formulation": "$\\\\forall t, 100 \\\\times Asoc_t = soc_{t} - soc_{t-1}$",
"code": "for t in range(1, 24):\n model.addConstr(100 * Asoc[t] == soc[t] - soc[t-1])"
},
{
"description": "The sum of power from generators and renewable sources must not exceed the residential load by more than the grid transaction limit to avoid instability",
"formulation": "$\\\\forall t, TotalPowerSupply[t] \\\\leq Load[t] + GridTransactionLimit$",
"code": "for t in range(24):\n # Calculate the power from each generator based on previous state and adjustment\n pg1_power = InitialPg1 + 100 * Ag1[t]\n pg2_power = InitialPg2 + 100 * Ag2[t]\n pg3_power = InitialPg3 + 200 * Ag3[t]\n\n # Calculate the adjusted photovoltaic power\n pv_power = Pv[t] * (1 + Apv[t])\n\n # Express the total power supply\n TotalPowerSupply[t] = pg1_power + pg2_power + pg3_power + pv_power + Wind[t]\n\n # Add the constraint that total power supply cannot exceed the load by more than the grid transaction limit\n model.addConstr(TotalPowerSupply[t] <= Load[t] + GridTransactionLimit)"
},
{
"description": "auxiliary constraint",
"formulation": "$\\\\forall t, TotalPowerSupply[t] = Pg1[t] + Pg2[t] + Pg3[t] + PvPower[t] + Wind[t]$",
"code": "for t in range(24):\n # Assuming that Pg1, Pg2, Pg3 are defined as variables in the model with the correct initial values and adjustments\n Pg1[t] = model.addVar(vtype=GRB.CONTINUOUS, name=f\"Pg1_{t}\")\n Pg2[t] = model.addVar(vtype=GRB.CONTINUOUS, name=f\"Pg2_{t}\")\n Pg3[t] = model.addVar(vtype=GRB.CONTINUOUS, name=f\"Pg3_{t}\")\n\n # Define the auxiliary constraint for each time step\n model.addConstr(TotalPowerSupply[t] == Pg1[t] + Pg2[t] + Pg3[t] + PvPower[t] + Wind[t])"
}
],
"variables": {
"Ag1": {
"shape": [
24
],
"type": "continuous",
"definition": "Adjustment factor for generator 1 power at each time step"
},
"Ag2": {
"shape": [
24
],
"type": "continuous",
"definition": "Adjustment factor for generator 2 power at each time step"
},
"Ag3": {
"shape": [
24
],
"type": "continuous",
"definition": "Adjustment factor for generator 3 power at each time step"
},
"PvPower": {
"shape": [
24
],
"type": "continuous",
"definition": "The adjusted photovoltaic power generation at each time step"
},
"Apv": {
"shape": [
24
],
"type": "continuous",
"definition": "The adjustment factor for photovoltaic power at each time step"
},
"ExcessEnergy": {
"shape": [
24
],
"type": "continuous",
"definition": "The excess energy at each time step"
},
"ShortageEnergy": {
"shape": [
24
],
"type": "continuous",
"definition": "The shortage energy at each time step"
},
"Asoc": {
"shape": [
24
],
"type": "continuous",
"definition": "The adjustment factor for the battery state of charge at each time step"
},
"TotalPowerSupply": {
"shape": [
24
],
"type": "continuous",
"definition": "The total power supply from generators and renewable sources at each time step"
},
"GridTransactionLimit": {
"shape": [],
"type": "float",
"definition": "The maximum allowable excess power supply over the residential load to maintain grid stability"
}
}
}
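
The constraint-level code strings in llma/data/state_6_code.json reference variables such as Asoc, Ag1, Ag2, Ag3, Apv, soc, Pg1/pg1, PvPower, ExcessEnergy, ShortageEnergy, TotalPowerSupply and GridTransactionLimit without declaring them, and they mix the pg1/Pg1 spellings. A scaffold those snippets could be pasted into might look like the sketch below; the model name, the placeholder parameter values, and the aliasing of the two spellings are assumptions made here, while the bounds are taken from the constraint descriptions above.

import gurobipy as gp
from gurobipy import GRB  # referenced by the pasted snippets (e.g. GRB.CONTINUOUS)

T = 24
model = gp.Model("state_6_scaffold")

# Hypothetical placeholder parameters; real runs would load the data files
Pv = [60.0] * T
Wind = [80.0] * T
Load = [400.0] * T
Price = [0.6] * T
InitialSoc, InitialPg1, InitialPg2, InitialPg3 = 0.4, 100.0, 200.0, 300.0

# Adjustment factors in [-1, 1]
Apv = model.addVars(T, lb=-1, ub=1, name="Apv")
Asoc = model.addVars(T, lb=-1, ub=1, name="Asoc")
Ag1 = model.addVars(T, lb=-1, ub=1, name="Ag1")
Ag2 = model.addVars(T, lb=-1, ub=1, name="Ag2")
Ag3 = model.addVars(T, lb=-1, ub=1, name="Ag3")

# State variables with the bounds given in the constraints
soc = model.addVars(T, lb=0.2, ub=0.8, name="soc")
pg1 = model.addVars(T, lb=10, ub=150, name="pg1")
pg2 = model.addVars(T, lb=50, ub=375, name="pg2")
pg3 = model.addVars(T, lb=100, ub=500, name="pg3")
Pg1, Pg2, Pg3 = pg1, pg2, pg3  # the snippets use both spellings

# Auxiliary quantities referenced by the snippets
PvPower = model.addVars(T, name="PvPower")
ExcessEnergy = model.addVars(T, lb=0, name="ExcessEnergy")
ShortageEnergy = model.addVars(T, lb=0, name="ShortageEnergy")
TotalPowerSupply = model.addVars(T, name="TotalPowerSupply")
GridTransactionLimit = model.addVar(lb=0, name="GridTransactionLimit")

Even with these declarations in place, the snippets that apply Python's built-in max and abs to Gurobi expressions (the objective code and the penalty terms) would still need auxiliary variables or gurobipy's max_ / abs_ general-constraint helpers, along the lines of the objective sketch after the second file above.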

Some files were not shown because too many files have changed in this diff.