Paper submission

chenxiaodong 2024-11-22 10:03:31 +08:00
commit 83b1abbf76
129 changed files with 359057 additions and 0 deletions

3
.gitignore vendored Normal file
@@ -0,0 +1,3 @@
__pycache__/
.idea/
gurobi.log


BIN  AgentDDPG/actor.pth Normal file
BIN  AgentDDPG/loss_data.pkl Normal file
BIN  AgentDDPG/reward_data.pkl Normal file
BIN  AgentDDPG/test_data.pkl Normal file


4910
AgentPPO/DRL_10_plots/rl.svg Normal file (diff suppressed because it is too large; 141 KiB)


BIN  AgentPPO/actor_10.pth Normal file
BIN  AgentPPO/actor_llm_1015.pth Normal file
BIN  AgentPPO/actor_ram.pth Normal file
BIN  AgentPPO/loss_10.pkl Normal file
BIN  AgentPPO/loss_llm_1015.pkl Normal file
BIN  AgentPPO/loss_ram.pkl Normal file
BIN  AgentPPO/melt_llm.pkl Normal file
BIN  AgentPPO/melt_lmppo.pkl Normal file
BIN  AgentPPO/melt_milp.pkl Normal file
BIN  AgentPPO/melt_ppo.pkl Normal file
BIN  AgentPPO/melt_ram.pkl Normal file
BIN  AgentPPO/reward_10.pkl Normal file
BIN  AgentPPO/reward_ram.pkl Normal file
BIN  AgentPPO/test_10.pkl Normal file
BIN  AgentPPO/test_llm_1015.pkl Normal file
BIN  AgentPPO/test_ram.pkl Normal file


4474
AgentSAC/DRL_10_plots/rl.svg Normal file (diff suppressed because it is too large; 143 KiB)

BIN  AgentSAC/actor.pth Normal file
BIN  AgentSAC/loss_data.pkl Normal file
BIN  AgentSAC/reward_data.pkl Normal file
BIN  AgentSAC/test_data.pkl Normal file


4526
AgentTD3/DRL_10_plots/rl.svg Normal file (diff suppressed because it is too large; 146 KiB)

BIN  AgentTD3/actor.pth Normal file
BIN  AgentTD3/loss_data.pkl Normal file
BIN  AgentTD3/reward_data.pkl Normal file
BIN  AgentTD3/test_data.pkl Normal file

127
DDPG.py Normal file
@@ -0,0 +1,127 @@
import pickle
import pandas as pd
import torch
from agent import AgentDDPG
from environment import ESSEnv
from tools import *
def update_buffer(_trajectory):
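# Pack one exploration trajectory into the replay buffer: column 0 of 'other' is the reward,
# column 1 is converted from the done flag into the mask (1 - done) * gamma, the rest is the action.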
ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
ary_other = torch.as_tensor([item[1] for item in _trajectory])
ary_other[:, 0] = ary_other[:, 0] # ten_reward
ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma # ten_mask = (1.0 - ary_done) * gamma
buffer.extend_buffer(ten_state, ary_other)
_steps = ten_state.shape[0]
_r_exp = ary_other[:, 0].mean() # other = (reward, mask, action)
return _steps, _r_exp
if __name__ == '__main__':
args = Arguments()
'''record real unbalance'''
reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
loss_record = {'episode': [], 'steps': [], 'critic_loss': [], 'actor_loss': [], 'entropy_loss': []}
args.visible_gpu = '0'
for seed in args.random_seed_list:
args.random_seed = seed
args.agent = AgentDDPG()
agent_name = f'{args.agent.__class__.__name__}'
args.agent.cri_target = True
args.env = ESSEnv()
args.init_before_training(if_main=True)
'''init agent and environment'''
agent = args.agent
env = args.env
agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
args.if_per_or_gae)
'''init replay buffer'''
buffer = ReplayBuffer(max_len=args.max_memo, state_dim=env.state_space.shape[0],
action_dim=env.action_space.shape[0])
'''start training'''
cwd = args.cwd
gamma = args.gamma
batch_size = args.batch_size
target_step = args.target_step  # how many exploration steps to collect per rollout
repeat_times = args.repeat_times  # how many gradient updates to run per batch of collected data
soft_update_tau = args.soft_update_tau
num_episode = args.num_episode
agent.state = env.reset()
'''collect data, train and update network'''
# args.train = False
# args.save_network = False
# args.test_network = False
# args.save_test_data = False
# args.compare_with_gurobi = False
if args.train:
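# warm-up: keep exploring until the replay buffer holds at least 10000 transitions before training starts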
collect_data = True
while collect_data:
print(f'buffer:{buffer.now_len}')
with torch.no_grad():
trajectory = agent.explore_env(env, target_step)
steps, r_exp = update_buffer(trajectory)
buffer.update_now_len()
if buffer.now_len >= 10000:
collect_data = False
for i_episode in range(num_episode):
critic_loss, actor_loss = agent.update_net(buffer, batch_size, repeat_times, soft_update_tau)
loss_record['critic_loss'].append(critic_loss)
loss_record['actor_loss'].append(actor_loss)
with torch.no_grad():
episode_reward, episode_unbalance = get_episode_return(env, agent.act, agent.device)
reward_record['mean_episode_reward'].append(episode_reward)
reward_record['unbalance'].append(episode_unbalance)
print(f'episode: {i_episode}, reward: {episode_reward}, unbalance: {episode_unbalance}, buffer_length: {buffer.now_len}')
if i_episode % 10 == 0: # target_step
with torch.no_grad():
trajectory = agent.explore_env(env, target_step)
steps, r_exp = update_buffer(trajectory)
loss_record_path = f'{args.cwd}/loss_data.pkl'
reward_record_path = f'{args.cwd}/reward_data.pkl'
act_save_path = f'{args.cwd}/actor.pth'
if args.save_network:
with open(loss_record_path, 'wb') as tf:
pickle.dump(loss_record, tf)
with open(reward_record_path, 'wb') as tf:
pickle.dump(reward_record, tf)
torch.save(agent.act.state_dict(), act_save_path)
print('actor parameters have been saved')
if args.test_network:
args.cwd = agent_name
agent.act.load_state_dict(torch.load(act_save_path))
print('parameters have been reloaded for testing')
record = test_one_episode(env, agent.act, agent.device)
eval_data = pd.DataFrame(record['system_info'])
eval_data.columns = ['time_step', 'price', 'netload', 'action', 'real_action', 'soc', 'battery',
'gen1', 'gen2', 'gen3', 'pv', 'wind', 'unbalance', 'operation_cost', 'reward']
if args.save_test_data:
test_data_save_path = f'{args.cwd}/test_data.pkl'
with open(test_data_save_path, 'wb') as tf:
pickle.dump(record, tf)
'''compare with gurobi data and results'''
if args.compare_with_gurobi:
month = record['init_info'][0][0]
day = record['init_info'][0][1]
initial_soc = record['init_info'][0][3]
base_result = optimization_base_result(env, month, day, initial_soc)
if args.plot_on:
from plotDRL import PlotArgs, make_dir, plot_evaluation_information, plot_optimization_result
plot_args = PlotArgs()
plot_args.feature_change = '10'
args.cwd = agent_name
plot_dir = make_dir(args.cwd, plot_args.feature_change)
plot_optimization_result(base_result, plot_dir)
plot_evaluation_information(args.cwd + '/' + 'test_data.pkl', plot_dir)
'''compare the costs obtained from Gurobi and DDPG'''
ratio = sum(eval_data['operation_cost']) / sum(base_result['step_cost'])
print('rl_cost:', sum(eval_data['operation_cost']))
print('gurobi_cost:', sum(base_result['step_cost']))
print('ratio:', ratio)

374
PPO.py Normal file
@@ -0,0 +1,374 @@
import os
import pickle
from copy import deepcopy
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from environment import ESSEnv
from tools import get_episode_return, test_one_episode, optimization_base_result
class ActorPPO(nn.Module):
def __init__(self, mid_dim, state_dim, action_dim):
super().__init__()
self.net = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
nn.Linear(mid_dim, action_dim))
# the logarithm (log) of standard deviation (std) of action, it is a trainable parameter
self.a_logstd = nn.Parameter(torch.zeros((1, action_dim)) - 0.5, requires_grad=True)
self.sqrt_2pi_log = np.log(np.sqrt(2 * np.pi))
def forward(self, state):
return self.net(state).tanh() # action.tanh() limit the data output of action
def get_action(self, state):
a_avg = self.forward(state)  # mean of the Gaussian policy (tanh-bounded network output)
a_std = self.a_logstd.exp()
noise = torch.randn_like(a_avg)
action = a_avg + noise * a_std
return action, noise
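# For this diagonal Gaussian policy, log pi(a|s) = -sum_j [log sigma_j + log sqrt(2*pi) + (a_j - mu_j)^2 / (2*sigma_j^2)];
# get_logprob_entropy evaluates it for sampled actions, while get_old_logprob reuses the stored
# exploration noise (a = mu + sigma * noise, so (a - mu) / sigma is exactly that noise).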
def get_logprob_entropy(self, state, action):
a_avg = self.forward(state)
a_std = self.a_logstd.exp()
delta = ((a_avg - action) / a_std).pow(2) * 0.5
logprob = -(self.a_logstd + self.sqrt_2pi_log + delta).sum(1) # new_logprob
dist_entropy = (logprob.exp() * logprob).mean() # policy entropy
return logprob, dist_entropy
def get_old_logprob(self, _action, noise): # noise = action - a_noise
delta = noise.pow(2) * 0.5
return -(self.a_logstd + self.sqrt_2pi_log + delta).sum(1) # old_logprob
class CriticAdv(nn.Module):
def __init__(self, mid_dim, state_dim, _action_dim):
super().__init__()
self.net = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
nn.Linear(mid_dim, 1))
def forward(self, state):
return self.net(state) # Advantage value
def smooth_rewards(rewards, window=10):
rewards = rewards.unsqueeze(0).unsqueeze(0)  # reshape rewards to [1, 1, len] so conv1d can be applied
kernel = torch.ones(1, 1, window, device=rewards.device) / window  # uniform moving-average kernel
smoothed_rewards = F.conv1d(rewards, kernel, padding='valid')  # moving average
smoothed_rewards = smoothed_rewards.squeeze(0).squeeze(0)  # drop the extra dimensions
# keep the output the same length as the input: the first window-1 rewards are left unchanged
return torch.cat((rewards[0, 0, :window-1], smoothed_rewards))
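# Shape check (illustrative): smooth_rewards(torch.arange(20.), window=5) returns a length-20 tensor
# whose first 4 entries equal the raw inputs and whose remaining entries are 5-step moving averages.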
class AgentPPO:
def __init__(self):
super().__init__()
self.state = None
self.device = None
self.action_dim = None
self.get_obj_critic = None
self.criterion = torch.nn.SmoothL1Loss()
self.cri = self.cri_target = self.if_use_cri_target = self.cri_optim = self.ClassCri = None
self.act = self.act_target = self.if_use_act_target = self.act_optim = self.ClassAct = None
'''init modify'''
self.ClassCri = CriticAdv
self.ClassAct = ActorPPO
self.ratio_clip = 0.2 # ratio.clamp(1 - clip, 1 + clip)
self.lambda_entropy = 0.02 # could be 0.01~0.05
self.lambda_gae_adv = 0.98 # could be 0.95~0.99, (Generalized Advantage Estimation. ICLR.2016.)
self.get_reward_sum = None # self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
self.trajectory_list = None
def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, if_use_gae=True, gpu_id=0):
self.device = torch.device(f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu")
self.trajectory_list = list()
# choose whether to use gae or not
self.get_reward_sum = self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
self.cri = self.ClassCri(net_dim, state_dim, action_dim).to(self.device)
self.act = self.ClassAct(net_dim, state_dim, action_dim).to(self.device) if self.ClassAct else self.cri
self.cri_target = deepcopy(self.cri) if self.if_use_cri_target else self.cri
self.act_target = deepcopy(self.act) if self.if_use_act_target else self.act
self.cri_optim = torch.optim.Adam(self.cri.parameters(), learning_rate)
self.act_optim = torch.optim.Adam(self.act.parameters(), learning_rate) if self.ClassAct else self.cri
def select_action(self, state):
states = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
actions, noises = self.act.get_action(states)
return actions[0].detach().cpu().numpy(), noises[0].detach().cpu().numpy()
def explore_env(self, env, target_step):
state = self.state  # the agent carries the current environment state between exploration calls
trajectory_temp = list()
last_done = 0
for i in range(target_step):
action, noise = self.select_action(state)
state, next_state, reward, done, = env.step(np.tanh(action))
trajectory_temp.append((state, reward, done, action, noise))
if done:
state = env.reset()
last_done = i
else:
state = next_state
self.state = state
'''splice list'''
# store 0 trajectory information to list
trajectory_list = self.trajectory_list + trajectory_temp[:last_done + 1]
self.trajectory_list = trajectory_temp[last_done:]
return trajectory_list
def update_net(self, buffer, batch_size, repeat_times, soft_update_tau):
"""put data extract and update network together"""
with torch.no_grad():
buf_len = buffer[0].shape[0]
# decompose buffer data
buf_state, buf_action, buf_noise, buf_reward, buf_mask = [ten.to(self.device) for ten in buffer]
'''get buf_r_sum, buf_logprob'''
bs = 4096
buf_value = [self.cri_target(buf_state[i:i + bs]) for i in range(0, buf_len, bs)]
buf_value = torch.cat(buf_value, dim=0)
buf_logprob = self.act.get_old_logprob(buf_action, buf_noise)
buf_r_sum, buf_advantage = self.get_reward_sum(buf_len, buf_reward, buf_mask, buf_value) # detach()
# normalize advantage
buf_advantage = (buf_advantage - buf_advantage.mean()) / (buf_advantage.std() + 1e-5)
buf_advantage = smooth_rewards(buf_advantage, window=10)
del buf_noise, buffer[:]
'''PPO: Surrogate objective of Trust Region'''
obj_critic = obj_actor = None
for _ in range(int(buf_len / batch_size * repeat_times)):
indices = torch.randint(buf_len, size=(batch_size,), requires_grad=False, device=self.device)
state = buf_state[indices]
action = buf_action[indices]
r_sum = buf_r_sum[indices]
logprob = buf_logprob[indices]
advantage = buf_advantage[indices]
new_logprob, obj_entropy = self.act.get_logprob_entropy(state, action) # it is obj_actor
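# clipped surrogate: ratio = exp(log pi_new - log pi_old); the objective is
# E[min(ratio * A, clip(ratio, 1 - eps, 1 + eps) * A)], negated below because the optimizer minimizes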
ratio = (new_logprob - logprob.detach()).exp()
surrogate1 = advantage * ratio
surrogate2 = advantage * ratio.clamp(1 - self.ratio_clip, 1 + self.ratio_clip)
obj_surrogate = -torch.min(surrogate1, surrogate2).mean()
obj_actor = obj_surrogate + obj_entropy * self.lambda_entropy
self.optim_update(self.act_optim, obj_actor) # update actor
value = self.cri(state).squeeze(1) # critic network predicts the reward_sum (Q value) of state
# use smoothloss L1 to evaluate the value loss
# obj_critic = self.criterion(value, r_sum) / (r_sum.std() + 1e-6)
obj_critic = self.criterion(value, r_sum)
self.optim_update(self.cri_optim, obj_critic)  # backpropagate the value loss and update the critic
# choose whether to use soft update
self.soft_update(self.cri_target, self.cri, soft_update_tau) if self.cri_target is not self.cri else None
a_std_log = getattr(self.act, 'a_logstd', torch.zeros(1))  # the actor stores its log-std as 'a_logstd'
return obj_critic.item(), obj_actor.item(), a_std_log.mean().item() # logging_tuple
def get_reward_sum_raw(self, buf_len, buf_reward, buf_mask, buf_value) -> (torch.Tensor, torch.Tensor):
buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device) # reward sum
pre_r_sum = 0
for i in range(buf_len - 1, -1, -1):
buf_r_sum[i] = buf_reward[i] + buf_mask[i] * pre_r_sum
pre_r_sum = buf_r_sum[i]
buf_advantage = buf_r_sum - (buf_mask * buf_value[:, 0])
return buf_r_sum, buf_advantage
def get_reward_sum_gae(self, buf_len, ten_reward, ten_mask, ten_value) -> (torch.Tensor, torch.Tensor):
buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device) # old policy value
buf_advantage = torch.empty(buf_len, dtype=torch.float32, device=self.device) # advantage value
pre_r_sum = 0.0
pre_advantage = 0.0 # advantage value of previous step
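# backward recursion over the rollout; for reference, standard GAE uses
# delta_t = r_t + gamma * V(s_{t+1}) - V(s_t) and A_t = delta_t + gamma * lambda * A_{t+1};
# here ten_mask = (1 - done) * gamma, and the mask also multiplies the -V(s_t) term (a variant of that recursion)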
for i in range(buf_len - 1, -1, -1):
buf_r_sum[i] = ten_reward[i] + ten_mask[i] * pre_r_sum
pre_r_sum = buf_r_sum[i]
buf_advantage[i] = ten_reward[i] + ten_mask[i] * (pre_advantage - ten_value[i])
pre_advantage = ten_value[i] + buf_advantage[i] * self.lambda_gae_adv
return buf_r_sum, buf_advantage
@staticmethod
def optim_update(optimizer, objective):
optimizer.zero_grad()
objective.backward()
optimizer.step()
@staticmethod
def soft_update(target_net, current_net, tau):
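# Polyak averaging: theta_target <- tau * theta_online + (1 - tau) * theta_target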
for tar, cur in zip(target_net.parameters(), current_net.parameters()):
tar.data.copy_(cur.data.__mul__(tau) + tar.data.__mul__(1.0 - tau))
class Arguments:
def __init__(self, agent=None, env=None):
self.agent = agent # Deep Reinforcement Learning algorithm
self.env = env # the environment for training
self.cwd = None # current work directory. None means set automatically
self.if_remove = False # remove the cwd folder? (True, False, None:ask me)
self.visible_gpu = '0' # for example: os.environ['CUDA_VISIBLE_DEVICES'] = '0, 2,'
self.num_threads = 32 # cpu_num for evaluate model, torch.set_num_threads(self.num_threads)
'''Arguments for training'''
self.num_episode = 1000 # to control the train episodes for PPO
self.gamma = 0.995 # discount factor of future rewards
self.learning_rate = 1e-4 # 1e-4 2 ** -14 2e-4
self.soft_update_tau = 2 ** -8 # 1e-3 2 ** -8
self.net_dim = 256 # the network width
self.batch_size = 4096 # num of transitions sampled from replay buffer.
self.repeat_times = 2 ** 5 # collect target_step, then update network
self.target_step = 4096 # repeatedly update network to keep critic's loss small
self.max_memo = self.target_step # capacity of replay buffer
self.if_gae_or_raw = True # GAE for on-policy sparse reward: Generalized Advantage Estimation.
'''Arguments for evaluate'''
self.random_seed = 1234
# self.random_seed_list = [1234, 2234, 3234, 4234, 5234]
self.random_seed_list = [1567]
self.train = True
self.save_network = True
self.test_network = True
self.save_test_data = True
self.compare_with_gurobi = True
self.plot_on = True
def init_before_training(self):
if self.cwd is None:
agent_name = self.agent.__class__.__name__
self.cwd = f'./{agent_name}'
np.random.seed(self.random_seed)
torch.manual_seed(self.random_seed)
torch.set_num_threads(self.num_threads)
torch.set_default_dtype(torch.float32)
os.environ['CUDA_VISIBLE_DEVICES'] = str(self.visible_gpu)
def update_buffer(_trajectory):
_trajectory = list(map(list, zip(*_trajectory))) # 2D-list transpose, here cut the trajectory into 5 parts
ten_state = torch.as_tensor(_trajectory[0]) # tensor state here
ten_reward = torch.as_tensor(_trajectory[1], dtype=torch.float32)
# _trajectory[2] = done; replace the done flag with a mask to save memory
ten_mask = (1.0 - torch.as_tensor(_trajectory[2], dtype=torch.float32)) * gamma
ten_action = torch.as_tensor(_trajectory[3])
ten_noise = torch.as_tensor(_trajectory[4], dtype=torch.float32)
buffer[:] = (ten_state, ten_action, ten_noise, ten_reward, ten_mask) # list store tensors
_steps = ten_reward.shape[0] # how many steps are collected in all trajectories
_r_exp = ten_reward.mean() # the mean reward
return _steps, _r_exp
if __name__ == '__main__':
args = Arguments()
reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
loss_record = {'episode': [], 'steps': [], 'critic_loss': [], 'actor_loss': [], 'entropy_loss': []}
args.visible_gpu = '0'
for seed in args.random_seed_list:
args.random_seed = seed
args.agent = AgentPPO()
agent_name = f'{args.agent.__class__.__name__}'
args.agent.cri_target = True
args.env = ESSEnv()
args.init_before_training()
'''init agent and environment'''
agent = args.agent
env = args.env
agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
args.if_gae_or_raw)
cwd = args.cwd
gamma = args.gamma
batch_size = args.batch_size # how much data should be used to update net
target_step = args.target_step  # how many exploration steps to collect per rollout
repeat_times = args.repeat_times  # how many gradient updates to run per batch of collected data
soft_update_tau = args.soft_update_tau
num_episode = args.num_episode
agent.state = env.reset()
'''init buffer'''
buffer = list()
'''init training parameters'''
args.train = False
args.save_network = False
# args.test_network = False
# args.save_test_data = False
# args.compare_with_gurobi = False
# args.plot_on = False
if args.train:
for i_episode in range(num_episode):
with torch.no_grad():
trajectory_list = agent.explore_env(env, target_step)
steps, r_exp = update_buffer(trajectory_list)
critic_loss, actor_loss, entropy_loss = agent.update_net(buffer, batch_size, repeat_times,
soft_update_tau)
loss_record['critic_loss'].append(critic_loss)
loss_record['actor_loss'].append(actor_loss)
loss_record['entropy_loss'].append(entropy_loss)
with torch.no_grad():
episode_reward, episode_unbalance = get_episode_return(env, agent.act, agent.device)
reward_record['mean_episode_reward'].append(episode_reward)
reward_record['unbalance'].append(episode_unbalance)
print(f'episode: {i_episode}, reward: {episode_reward}, unbalance: {episode_unbalance}')
act_save_path = f'{args.cwd}/actor_10.pth'
loss_record_path = f'{args.cwd}/loss_10.pkl'
reward_record_path = f'{args.cwd}/reward_10.pkl'
if args.save_network:
with open(loss_record_path, 'wb') as tf:
pickle.dump(loss_record, tf)
with open(reward_record_path, 'wb') as tf:
pickle.dump(reward_record, tf)
torch.save(agent.act.state_dict(), act_save_path)
print('actor parameters have been saved')
if args.test_network:
args.cwd = agent_name
agent.act.load_state_dict(torch.load(act_save_path))
print('parameters have been reloaded for testing')
record = test_one_episode(env, agent.act, agent.device)
eval_data = pd.DataFrame(record['system_info'])
eval_data.columns = ['time_step', 'price', 'load', 'action', 'real_action', 'soc', 'battery',
'gen1', 'gen2', 'gen3', 'pv', 'wind', 'unbalance', 'operation_cost', 'reward']
if args.save_test_data:
test_data_save_path = f'{args.cwd}/test_10.pkl'
with open(test_data_save_path, 'wb') as tf:
pickle.dump(record, tf)
'''compare with gurobi data and results'''
if args.compare_with_gurobi:
month = record['init_info'][0][0]
day = record['init_info'][0][1]
initial_soc = record['init_info'][0][3]
base_result = optimization_base_result(env, month, day, initial_soc)
if args.plot_on:
from plotDRL import PlotArgs, make_dir, plot_evaluation_information, plot_optimization_result
plot_args = PlotArgs()
plot_args.feature_change = '10'
args.cwd = agent_name
plot_dir = make_dir(args.cwd, plot_args.feature_change)
plot_optimization_result(base_result, plot_dir)
plot_evaluation_information(args.cwd + '/' + 'test_10.pkl', plot_dir)
'''compare the costs obtained from Gurobi and PPO'''
ratio = sum(eval_data['operation_cost']) / sum(base_result['step_cost'])
print('rl_cost:', sum(eval_data['operation_cost']))
print('gurobi_cost:', sum(base_result['step_cost']))
print('ratio:', ratio)

472
PPO_llm.py Normal file
@@ -0,0 +1,472 @@
import json
import os
import pickle
from copy import deepcopy
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from data_manager import *
from environment import ESSEnv
from tools import optimization_base_result
def load_llm_actions(file_path):
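# data/results.json is assumed to hold a flat list of 5-dimensional action vectors, one per hour,
# indexed by the hour-of-year offset computed in the training/testing loops below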
with open(file_path, 'r') as file:
data = json.load(file)
return data
class ActorPPO(nn.Module):
def __init__(self, mid_dim, state_dim, action_dim):
super().__init__()
self.net = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
nn.Linear(mid_dim, action_dim))
# the logarithm (log) of standard deviation (std) of action, it is a trainable parameter
self.a_logstd = nn.Parameter(torch.zeros((1, action_dim)) - 0.5, requires_grad=True)
self.sqrt_2pi_log = np.log(np.sqrt(2 * np.pi))
def forward(self, state):
return self.net(state).tanh() # action.tanh() limit the data output of action
def get_action(self, state):
a_avg = self.forward(state)  # mean of the Gaussian policy (tanh-bounded network output)
a_std = self.a_logstd.exp()
noise = torch.randn_like(a_avg)
action = a_avg + noise * a_std
return action, noise
def get_logprob_entropy(self, state, action):
a_avg = self.forward(state)
a_std = self.a_logstd.exp()
delta = ((a_avg - action) / a_std).pow(2) * 0.5
logprob = -(self.a_logstd + self.sqrt_2pi_log + delta).sum(1) # new_logprob
dist_entropy = (logprob.exp() * logprob).mean() # policy entropy
return logprob, dist_entropy
def get_old_logprob(self, _action, noise): # noise = action - a_noise
delta = noise.pow(2) * 0.5
return -(self.a_logstd + self.sqrt_2pi_log + delta).sum(1) # old_logprob
class CriticAdv(nn.Module):
def __init__(self, mid_dim, state_dim, _action_dim):
super().__init__()
self.net = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
nn.Linear(mid_dim, 1)
)
def forward(self, state):
return self.net(state) # Advantage value
class AgentPPO:
def __init__(self):
super().__init__()
self.state = None
self.device = None
self.action_dim = None
self.get_obj_critic = None
self.current_index = 0
self.criterion = torch.nn.SmoothL1Loss()
self.cri = self.cri_target = self.if_use_cri_target = self.cri_optim = self.ClassCri = None
self.act = self.act_target = self.if_use_act_target = self.act_optim = self.ClassAct = None
self.ClassCri = CriticAdv
self.ClassAct = ActorPPO
self.ratio_clip = 0.2 # ratio.clamp(1 - clip, 1 + clip)
self.lambda_entropy = 0.02 # could be 0.01~0.05
self.lambda_gae_adv = 0.98 # could be 0.95~0.99, GAE (Generalized Advantage Estimation. ICLR.2016.)
self.get_reward_sum = None # self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
self.trajectory_list = None
self.llm_actions = load_llm_actions('data/results.json')
def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, if_use_gae=False, gpu_id=0):
self.device = torch.device(f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu")
self.trajectory_list = list()
# choose whether to use gae or not
self.get_reward_sum = self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
self.cri = self.ClassCri(net_dim, state_dim, action_dim).to(self.device)
self.act = self.ClassAct(net_dim, state_dim, action_dim).to(self.device) if self.ClassAct else self.cri
self.cri_target = deepcopy(self.cri) if self.if_use_cri_target else self.cri
self.act_target = deepcopy(self.act) if self.if_use_act_target else self.act
self.cri_optim = torch.optim.Adam(self.cri.parameters(), learning_rate)
self.act_optim = torch.optim.Adam(self.act.parameters(), learning_rate) if self.ClassAct else self.cri
def select_action(self, state):
states = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
actions, noises = self.act.get_action(states)
return actions[0].detach().cpu().numpy(), noises[0].detach().cpu().numpy()
def explore_env(self, env, target_step, index):
state = self.state  # the agent carries the current environment state between exploration calls
trajectory_temp = list()
last_done = 0
for i in range(target_step):
action, noise = self.select_action(state)
llm_action = self.llm_actions[index]
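# blend: 95% RL policy output + 5% LLM-suggested action, element-wise over the 5 action dimensions,
# so the LLM acts as a soft prior during exploration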
action = [0.95 * action[i] + 0.05 * llm_action[i] for i in range(5)]
state, next_state, reward, done, = env.step(np.tanh(action))
trajectory_temp.append((state, reward, done, action, noise))
if done:
state = env.reset()
last_done = i
else:
state = next_state
self.state = state
'''splice list'''
# store 0 trajectory information to list
trajectory_list = self.trajectory_list + trajectory_temp[:last_done + 1]
self.trajectory_list = trajectory_temp[last_done:]
return trajectory_list
def update_net(self, buffer, batch_size, repeat_times, soft_update_tau):
"""put data extract and update network together"""
with torch.no_grad():
buf_len = buffer[0].shape[0]
# decompose buffer data
buf_state, buf_action, buf_noise, buf_reward, buf_mask = [ten.to(self.device) for ten in buffer]
'''get buf_r_sum, buf_logprob'''
bs = 4096
buf_value = [self.cri_target(buf_state[i:i + bs]) for i in range(0, buf_len, bs)]
buf_value = torch.cat(buf_value, dim=0)
buf_logprob = self.act.get_old_logprob(buf_action, buf_noise)
buf_r_sum, buf_advantage = self.get_reward_sum(buf_len, buf_reward, buf_mask, buf_value) # detach()
# normalize advantage
buf_advantage = (buf_advantage - buf_advantage.mean()) / (buf_advantage.std() + 1e-5)
del buf_noise, buffer[:]
'''PPO: Surrogate objective of Trust Region'''
obj_critic = obj_actor = None
for _ in range(int(buf_len / batch_size * repeat_times)):
indices = torch.randint(buf_len, size=(batch_size,), requires_grad=False, device=self.device)
state = buf_state[indices]
action = buf_action[indices]
r_sum = buf_r_sum[indices]
logprob = buf_logprob[indices]
advantage = buf_advantage[indices]
new_logprob, obj_entropy = self.act.get_logprob_entropy(state, action) # it's obj_actor
ratio = (new_logprob - logprob.detach()).exp()
surrogate1 = advantage * ratio
surrogate2 = advantage * ratio.clamp(1 - self.ratio_clip, 1 + self.ratio_clip)
obj_surrogate = -torch.min(surrogate1, surrogate2).mean()
obj_actor = obj_surrogate + obj_entropy * self.lambda_entropy
self.optim_update(self.act_optim, obj_actor) # update actor
value = self.cri(state).squeeze(1) # critic network predicts the reward_sum (Q value) of state
# use smoothloss L1 to evaluate the value loss
# obj_critic = self.criterion(value, r_sum) / (r_sum.std() + 1e-6)
obj_critic = self.criterion(value, r_sum)
self.optim_update(self.cri_optim, obj_critic)  # backpropagate the value loss and update the critic
# choose whether to use soft update
self.soft_update(self.cri_target, self.cri, soft_update_tau) if self.cri_target is not self.cri else None
a_std_log = getattr(self.act, 'a_logstd', torch.zeros(1))  # the actor stores its log-std as 'a_logstd'
return obj_critic.item(), obj_actor.item(), a_std_log.mean().item() # logging_tuple
def get_reward_sum_raw(self, buf_len, buf_reward, buf_mask, buf_value) -> (torch.Tensor, torch.Tensor):
buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device) # reward sum
pre_r_sum = 0
for i in range(buf_len - 1, -1, -1):
buf_r_sum[i] = buf_reward[i] + buf_mask[i] * pre_r_sum
pre_r_sum = buf_r_sum[i]
buf_advantage = buf_r_sum - (buf_mask * buf_value[:, 0])
return buf_r_sum, buf_advantage
def get_reward_sum_gae(self, buf_len, ten_reward, ten_mask, ten_value) -> (torch.Tensor, torch.Tensor):
buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device) # old policy value
buf_advantage = torch.empty(buf_len, dtype=torch.float32, device=self.device) # advantage value
pre_r_sum = 0.0
pre_advantage = 0.0 # advantage value of previous step
for i in range(buf_len - 1, -1, -1):
buf_r_sum[i] = ten_reward[i] + ten_mask[i] * pre_r_sum
pre_r_sum = buf_r_sum[i]
buf_advantage[i] = ten_reward[i] + ten_mask[i] * (pre_advantage - ten_value[i]) # fix a bug here
pre_advantage = ten_value[i] + buf_advantage[i] * self.lambda_gae_adv
return buf_r_sum, buf_advantage
@staticmethod
def optim_update(optimizer, objective):
optimizer.zero_grad()
objective.backward()
optimizer.step()
@staticmethod
def soft_update(target_net, current_net, tau):
for tar, cur in zip(target_net.parameters(), current_net.parameters()):
tar.data.copy_(cur.data.__mul__(tau) + tar.data.__mul__(1.0 - tau))
class Arguments:
def __init__(self, agent=None, env=None):
self.agent = agent # Deep Reinforcement Learning algorithm
self.env = env # the environment for training
self.cwd = None # current work directory. None means set automatically
self.visible_gpu = '0' # for example: os.environ['CUDA_VISIBLE_DEVICES'] = '0, 2,'
self.num_threads = 32 # cpu_num for evaluate model, torch.set_num_threads(self.num_threads)
'''Arguments for training'''
self.num_episode = 1000 # to control the train episodes for PPO
self.gamma = 0.995 # discount factor of future rewards
self.learning_rate = 1e-4 # 2e-4 / 6e-5 / 2 ** -4
self.soft_update_tau = 2 ** -8 # 5e-3 / 4e-4 / 2 ** -8 1e-3
self.net_dim = 256 # the network width
self.batch_size = 4096 # num of transitions sampled from replay buffer.
self.repeat_times = 2 ** 5 # collect target_step, then update network
self.target_step = 4096 # repeatedly update network to keep critic's loss small
self.max_memo = self.target_step # capacity of replay buffer
self.if_gae_or_raw = True # GAE for on-policy sparse reward: Generalized Advantage Estimation.
'''Arguments for evaluate'''
self.random_seed = 1234 # initialize random seed in self.init_before_training()
# self.random_seed_list = [1234, 2234, 3234]
self.random_seed_list = [3234]
self.train = True
self.save_network = True
self.test_network = True
self.save_test_data = True
self.compare_with_gurobi = True
self.plot_on = True
def init_before_training(self):
if self.cwd is None:
agent_name = self.agent.__class__.__name__
self.cwd = f'./{agent_name}'
np.random.seed(self.random_seed)
torch.manual_seed(self.random_seed)
torch.set_num_threads(self.num_threads)
torch.set_default_dtype(torch.float32)
os.environ['CUDA_VISIBLE_DEVICES'] = str(self.visible_gpu)
def update_buffer(_trajectory):
_trajectory = list(map(list, zip(*_trajectory))) # 2D-list transpose, here cut the trajectory into 5 parts
ten_state = torch.as_tensor(_trajectory[0], dtype=torch.float32) # tensor state here
ten_reward = torch.as_tensor(_trajectory[1], dtype=torch.float32)
# _trajectory[2] = done, replace done by mask, save memory
ten_mask = (1.0 - torch.as_tensor(_trajectory[2], dtype=torch.float32)) * gamma
ten_action = torch.as_tensor(_trajectory[3], dtype=torch.float32)
ten_noise = torch.as_tensor(_trajectory[4], dtype=torch.float32)
buffer[:] = (ten_state, ten_action, ten_noise, ten_reward, ten_mask) # list store tensors
_steps = ten_reward.shape[0] # how many steps are collected in all trajectories
_r_exp = ten_reward.mean() # the mean reward
return _steps, _r_exp
def test_one_episode(env, act, device, index):
"""to get evaluate information, here record the unbalance of after taking action"""
record_state = []
record_action = []
record_reward = []
record_unbalance = []
record_system_info = [] # [time, price, netload, action, real action, soc, output*4, unbalance, cost]
record_init_info = []  # includes month, day, time, initial soc
env.TRAIN = False
state = env.reset()
record_init_info.append([env.month, env.day, env.current_time, env.battery.current_capacity])
print(f'current testing month is {env.month}, day is {env.day}, initial_soc is {env.battery.current_capacity}')
llm_actions = load_llm_actions('data/results.json')
for i in range(24):
s_tensor = torch.as_tensor((state,), device=device)
a_tensor = act(s_tensor)
rl_action = a_tensor.detach().cpu().numpy()[0]
llm_action = llm_actions[index]
action = [0.95 * rl_action[i] + 0.05 * llm_action[i] for i in range(5)]
real_action = action
state, next_state, reward, done = env.step(action)
record_system_info.append([state[0], state[1], state[3] + env.wind.current_power, action, real_action,
env.battery.SOC(), env.battery.energy_change, next_state[4], next_state[5],
next_state[6], env.solar.current_power, env.wind.current_power, env.unbalance,
env.operation_cost, reward])
record_state.append(state)
record_action.append(real_action)
record_reward.append(reward)
record_unbalance.append(env.unbalance)
state = next_state
# add the last step's information: dg1, dg2, dg3, soc, tem, irr
record_system_info[-1][7:12] = [env.final_step_outputs[0], env.final_step_outputs[1], env.final_step_outputs[2],
env.final_step_outputs[4], env.final_step_outputs[5]]
record_system_info[-1][5] = env.final_step_outputs[3]
record = {'init_info': record_init_info, 'system_info': record_system_info, 'state': record_state,
'action': record_action, 'reward': record_reward, 'unbalance': record_unbalance}
return record
def test_llm(env, index):
record_system_info = []
record_init_info = []
env.TRAIN = False
record_init_info.append([env.month, env.day, env.current_time, env.battery.current_capacity])
llm_actions = load_llm_actions('data/results.json')
cumulative_reward = 0
for i in range(24):
action = llm_actions[index + i]
real_action = action
state, next_state, reward, done = env.step(action)
record_system_info.append([state[0], state[1], state[3] + env.wind.current_power, action, real_action,
env.battery.SOC(), env.battery.energy_change, next_state[4], next_state[5],
next_state[6], env.solar.current_power, env.wind.current_power, env.unbalance,
env.operation_cost, reward])
cumulative_reward += reward
if done:
break
record_system_info[-1][7:12] = [env.final_step_outputs[0], env.final_step_outputs[1], env.final_step_outputs[2],
env.final_step_outputs[4], env.final_step_outputs[5]]
record_system_info[-1][5] = env.final_step_outputs[3]
record = {'system_info': record_system_info, 'cumulative_reward': cumulative_reward}
return record
def get_episode_return(env, act, device, index):
episode_reward = 0.0
episode_unbalance = 0.0
state = env.reset()
llm_actions = load_llm_actions('data/results.json')
for i in range(24):
s_tensor = torch.as_tensor((state,), device=device)
a_tensor = act(s_tensor)
rl_action = a_tensor.detach().cpu().numpy()[0]
llm_action = llm_actions[index]
action = [0.95 * rl_action[i] + 0.05 * llm_action[i] for i in range(5)]
state, next_state, reward, done, = env.step(action)
state = next_state
episode_reward += reward
episode_unbalance += env.real_unbalance
if done:
break
return episode_reward, episode_unbalance
if __name__ == '__main__':
args = Arguments()
reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
loss_record = {'episode': [], 'steps': [], 'critic_loss': [], 'actor_loss': [], 'entropy_loss': []}
args.visible_gpu = '0,1'
for seed in args.random_seed_list:
args.random_seed = seed
args.agent = AgentPPO()
args.agent.cri_target = True
agent_name = f'{args.agent.__class__.__name__}'
args.env = ESSEnv()
args.init_before_training()
'''init agent and environment'''
agent = args.agent
env = args.env
agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
args.if_gae_or_raw)
cwd = args.cwd
gamma = args.gamma
batch_size = args.batch_size # how much data should be used to update net
target_step = args.target_step  # how many exploration steps to collect per rollout
repeat_times = args.repeat_times  # how many gradient updates to run per batch of collected data
soft_update_tau = args.soft_update_tau
agent.state = env.reset()
'''init buffer'''
buffer = list()
'''init training parameters'''
num_episode = args.num_episode
args.train = False
args.save_network = False
# args.test_network = False
# args.save_test_data = False
# args.compare_with_gurobi = False
# args.plot_on = False
if args.train:
for i_episode in range(num_episode):
with torch.no_grad():
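# hour-of-year offset of the current simulation time, used to look up the matching LLM action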
index = (sum(Constant.MONTHS_LEN[:env.month - 1]) + env.day - 1) * 24 + env.current_time
trajectory_list = agent.explore_env(env, target_step, index)
steps, r_exp = update_buffer(trajectory_list)
critic_loss, actor_loss, entropy_loss = agent.update_net(buffer, batch_size, repeat_times,
soft_update_tau)
loss_record['critic_loss'].append(critic_loss)
loss_record['actor_loss'].append(actor_loss)
loss_record['entropy_loss'].append(entropy_loss)
with torch.no_grad():
episode_reward, episode_unbalance = get_episode_return(env, agent.act, agent.device, index)
reward_record['mean_episode_reward'].append(episode_reward)
reward_record['unbalance'].append(episode_unbalance)
print(f'episode: {i_episode}, reward: {episode_reward}, unbalance: {episode_unbalance}')
act_save_path = f'{args.cwd}/actor_llm_1015.pth'
loss_record_path = f'{args.cwd}/loss_llm_1015.pkl'
reward_record_path = f'{args.cwd}/reward_llm_1015.pkl'
if args.save_network:
with open(loss_record_path, 'wb') as tf:
pickle.dump(loss_record, tf)
with open(reward_record_path, 'wb') as tf:
pickle.dump(reward_record, tf)
torch.save(agent.act.state_dict(), act_save_path)
print('actor parameters have been saved')
if args.test_network:
args.cwd = agent_name
agent.act.load_state_dict(torch.load(act_save_path))
print('parameters have been reloaded for testing')
index = (sum(Constant.MONTHS_LEN[:env.month - 1]) + env.day - 1) * 24 + env.current_time
record = test_one_episode(env, agent.act, agent.device, index)
# re = test_llm(env, index)
rl_data = pd.DataFrame(record['system_info'])
# llm_data = pd.DataFrame(re['system_info'])
rl_data.columns = ['time_step', 'price', 'netload', 'action', 'real_action', 'soc', 'battery',
'gen1', 'gen2', 'gen3', 'pv', 'wind', 'unbalance', 'operation_cost', 'reward']
if args.save_test_data:
test_data_save_path = f'{args.cwd}/test_llm_1015.pkl'
# test_llm_save_path = f'{args.cwd}/test_only_llm_1015.pkl'
with open(test_data_save_path, 'wb') as tf:
pickle.dump(record, tf)
# with open(test_llm_save_path, 'wb') as f:
# pickle.dump(re, f)
'''compare with gurobi data and results'''
if args.compare_with_gurobi:
month = record['init_info'][0][0]
day = record['init_info'][0][1]
initial_soc = record['init_info'][0][3]
base_result = optimization_base_result(env, month, day, initial_soc)
if args.plot_on:
from plotDRL import *
plot_args = PlotArgs()
plot_args.feature_change = 'llm_1015'
args.cwd = agent_name
plot_dir = make_dir(args.cwd, plot_args.feature_change)
# plot_optimization_result(base_result, plot_dir)
# plot_evaluation_information(args.cwd + '/' + 'test_llm_1015.pkl', plot_dir)
plot_soc(base_result, args.cwd + '/' + 'test_llm_1015.pkl', plot_dir)
plot_energy(base_result, args.cwd + '/' + 'test_llm_1015.pkl', plot_dir)
'''compare the costs obtained from Gurobi and PPO'''
print('rl_cost:', sum(rl_data['operation_cost']))
print('gurobi_cost:', sum(base_result['step_cost']))
print('ratio:', sum(rl_data['operation_cost']) / sum(base_result['step_cost']))

136
PPO_llm_melt.py Normal file
@@ -0,0 +1,136 @@
import random
from PPO_llm import *
from plotDRL import *
def generate_test_dates(num):
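# sample `num` distinct (month, day) pairs, restricted to day 20 onward of each month
# (assumed here to be the held-out test portion of the data)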
dates = []
months_days = {1: 31, 2: 28, 3: 31, 4: 30, 5: 31, 6: 30, 7: 31, 8: 31, 9: 30, 10: 31, 11: 30, 12: 31}
while len(dates) < num:
month = random.randint(1, 12)
day = random.randint(20, months_days[month])
if (month, day) not in dates:
dates.append((month, day))
return dates
def test_llm(env, llm_actions, index):
sum_reward = 0
sum_unbalance = 0
for i in range(24):
action = llm_actions[index + i]
state, next_state, reward, done = env.step(action)
sum_reward += reward
sum_unbalance += env.real_unbalance
record = {'reward': [sum_reward], 'unbalance': [sum_unbalance]}
return record
def test_one_episode(env, act, device):
state = env.reset()
sum_reward = 0
sum_unbalance = 0
for i in range(24):
s_tensor = torch.as_tensor((state,), device=device)
a_tensor = act(s_tensor)
action = a_tensor.detach().cpu().numpy()[0]
# rl_action = a_tensor.detach().cpu().numpy()[0]
# llm_action = llm_actions[index + i]
# action = 0.95 * np.array(rl_action) + 0.05 * np.array(llm_action)
state, next_state, reward, done = env.step(action)
state = next_state
sum_reward += reward
sum_unbalance += env.real_unbalance
record = {'reward': [sum_reward], 'unbalance': [sum_unbalance]}
return record
def run_multiple_tests(env, args, agent1, agent2, num_tests=10):
all_ppo = pd.DataFrame()
all_llm = pd.DataFrame()
all_lmppo = pd.DataFrame()
all_milp = pd.DataFrame()
test_dates = generate_test_dates(num_tests)
llm_actions = load_llm_actions('data/results.json')
for month, day in test_dates:
print(f'current testing month is {month}, day is {day}')
index = (sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24
ppo_res = test_one_episode(env, agent1.act, agent1.device)
base_res = optimization_base_result(env, month, day)
llm_res = test_llm(env, llm_actions, index)
lmppo_res = test_one_episode(env, agent2.act, agent2.device)
ppo_data = pd.DataFrame(ppo_res)
m_reward = - base_res['step_cost'].sum()/1e3
milp_data = pd.DataFrame({'reward': [m_reward]})
llm_data = pd.DataFrame(llm_res)
lmppo_data = pd.DataFrame(lmppo_res)
all_ppo = pd.concat([all_ppo, ppo_data], ignore_index=True)
all_milp = pd.concat([all_milp, milp_data], ignore_index=True)
all_llm = pd.concat([all_llm, llm_data], ignore_index=True)
all_lmppo = pd.concat([all_lmppo, lmppo_data], ignore_index=True)
test_ppo_path = f'{args.cwd}/melt_ppo.pkl'
test_llm_path = f'{args.cwd}/melt_llm.pkl'
test_lmppo_path = f'{args.cwd}/melt_lmppo.pkl'
test_milp_path = f'{args.cwd}/melt_milp.pkl'
with open(test_ppo_path, 'wb') as tf:
pickle.dump(all_ppo, tf)
with open(test_milp_path, 'wb') as tf:
pickle.dump(all_milp, tf)
with open(test_llm_path, 'wb') as f:
pickle.dump(all_llm, f)
with open(test_lmppo_path, 'wb') as tf:
pickle.dump(all_lmppo, tf)
plot_args = PlotArgs()
plot_args.feature_change = 'llm_1015'
args.cwd = agent_name
plot_dir = make_dir(args.cwd, plot_args.feature_change)
plot_melt(test_ppo_path, test_llm_path, test_lmppo_path, test_milp_path, plot_dir)
if __name__ == '__main__':
args = Arguments()
args.visible_gpu = '0'
for seed in args.random_seed_list:
args.random_seed = seed
args.agent1 = AgentPPO()
args.agent2 = AgentPPO()  # two separate instances so each agent can load its own actor weights below
args.agent1.cri_target = args.agent2.cri_target = True
agent_name = f'{args.agent1.__class__.__name__}'
args.env = ESSEnv()
args.init_before_training()
agent1 = args.agent1
agent2 = args.agent2
env = args.env
agent1.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
args.if_gae_or_raw)
agent2.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
args.if_gae_or_raw)
cwd = args.cwd
gamma = args.gamma
batch_size = args.batch_size
target_step = args.target_step
repeat_times = args.repeat_times
soft_update_tau = args.soft_update_tau
agent1.state = agent2.state = env.reset()
buffer = list()
num_episode = args.num_episode
# args.test_network = False
if args.test_network:
args.cwd = agent_name
ppo_act_save_path = f'{args.cwd}/actor_10.pth'
lmppo_act_save_path = f'{args.cwd}/actor_llm_1015.pth'
agent1.act.load_state_dict(torch.load(ppo_act_save_path))
agent2.act.load_state_dict(torch.load(lmppo_act_save_path))
print('parameters have been reloaded for testing')
env.TRAIN = False
run_multiple_tests(env, args, agent1, agent2, num_tests=12)

200
PPO_llm_melt_2.py Normal file
@@ -0,0 +1,200 @@
import random
from PPO_llm import *
from plotDRL import *
test_day = 9  # number of consecutive test days in each group
def test_ram_episode(env):
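# random-action baseline ('ram'): sample each hour's action uniformly from the action space
# and accumulate the reward and real unbalance over a 24-hour episode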
state = env.reset()
sum_reward = 0
sum_unbalance = 0
for i in range(24):
action = np.random.uniform(env.action_space.low, env.action_space.high)
state, next_state, reward, done = env.step(action)
state = next_state
sum_reward += reward
sum_unbalance += env.real_unbalance
record = {'reward': [sum_reward], 'unbalance': [sum_unbalance]}
return record
def test_llm(env, llm_actions, index):
sum_reward = 0
sum_unbalance = 0
for i in range(24):
action = llm_actions[index + i]
state, next_state, reward, done = env.step(action)
sum_reward += reward
sum_unbalance += env.real_unbalance
record = {'reward': [sum_reward], 'unbalance': [sum_unbalance]}
return record
def test_one_episode(env, act, device):
state = env.reset()
sum_reward = 0
sum_unbalance = 0
for i in range(24):
s_tensor = torch.as_tensor((state,), device=device)
a_tensor = act(s_tensor)
action = a_tensor.detach().cpu().numpy()[0]
# rl_action = a_tensor.detach().cpu().numpy()[0]
# llm_action = llm_actions[index + i]
# action = 0.95 * np.array(rl_action) + 0.05 * np.array(llm_action)
state, next_state, reward, done = env.step(action)
state = next_state
sum_reward += reward
sum_unbalance += env.real_unbalance
record = {'reward': [sum_reward], 'unbalance': [sum_unbalance]}
return record
def generate_test_dates():
# months_days = {1: 31, 2: 28, 3: 31, 4: 30, 5: 31, 6: 30, 7: 31, 8: 31, 9: 30, 10: 31, 11: 30, 12: 31}
dates = []
for month in range(1, 13):  # iterate over the 12 months
start_day = 20
# days_in_month = months_days[month]  # make sure each month has at least 9 days remaining
# if days_in_month >= start_day + 9:  # generate 8 consecutive days
for day in range(start_day, start_day + test_day):
dates.append((month, day))
return dates
def run_multiple_tests(env, args, agent1, agent2):
all_ppo = pd.DataFrame()
all_llm = pd.DataFrame()
all_lmppo = pd.DataFrame()
all_milp = pd.DataFrame()
all_ram = pd.DataFrame()
test_dates = generate_test_dates()  # get the test dates
llm_actions = load_llm_actions('data/results_day.json')
# run the test for each date, in groups of test_day days
for idx in range(0, len(test_dates), test_day):
current_dates = test_dates[idx:idx + test_day]
print(f'Start testing for {current_dates[0]} to {current_dates[-1]}')
ppo_rewards = []
ppo_unbalances = []
llm_rewards = []
llm_unbalances = []
lmppo_rewards = []
lmppo_unbalances = []
milp_rewards = []
ram_rewards = []
ram_unbalances = []
# test each of the current group's days one by one
for month, day in current_dates:
index = (sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24
ppo_res = test_one_episode(env, agent1.act, agent1.device)
base_res = optimization_base_result(env, month, day)
llm_res = test_llm(env, llm_actions, index)
lmppo_res = test_one_episode(env, agent2.act, agent2.device)
ram_res = test_ram_episode(env)
# store each day's results
ppo_rewards.append(ppo_res['reward'][0])
ppo_unbalances.append(ppo_res['unbalance'][0])
llm_rewards.append(llm_res['reward'][0])
llm_unbalances.append(llm_res['unbalance'][0])
lmppo_rewards.append(lmppo_res['reward'][0])
lmppo_unbalances.append(lmppo_res['unbalance'][0])
m_reward = - base_res['step_cost'].sum() / 1e3
milp_rewards.append(m_reward)
ram_rewards.append(ram_res['reward'][0])
ram_unbalances.append(ram_res['unbalance'][0])
# compute the average results over the group of days
avg_ppo_reward = sum(ppo_rewards) / len(ppo_rewards)
avg_ppo_unbalance = sum(ppo_unbalances) / len(ppo_unbalances)
avg_llm_reward = sum(llm_rewards) / len(llm_rewards)
avg_llm_unbalance = sum(llm_unbalances) / len(llm_unbalances)
avg_lmppo_reward = sum(lmppo_rewards) / len(lmppo_rewards)
avg_lmppo_unbalance = sum(lmppo_unbalances) / len(lmppo_unbalances)
avg_milp_reward = sum(milp_rewards) / len(milp_rewards)
avg_ram_reward = sum(ram_rewards) / len(ram_rewards)
avg_ram_unbalance = sum(ram_unbalances) / len(ram_unbalances)
# save the per-group average results
ppo_data = pd.DataFrame({'reward': [avg_ppo_reward], 'unbalance': [avg_ppo_unbalance]})
llm_data = pd.DataFrame({'reward': [avg_llm_reward], 'unbalance': [avg_llm_unbalance]})
lmppo_data = pd.DataFrame({'reward': [avg_lmppo_reward], 'unbalance': [avg_lmppo_unbalance]})
milp_data = pd.DataFrame({'reward': [avg_milp_reward]})
ram_data = pd.DataFrame({'reward': [avg_ram_reward], 'unbalance': [avg_ram_unbalance]})
all_ppo = pd.concat([all_ppo, ppo_data], ignore_index=True)
all_llm = pd.concat([all_llm, llm_data], ignore_index=True)
all_lmppo = pd.concat([all_lmppo, lmppo_data], ignore_index=True)
all_milp = pd.concat([all_milp, milp_data], ignore_index=True)
all_ram = pd.concat([all_ram, ram_data], ignore_index=True)
# save the results
test_ppo_path = f'{args.cwd}/melt_ppo.pkl'
test_llm_path = f'{args.cwd}/melt_llm.pkl'
test_lmppo_path = f'{args.cwd}/melt_lmppo.pkl'
test_milp_path = f'{args.cwd}/melt_milp.pkl'
test_ram_path = f'{args.cwd}/melt_ram.pkl'
with open(test_ppo_path, 'wb') as tf:
pickle.dump(all_ppo, tf)
with open(test_milp_path, 'wb') as tf:
pickle.dump(all_milp, tf)
with open(test_llm_path, 'wb') as f:
pickle.dump(all_llm, f)
with open(test_lmppo_path, 'wb') as tf:
pickle.dump(all_lmppo, tf)
with open(test_ram_path, 'wb') as tf:
pickle.dump(all_ram, tf)
plot_args = PlotArgs()
plot_args.feature_change = 'llm_1015'
args.cwd = agent_name
plot_dir = make_dir(args.cwd, plot_args.feature_change)
plot_melt(test_ppo_path, test_llm_path, test_lmppo_path, test_milp_path, test_ram_path, plot_dir)
if __name__ == '__main__':
args = Arguments()
args.visible_gpu = '0'
for seed in args.random_seed_list:
args.random_seed = seed
args.agent1 = AgentPPO()
args.agent2 = AgentPPO()  # two separate instances so each agent can load its own actor weights below
args.agent1.cri_target = args.agent2.cri_target = True
agent_name = f'{args.agent1.__class__.__name__}'
args.env = ESSEnv()
args.init_before_training()
agent1 = args.agent1
agent2 = args.agent2
env = args.env
agent1.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
args.if_gae_or_raw)
agent2.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
args.if_gae_or_raw)
cwd = args.cwd
gamma = args.gamma
batch_size = args.batch_size
target_step = args.target_step
repeat_times = args.repeat_times
soft_update_tau = args.soft_update_tau
agent1.state = agent2.state = env.reset()
buffer = list()
num_episode = args.num_episode
# args.test_network = False
if args.test_network:
args.cwd = agent_name
ppo_act_save_path = f'{args.cwd}/actor_10.pth'
lmppo_act_save_path = f'{args.cwd}/actor_llm_1015.pth'
agent1.act.load_state_dict(torch.load(ppo_act_save_path))
agent2.act.load_state_dict(torch.load(lmppo_act_save_path))
print('parameters have been reloaded for testing')
env.TRAIN = False
run_multiple_tests(env, args, agent1, agent2)

371
PPO_ramdom.py Normal file
@@ -0,0 +1,371 @@
import os
import pickle
from copy import deepcopy
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from environment import ESSEnv
from tools import get_episode_return, test_one_episode, optimization_base_result
class ActorPPO(nn.Module):
def __init__(self, mid_dim, state_dim, action_dim):
super().__init__()
self.net = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
nn.Linear(mid_dim, action_dim))
# the logarithm (log) of standard deviation (std) of action, it is a trainable parameter
self.a_logstd = nn.Parameter(torch.zeros((1, action_dim)) - 0.5, requires_grad=True)
self.sqrt_2pi_log = np.log(np.sqrt(2 * np.pi))
def forward(self, state):
return self.net(state).tanh() # action.tanh() limit the data output of action
def get_action(self, state):
a_avg = self.forward(state)  # mean of the Gaussian policy (tanh-bounded network output)
a_std = self.a_logstd.exp()
noise = torch.randn_like(a_avg)
action = a_avg + noise * a_std
return action, noise
def get_logprob_entropy(self, state, action):
a_avg = self.forward(state)
a_std = self.a_logstd.exp()
delta = ((a_avg - action) / a_std).pow(2) * 0.5
logprob = -(self.a_logstd + self.sqrt_2pi_log + delta).sum(1) # new_logprob
dist_entropy = (logprob.exp() * logprob).mean() # policy entropy
return logprob, dist_entropy
def get_old_logprob(self, _action, noise): # noise = action - a_noise
delta = noise.pow(2) * 0.5
return -(self.a_logstd + self.sqrt_2pi_log + delta).sum(1) # old_logprob
class CriticAdv(nn.Module):
def __init__(self, mid_dim, state_dim, _action_dim):
super().__init__()
self.net = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
nn.Linear(mid_dim, 1))
def forward(self, state):
return self.net(state) # Advantage value
def smooth_rewards(rewards, window=10):
rewards = rewards.unsqueeze(0).unsqueeze(0)  # reshape rewards to [1, 1, len] so conv1d can be applied
kernel = torch.ones(1, 1, window, device=rewards.device) / window  # uniform moving-average kernel
smoothed_rewards = F.conv1d(rewards, kernel, padding='valid')  # moving average
smoothed_rewards = smoothed_rewards.squeeze(0).squeeze(0)  # drop the extra dimensions
# keep the output the same length as the input: the first window-1 rewards are left unchanged
return torch.cat((rewards[0, 0, :window-1], smoothed_rewards))
class AgentPPO:
def __init__(self):
super().__init__()
self.state = None
self.device = None
self.action_dim = None
self.get_obj_critic = None
self.criterion = torch.nn.SmoothL1Loss()
self.cri = self.cri_target = self.if_use_cri_target = self.cri_optim = self.ClassCri = None
self.act = self.act_target = self.if_use_act_target = self.act_optim = self.ClassAct = None
'''init modify'''
self.ClassCri = CriticAdv
self.ClassAct = ActorPPO
self.ratio_clip = 0.2 # ratio.clamp(1 - clip, 1 + clip)
self.lambda_entropy = 0.02 # could be 0.01~0.05
self.lambda_gae_adv = 0.98 # could be 0.95~0.99, (Generalized Advantage Estimation. ICLR.2016.)
self.get_reward_sum = None # self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
self.trajectory_list = None
def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, if_use_gae=True, gpu_id=0):
self.device = torch.device(f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu")
self.trajectory_list = list()
# choose whether to use gae or not
self.get_reward_sum = self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
self.cri = self.ClassCri(net_dim, state_dim, action_dim).to(self.device)
self.act = self.ClassAct(net_dim, state_dim, action_dim).to(self.device) if self.ClassAct else self.cri
self.cri_target = deepcopy(self.cri) if self.if_use_cri_target else self.cri
self.act_target = deepcopy(self.act) if self.if_use_act_target else self.act
self.cri_optim = torch.optim.Adam(self.cri.parameters(), learning_rate)
self.act_optim = torch.optim.Adam(self.act.parameters(), learning_rate) if self.ClassAct else self.cri
def select_action(self, state):
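# random ablation: the state and the actor network are ignored; a uniformly random action is drawn
# from the (module-level) env.action_space and a zero noise placeholder is returned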
# states = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
# _, noises = self.act.get_action(states)
actions = np.random.uniform(env.action_space.low, env.action_space.high)  # random baseline: sample uniformly from the action space of the module-level env
return actions, 0
def explore_env(self, env, target_step):
state = self.state  # resume exploration from the agent's last stored observation
trajectory_temp = list()
last_done = 0
for i in range(target_step):
action, noise = self.select_action(state)
state, next_state, reward, done, = env.step(np.tanh(action))
trajectory_temp.append((state, reward, done, action, noise))
if done:
state = env.reset()
last_done = i
else:
state = next_state
self.state = state
'''splice list'''
# keep the transitions up to (and including) the last terminal step for this update
trajectory_list = self.trajectory_list + trajectory_temp[:last_done + 1]
self.trajectory_list = trajectory_temp[last_done:]
return trajectory_list
def update_net(self, buffer, batch_size, repeat_times, soft_update_tau):
"""put data extract and update network together"""
with torch.no_grad():
buf_len = buffer[0].shape[0]
# decompose buffer data
buf_state, buf_action, buf_noise, buf_reward, buf_mask = [ten.to(self.device) for ten in buffer]
'''get buf_r_sum, buf_logprob'''
bs = 4096
buf_value = [self.cri_target(buf_state[i:i + bs]) for i in range(0, buf_len, bs)]
buf_value = torch.cat(buf_value, dim=0)
# no log-probabilities are needed because the actor network is not being optimized
buf_logprob = torch.zeros_like(buf_value)  # a random policy has no meaningful logprob
buf_r_sum, buf_advantage = self.get_reward_sum(buf_len, buf_reward, buf_mask, buf_value)
# normalize advantage
buf_advantage = (buf_advantage - buf_advantage.mean()) / (buf_advantage.std() + 1e-5)
buf_advantage = smooth_rewards(buf_advantage, window=10)
del buf_noise, buffer[:]
'''PPO: Surrogate objective of Trust Region'''
obj_critic = obj_actor = None
# since a random policy is used, the actor update is skipped
# with no policy network to update, only a plain optimization of the critic network is performed
for _ in range(int(buf_len / batch_size * repeat_times)):
indices = torch.randint(buf_len, size=(batch_size,), requires_grad=False, device=self.device)
state = buf_state[indices]
action = buf_action[indices]
r_sum = buf_r_sum[indices]
logprob = buf_logprob[indices]
advantage = buf_advantage[indices]
# update only the critic network
value = self.cri(state).squeeze(1)  # the critic predicts the total reward (Q value) of the state
obj_critic = self.criterion(value, r_sum)  # value (critic) loss
self.optim_update(self.cri_optim, obj_critic)  # update the critic parameters
# optionally soft-update the target critic
self.soft_update(self.cri_target, self.cri, soft_update_tau) if self.cri_target is not self.cri else None
a_std_log = getattr(self.act, 'a_logstd', torch.zeros(1))  # the actor stores its log-std as a_logstd
return obj_critic.item(), 0, a_std_log.mean().item()  # the actor is not updated, so obj_actor is 0
def get_reward_sum_raw(self, buf_len, buf_reward, buf_mask, buf_value) -> (torch.Tensor, torch.Tensor):
buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device) # reward sum
pre_r_sum = 0
for i in range(buf_len - 1, -1, -1):
buf_r_sum[i] = buf_reward[i] + buf_mask[i] * pre_r_sum
pre_r_sum = buf_r_sum[i]
buf_advantage = buf_r_sum - (buf_mask * buf_value[:, 0])
return buf_r_sum, buf_advantage
def get_reward_sum_gae(self, buf_len, ten_reward, ten_mask, ten_value) -> (torch.Tensor, torch.Tensor):
buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device) # old policy value
buf_advantage = torch.empty(buf_len, dtype=torch.float32, device=self.device) # advantage value
pre_r_sum = 0.0
pre_advantage = 0.0 # advantage value of previous step
for i in range(buf_len - 1, -1, -1):
buf_r_sum[i] = ten_reward[i] + ten_mask[i] * pre_r_sum
pre_r_sum = buf_r_sum[i]
buf_advantage[i] = ten_reward[i] + ten_mask[i] * (pre_advantage - ten_value[i])
pre_advantage = ten_value[i] + buf_advantage[i] * self.lambda_gae_adv
return buf_r_sum, buf_advantage
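# Advantage recursion used above: pre_advantage carries V(s_{t+1}) + lambda * A_{t+1}, so
#   A_t = r_t + mask_t * (V(s_{t+1}) + lambda * A_{t+1} - V(s_t)),
# with mask_t = (1 - done_t) * gamma. Note that mask_t also multiplies V(s_t) here, which is a
# slight departure from the textbook GAE delta_t = r_t + gamma * V(s_{t+1}) - V(s_t).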
@staticmethod
def optim_update(optimizer, objective):
optimizer.zero_grad()
objective.backward()
optimizer.step()
@staticmethod
def soft_update(target_net, current_net, tau):
for tar, cur in zip(target_net.parameters(), current_net.parameters()):
tar.data.copy_(cur.data.__mul__(tau) + tar.data.__mul__(1.0 - tau))
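# Polyak (soft) target update: theta_target <- tau * theta + (1 - tau) * theta_target.
# With the default soft_update_tau = 2 ** -8 below, the target network trails the online
# network slowly, which stabilizes the critic's bootstrapped targets.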
class Arguments:
def __init__(self, agent=None, env=None):
self.agent = agent # Deep Reinforcement Learning algorithm
self.env = env # the environment for training
self.cwd = None # current work directory. None means set automatically
self.if_remove = False # remove the cwd folder? (True, False, None:ask me)
self.visible_gpu = '0' # for example: os.environ['CUDA_VISIBLE_DEVICES'] = '0, 2,'
self.num_threads = 32 # cpu_num for evaluate model, torch.set_num_threads(self.num_threads)
'''Arguments for training'''
self.num_episode = 1000 # to control the train episodes for PPO
self.gamma = 0.995 # discount factor of future rewards
self.learning_rate = 1e-4 # 1e-4 2 ** -14 2e-4
self.soft_update_tau = 2 ** -8 # 1e-3 2 ** -8
self.net_dim = 256 # the network width
self.batch_size = 4096 # num of transitions sampled from replay buffer.
self.repeat_times = 2 ** 5 # collect target_step, then update network
self.target_step = 4096 # repeatedly update network to keep critic's loss small
self.max_memo = self.target_step # capacity of replay buffer
self.if_gae_or_raw = True # GAE for on-policy sparse reward: Generalized Advantage Estimation.
'''Arguments for evaluate'''
self.random_seed = 1234
# self.random_seed_list = [1234, 2234, 3234, 4234, 5234]
self.random_seed_list = [1]
self.train = True
self.save_network = True
self.test_network = True
self.save_test_data = True
self.compare_with_gurobi = True
self.plot_on = True
def init_before_training(self):
if self.cwd is None:
agent_name = self.agent.__class__.__name__
self.cwd = f'./{agent_name}'
np.random.seed(self.random_seed)
torch.manual_seed(self.random_seed)
torch.set_num_threads(self.num_threads)
torch.set_default_dtype(torch.float32)
os.environ['CUDA_VISIBLE_DEVICES'] = str(self.visible_gpu)
def update_buffer(_trajectory):
_trajectory = list(map(list, zip(*_trajectory))) # 2D-list transpose, here cut the trajectory into 5 parts
ten_state = torch.as_tensor(_trajectory[0]) # tensor state here
ten_reward = torch.as_tensor(_trajectory[1], dtype=torch.float32)
# _trajectory[2] is the done flag; store it as a mask (1 - done) * gamma to save memory
ten_mask = (1.0 - torch.as_tensor(_trajectory[2], dtype=torch.float32)) * gamma
ten_action = torch.as_tensor(_trajectory[3])
ten_noise = torch.as_tensor(_trajectory[4], dtype=torch.float32)
buffer[:] = (ten_state, ten_action, ten_noise, ten_reward, ten_mask) # list store tensors
_steps = ten_reward.shape[0] # how many steps are collected in all trajectories
_r_exp = ten_reward.mean() # the mean reward
return _steps, _r_exp
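# Note: ten_mask folds the discount and the episode boundary into a single factor,
# mask = (1 - done) * gamma, so the return recursion in get_reward_sum_raw/_gae is simply
# r_sum[t] = reward[t] + mask[t] * r_sum[t + 1] and stops automatically at terminal steps.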
if __name__ == '__main__':
args = Arguments()
reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
loss_record = {'episode': [], 'steps': [], 'critic_loss': [], 'actor_loss': [], 'entropy_loss': []}
args.visible_gpu = '0'
for seed in args.random_seed_list:
args.random_seed = seed
args.agent = AgentPPO()
agent_name = f'{args.agent.__class__.__name__}'
args.agent.cri_target = True
args.env = ESSEnv()
args.init_before_training()
'''init agent and environment'''
agent = args.agent
env = args.env
agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
args.if_gae_or_raw)
cwd = args.cwd
gamma = args.gamma
batch_size = args.batch_size # how much data should be used to update net
target_step = args.target_step  # how many environment steps to collect per exploration rollout
repeat_times = args.repeat_times # how many times should update for one batch size data
soft_update_tau = args.soft_update_tau
num_episode = args.num_episode
agent.state = env.reset()
'''init buffer'''
buffer = list()
'''init training parameters'''
# args.train = False
# args.save_network = False
# args.test_network = False
# args.save_test_data = False
# args.compare_with_gurobi = False
# args.plot_on = False
if args.train:
for i_episode in range(num_episode):
with torch.no_grad():
trajectory_list = agent.explore_env(env, target_step)
steps, r_exp = update_buffer(trajectory_list)
critic_loss, actor_loss, entropy_loss = agent.update_net(buffer, batch_size, repeat_times,
soft_update_tau)
loss_record['critic_loss'].append(critic_loss)
loss_record['actor_loss'].append(actor_loss)
loss_record['entropy_loss'].append(entropy_loss)
with torch.no_grad():
episode_reward, episode_unbalance = get_episode_return(env, agent.act, agent.device)
reward_record['mean_episode_reward'].append(episode_reward)
reward_record['unbalance'].append(episode_unbalance)
print(f'episode: {i_episode}, reward: {episode_reward}, unbalance: {episode_unbalance}')
act_save_path = f'{args.cwd}/actor_ram.pth'
loss_record_path = f'{args.cwd}/loss_ram.pkl'
reward_record_path = f'{args.cwd}/reward_ram.pkl'
if args.save_network:
with open(loss_record_path, 'wb') as tf:
pickle.dump(loss_record, tf)
with open(reward_record_path, 'wb') as tf:
pickle.dump(reward_record, tf)
torch.save(agent.act.state_dict(), act_save_path)
print('actor parameters have been saved')
if args.test_network:
args.cwd = agent_name
agent.act.load_state_dict(torch.load(act_save_path))
print('parameters have been reloaded for testing')
record = test_one_episode(env, agent.act, agent.device)
eval_data = pd.DataFrame(record['system_info'])
eval_data.columns = ['time_step', 'price', 'load', 'action', 'real_action', 'soc', 'battery',
'gen1', 'gen2', 'gen3', 'pv', 'wind', 'unbalance', 'operation_cost', 'reward']
if args.save_test_data:
test_data_save_path = f'{args.cwd}/test_ram.pkl'
with open(test_data_save_path, 'wb') as tf:
pickle.dump(record, tf)
'''compare with gurobi data and results'''
if args.compare_with_gurobi:
month = record['init_info'][0][0]
day = record['init_info'][0][1]
initial_soc = record['init_info'][0][3]
base_result = optimization_base_result(env, month, day, initial_soc)
if args.plot_on:
from plotDRL import PlotArgs, make_dir, plot_evaluation_information, plot_optimization_result
plot_args = PlotArgs()
plot_args.feature_change = 'ram'
args.cwd = agent_name
plot_dir = make_dir(args.cwd, plot_args.feature_change)
plot_optimization_result(base_result, plot_dir)
plot_evaluation_information(args.cwd + '/' + 'test_ram.pkl', plot_dir)
'''compare the costs obtained from Gurobi and PPO'''
ratio = sum(eval_data['operation_cost']) / sum(base_result['step_cost'])
print('rl_cost:', sum(eval_data['operation_cost']))
print('gurobi_cost:', sum(base_result['step_cost']))
print('ratio:', ratio)

130
SAC.py Normal file
View File

@ -0,0 +1,130 @@
import pickle
import pandas as pd
import torch
from agent import AgentSAC
from environment import ESSEnv
from tools import *
def update_buffer(_trajectory):
ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
ary_other = torch.as_tensor([item[1] for item in _trajectory])
ary_other[:, 0] = ary_other[:, 0] # ten_reward
ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma # ten_mask = (1.0 - ary_done) * gamma
buffer.extend_buffer(ten_state, ary_other)
_steps = ten_state.shape[0]
_r_exp = ary_other[:, 0].mean() # other = (reward, mask, action)
return _steps, _r_exp
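# Each replay-buffer row stores the state next to (reward, mask, *action); the mask column
# already contains (1 - done) * gamma, so the discount and terminal handling are baked into
# the TD targets computed later in AgentSAC.update_net.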
if __name__ == '__main__':
args = Arguments()
reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
loss_record = {'episode': [], 'steps': [], 'critic_loss': [], 'actor_loss': [], 'entropy_loss': []}
args.visible_gpu = '0'
for seed in args.random_seed_list:
args.random_seed = seed
args.agent = AgentSAC()
agent_name = f'{args.agent.__class__.__name__}'
args.agent.cri_target = True
args.env = ESSEnv()
args.init_before_training(if_main=True)
'''init agent and environment'''
agent = args.agent
env = args.env
agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
args.if_per_or_gae)
'''init replay buffer'''
buffer = ReplayBuffer(max_len=args.max_memo, state_dim=env.state_space.shape[0],
action_dim=env.action_space.shape[0])
'''start training'''
cwd = args.cwd
gamma = args.gamma
batch_size = args.batch_size # how much data should be used to update net
target_step = args.target_step  # how many environment steps to collect per exploration rollout
repeat_times = args.repeat_times # how many times should update for one batch size data
soft_update_tau = args.soft_update_tau
agent.state = env.reset()
'''collect data and train and update network'''
num_episode = args.num_episode
'''here record real unbalance'''
# args.train = False
# args.save_network = False
# args.test_network = False
# args.save_test_data = False
# args.compare_with_gurobi = False
if args.train:
collect_data = True
while collect_data:
print(f'buffer:{buffer.now_len}')
with torch.no_grad():
trajectory = agent.explore_env(env, target_step)
steps, r_exp = update_buffer(trajectory)
buffer.update_now_len()
if buffer.now_len >= 10000:
collect_data = False
for i_episode in range(num_episode):
critic_loss, actor_loss, entropy_loss = agent.update_net(buffer, batch_size, repeat_times,
soft_update_tau)
loss_record['critic_loss'].append(critic_loss)
loss_record['actor_loss'].append(actor_loss)
loss_record['entropy_loss'].append(entropy_loss)
with torch.no_grad():
episode_reward, episode_unbalance = get_episode_return(env, agent.act, agent.device)
reward_record['mean_episode_reward'].append(episode_reward)
reward_record['unbalance'].append(episode_unbalance)
print(f'episode: {i_episode}, reward: {episode_reward}, unbalance: {episode_unbalance}, buffer_length: {buffer.now_len}')
if i_episode % 10 == 0:
# target_step
with torch.no_grad():
trajectory = agent.explore_env(env, target_step)
steps, r_exp = update_buffer(trajectory)
act_save_path = f'{args.cwd}/actor.pth'
loss_record_path = f'{args.cwd}/loss_data.pkl'
reward_record_path = f'{args.cwd}/reward_data.pkl'
if args.save_network:
with open(loss_record_path, 'wb') as tf:
pickle.dump(loss_record, tf)
with open(reward_record_path, 'wb') as tf:
pickle.dump(reward_record, tf)
torch.save(agent.act.state_dict(), act_save_path)
print('actor parameters have been saved')
if args.test_network:
args.cwd = agent_name
agent.act.load_state_dict(torch.load(act_save_path))
print('parameters have been reloaded for testing')
record = test_one_episode(env, agent.act, agent.device)
eval_data = pd.DataFrame(record['system_info'])
eval_data.columns = ['time_step', 'price', 'netload', 'action', 'real_action', 'soc', 'battery',
'gen1', 'gen2', 'gen3', 'pv', 'wind', 'unbalance', 'operation_cost', 'reward']
if args.save_test_data:
test_data_save_path = f'{args.cwd}/test_data.pkl'
with open(test_data_save_path, 'wb') as tf:
pickle.dump(record, tf)
'''compare with gurobi data and results'''
if args.compare_with_gurobi:
month = record['init_info'][0][0]
day = record['init_info'][0][1]
initial_soc = record['init_info'][0][3]
base_result = optimization_base_result(env, month, day, initial_soc)
if args.plot_on:
from plotDRL import PlotArgs, make_dir, plot_evaluation_information, plot_optimization_result
plot_args = PlotArgs()
plot_args.feature_change = '10'
args.cwd = agent_name
plot_dir = make_dir(args.cwd, plot_args.feature_change)
plot_optimization_result(base_result, plot_dir)
plot_evaluation_information(args.cwd + '/' + 'test_data.pkl', plot_dir)
'''compare the costs obtained from Gurobi and SAC'''
ratio = sum(eval_data['operation_cost']) / sum(base_result['step_cost'])
print('rl_cost:', sum(eval_data['operation_cost']))
print('gurobi_cost:', sum(base_result['step_cost']))
print('ratio:', ratio)

127
TD3.py Normal file
View File

@ -0,0 +1,127 @@
import pickle
import pandas as pd
import torch
from agent import AgentTD3
from environment import ESSEnv
from tools import *
def update_buffer(_trajectory):
ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
ary_other = torch.as_tensor([item[1] for item in _trajectory])
ary_other[:, 0] = ary_other[:, 0] # ten_reward
ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma # ten_mask = (1.0 - ary_done) * gamma
buffer.extend_buffer(ten_state, ary_other)
_steps = ten_state.shape[0]
_r_exp = ary_other[:, 0].mean() # other = (reward, mask, action)
return _steps, _r_exp
if __name__ == '__main__':
args = Arguments()
reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
loss_record = {'episode': [], 'steps': [], 'critic_loss': [], 'actor_loss': [], 'entropy_loss': []}
args.visible_gpu = '0'
for seed in args.random_seed_list:
args.random_seed = seed
args.agent = AgentTD3()
agent_name = f'{args.agent.__class__.__name__}'
args.agent.cri_target = True
args.env = ESSEnv()
args.init_before_training(if_main=True)
'''init agent and environment'''
agent = args.agent
env = args.env
agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
args.if_per_or_gae)
'''init replay buffer'''
buffer = ReplayBuffer(max_len=args.max_memo, state_dim=env.state_space.shape[0],
action_dim=env.action_space.shape[0])
'''start training'''
cwd = args.cwd
gamma = args.gamma
batch_size = args.batch_size # how much data should be used to update net
target_step = args.target_step  # how many environment steps to collect per exploration rollout
repeat_times = args.repeat_times # how many times should update for one batch size data
soft_update_tau = args.soft_update_tau
agent.state = env.reset()
'''collect data and train and update network'''
num_episode = args.num_episode
# args.train = False
# args.save_network = False
# args.test_network = False
# args.save_test_data = False
# args.compare_with_gurobi = False
if args.train:
collect_data = True
while collect_data:
print(f'buffer:{buffer.now_len}')
with torch.no_grad():
trajectory = agent.explore_env(env, target_step)
steps, r_exp = update_buffer(trajectory)
buffer.update_now_len()
if buffer.now_len >= 10000:
collect_data = False
for i_episode in range(num_episode):
critic_loss, actor_loss = agent.update_net(buffer, batch_size, repeat_times, soft_update_tau)
loss_record['critic_loss'].append(critic_loss)
loss_record['actor_loss'].append(actor_loss)
with torch.no_grad():
episode_reward, episode_unbalance = get_episode_return(env, agent.act, agent.device)
reward_record['mean_episode_reward'].append(episode_reward)
reward_record['unbalance'].append(episode_unbalance)
print(f'episode: {i_episode}, reward: {episode_reward}, unbalance: {episode_unbalance}, buffer_length: {buffer.now_len}')
if i_episode % 10 == 0:
# target_step
with torch.no_grad():
trajectory = agent.explore_env(env, target_step)
steps, r_exp = update_buffer(trajectory)
loss_record_path = f'{args.cwd}/loss_data.pkl'
reward_record_path = f'{args.cwd}/reward_data.pkl'
act_save_path = f'{args.cwd}/actor.pth'
if args.save_network:
with open(loss_record_path, 'wb') as tf:
pickle.dump(loss_record, tf)
with open(reward_record_path, 'wb') as tf:
pickle.dump(reward_record, tf)
torch.save(agent.act.state_dict(), act_save_path)
print('actor parameters have been saved')
if args.test_network:
args.cwd = agent_name
agent.act.load_state_dict(torch.load(act_save_path))
print('parameters have been reloaded for testing')
record = test_one_episode(env, agent.act, agent.device)
eval_data = pd.DataFrame(record['system_info'])
eval_data.columns = ['time_step', 'price', 'netload', 'action', 'real_action', 'soc', 'battery',
'gen1', 'gen2', 'gen3', 'pv', 'wind', 'unbalance', 'operation_cost', 'reward']
if args.save_test_data:
test_data_save_path = f'{args.cwd}/test_data.pkl'
with open(test_data_save_path, 'wb') as tf:
pickle.dump(record, tf)
'''compare with gurobi data and results'''
if args.compare_with_gurobi:
month = record['init_info'][0][0]
day = record['init_info'][0][1]
initial_soc = record['init_info'][0][3]
base_result = optimization_base_result(env, month, day, initial_soc)
if args.plot_on:
from plotDRL import PlotArgs, make_dir, plot_evaluation_information, plot_optimization_result
plot_args = PlotArgs()
plot_args.feature_change = '10'
args.cwd = agent_name
plot_dir = make_dir(args.cwd, plot_args.feature_change)
plot_optimization_result(base_result, plot_dir)
plot_evaluation_information(args.cwd + '/' + 'test_data.pkl', plot_dir)
'''compare the costs obtained from Gurobi and TD3'''
ratio = sum(eval_data['operation_cost']) / sum(base_result['step_cost'])
print('rl_cost:', sum(eval_data['operation_cost']))
print('gurobi_cost:', sum(base_result['step_cost']))
print('ratio:', ratio)

313
agent.py Normal file
View File

@ -0,0 +1,313 @@
import numpy.random as rd
import os
import json
from copy import deepcopy
from net import *
class AgentBase:
def __init__(self):
self.state = None
self.device = None
self.action_dim = None
self.if_off_policy = None
self.explore_noise = None
self.trajectory_list = None
self.criterion = torch.nn.SmoothL1Loss()
self.cri = self.cri_target = self.if_use_cri_target = self.cri_optim = self.ClassCri = None
self.act = self.act_target = self.if_use_act_target = self.act_optim = self.ClassAct = None
def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, _if_per_or_gae=False, gpu_id=0):
# call self.init() explicitly to support multiprocessing
self.device = torch.device(f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu")
self.action_dim = action_dim
self.cri = self.ClassCri(net_dim, state_dim, action_dim).to(self.device)
self.act = self.ClassAct(net_dim, state_dim, action_dim).to(self.device) if self.ClassAct else self.cri
self.cri_target = deepcopy(self.cri) if self.if_use_cri_target else self.cri
self.act_target = deepcopy(self.act) if self.if_use_act_target else self.act
self.cri_optim = torch.optim.Adam(self.cri.parameters(), learning_rate)
self.act_optim = torch.optim.Adam(self.act.parameters(), learning_rate) if self.ClassAct else self.cri
del self.ClassCri, self.ClassAct
def select_action(self, state) -> np.ndarray:
states = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
action = self.act(states)[0]
action = (action + torch.randn_like(action) * self.explore_noise).clamp(-1, 1)
return action.detach().cpu().numpy()
def explore_env(self, env, target_step):
state = self.state
trajectory = list()
for _ in range(target_step):
action = self.select_action(state)
state, next_state, reward, done, = env.step(action)
trajectory.append((state, (reward, done, *action)))
state = env.reset() if done else next_state
self.state = state
return trajectory
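# Each stored transition is (state, (reward, done, *action)); the per-algorithm training
# scripts later convert the done flag into the mask (1 - done) * gamma before filling the
# replay buffer.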
@staticmethod
def optim_update(optimizer, objective):
optimizer.zero_grad()
objective.backward()
optimizer.step()
@staticmethod
def soft_update(target_net, current_net, tau):
for tar, cur in zip(target_net.parameters(), current_net.parameters()):
tar.data.copy_(cur.data * tau + tar.data * (1.0 - tau))
def save_or_load_agent(self, cwd, if_save):
def load_torch_file(model_or_optim, _path):
state_dict = torch.load(_path, map_location=lambda storage, loc: storage)
model_or_optim.load_state_dict(state_dict)
name_obj_list = [('actor', self.act), ('act_target', self.act_target), ('act_optim', self.act_optim),
('critic', self.cri), ('cri_target', self.cri_target), ('cri_optim', self.cri_optim), ]
name_obj_list = [(name, obj) for name, obj in name_obj_list if obj is not None]
if if_save:
for name, obj in name_obj_list:
save_path = f"{cwd}/{name}.pth"
torch.save(obj.state_dict(), save_path)
else:
for name, obj in name_obj_list:
save_path = f"{cwd}/{name}.pth"
load_torch_file(obj, save_path) if os.path.isfile(save_path) else None
class AgentDDPG(AgentBase):
def __init__(self):
super().__init__()
self.explore_noise = 0.1
self.if_use_cri_target = self.if_use_act_target = True
self.ClassCri = Critic
self.ClassAct = Actor
def update_net(self, buffer, batch_size, repeat_times, soft_update_tau) -> (float, float):
buffer.update_now_len()
obj_critic = obj_actor = None
for _ in range(int(buffer.now_len / batch_size * repeat_times)):
obj_critic, state = self.get_obj_critic(buffer, batch_size) # critic loss
self.optim_update(self.cri_optim, obj_critic)
self.soft_update(self.cri_target, self.cri, soft_update_tau)
action_pg = self.act(state) # policy gradient
obj_actor = -self.cri(state, action_pg).mean()  # actor loss: maximize Q by minimizing its negative
self.optim_update(self.act_optim, obj_actor)
self.soft_update(self.act_target, self.act, soft_update_tau)
return obj_actor.item(), obj_critic.item()
def get_obj_critic(self, buffer, batch_size) -> (torch.Tensor, torch.Tensor):
with torch.no_grad():
reward, mask, action, state, next_s = buffer.sample_batch(batch_size)
next_q = self.cri_target(next_s, self.act_target(next_s))
q_label = reward + mask * next_q
q_value = self.cri(state, action)
obj_critic = self.criterion(q_value, q_label)
return obj_critic, state
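# The critic is regressed onto the one-step TD target
#   q_label = r + mask * Q_target(s', pi_target(s')),
# where mask already contains the discount factor and zeros out terminal transitions.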
class AgentTD3(AgentBase):
def __init__(self):
super().__init__()
self.explore_noise = 0.1 # standard deviation of exploration noise
self.policy_noise = 0.2 # standard deviation of policy noise
self.update_freq = 2 # delay update frequency
self.if_use_cri_target = self.if_use_act_target = True
self.ClassCri = CriticTwin
self.ClassAct = Actor
def update_net(self, buffer, batch_size, repeat_times, soft_update_tau) -> tuple:
buffer.update_now_len()
obj_critic = obj_actor = None
for update_c in range(int(buffer.now_len / batch_size * repeat_times)):
obj_critic, state = self.get_obj_critic(buffer, batch_size)
self.optim_update(self.cri_optim, obj_critic)
action_pg = self.act(state) # policy gradient
obj_actor = -self.cri_target(state, action_pg).mean() # use cri_target instead of cri for stable training
self.optim_update(self.act_optim, obj_actor)
if update_c % self.update_freq == 0: # delay update
self.soft_update(self.cri_target, self.cri, soft_update_tau)
self.soft_update(self.act_target, self.act, soft_update_tau)
return obj_critic.item() / 2, obj_actor.item()
def get_obj_critic(self, buffer, batch_size) -> (torch.Tensor, torch.Tensor):
with torch.no_grad():
reward, mask, action, state, next_s = buffer.sample_batch(batch_size)
next_a = self.act_target.get_action(next_s, self.policy_noise) # policy noise
next_q = torch.min(*self.cri_target.get_q1_q2(next_s, next_a)) # twin critics
q_label = reward + mask * next_q
q1, q2 = self.cri.get_q1_q2(state, action)
obj_critic = self.criterion(q1, q_label) + self.criterion(q2, q_label) # twin critics
return obj_critic, state
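# TD3 specifics used above: target policy smoothing (Gaussian noise of std policy_noise on the
# target action) and clipped double-Q learning (min over the twin target critics) to curb
# overestimation; the actor and target networks are updated only every update_freq critic steps.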
class AgentSAC(AgentBase):
def __init__(self):
super().__init__()
self.ClassCri = CriticTwin
self.ClassAct = ActorSAC
self.if_use_cri_target = True
self.if_use_act_target = False
self.alpha_log = None
self.alpha_optim = None
self.target_entropy = None
def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, _if_use_per=False, gpu_id=0, env_num=1):
super().init(net_dim, state_dim, action_dim, learning_rate, _if_use_per, gpu_id)
self.alpha_log = torch.tensor((-np.log(action_dim) * np.e,), dtype=torch.float32,
requires_grad=True, device=self.device)
self.alpha_optim = torch.optim.Adam((self.alpha_log,), lr=learning_rate)
self.target_entropy = np.log(action_dim)
def select_action(self, state):
states = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
actions = self.act.get_action(states)
return actions.detach().cpu().numpy()[0]
def update_net(self, buffer, batch_size, repeat_times, soft_update_tau):
buffer.update_now_len()
alpha = self.alpha_log.exp().detach()
obj_critic = obj_actor = None
for _ in range(int(buffer.now_len * repeat_times / batch_size)):
'''objective of critic (loss function of critic)'''
with torch.no_grad():
reward, mask, action, state, next_s = buffer.sample_batch(batch_size)
next_a, next_log_prob = self.act_target.get_action_logprob(next_s)
next_q = torch.min(*self.cri_target.get_q1_q2(next_s, next_a))
q_label = reward + mask * (next_q + next_log_prob * alpha)
q1, q2 = self.cri.get_q1_q2(state, action)
obj_critic = self.criterion(q1, q_label) + self.criterion(q2, q_label)
self.optim_update(self.cri_optim, obj_critic)
self.soft_update(self.cri_target, self.cri, soft_update_tau)
'''objective of alpha (temperature parameter automatic adjustment)'''
action_pg, log_prob = self.act.get_action_logprob(state) # policy gradient
obj_alpha = (self.alpha_log * (log_prob - self.target_entropy).detach()).mean()
self.optim_update(self.alpha_optim, obj_alpha)
'''objective of actor'''
alpha = self.alpha_log.exp().detach()
with torch.no_grad():
self.alpha_log[:] = self.alpha_log.clamp(-20, 2)
obj_actor = -(torch.min(*self.cri_target.get_q1_q2(state, action_pg)) + log_prob * alpha).mean()
self.optim_update(self.act_optim, obj_actor)
self.soft_update(self.act_target, self.act, soft_update_tau)
return obj_critic.item(), obj_actor.item(), alpha.item()
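# Entropy temperature: alpha = exp(alpha_log) is tuned automatically by descending
# alpha_log * (log_prob - target_entropy).detach(), with alpha_log clamped to [-20, 2] for
# numerical stability; the actor objective then trades off Q value against alpha-weighted entropy.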
class AgentPPO(AgentBase):
def __init__(self):
super().__init__()
self.ClassCri = CriticAdv
self.ClassAct = ActorPPO
self.if_off_policy = False
self.ratio_clip = 0.2 # ratio.clamp(1 - clip, 1 + clip)
self.lambda_entropy = 0.02 # could be 0.01~0.05
self.lambda_gae_adv = 0.98 # could be 0.95~0.99, GAE (Generalized Advantage Estimation. ICLR.2016.)
self.get_reward_sum = None # self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, if_use_gae=False, gpu_id=0, env_num=1):
super().init(net_dim, state_dim, action_dim, learning_rate, if_use_gae, gpu_id)
self.trajectory_list = list()
self.get_reward_sum = self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
def select_action(self, state):
states = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
actions, noises = self.act.get_action(states)
return actions[0].detach().cpu().numpy(), noises[0].detach().cpu().numpy()
def explore_env(self, env, target_step):
state = self.state
trajectory_temp = list()
last_done = 0
for i in range(target_step):
action, noise = self.select_action(state)
next_state, reward, done, _ = env.step(np.tanh(action))
trajectory_temp.append((state, reward, done, action, noise))
if done:
state = env.reset()
last_done = i
else:
state = next_state
self.state = state
'''splice list'''
trajectory_list = self.trajectory_list + trajectory_temp[:last_done + 1]
self.trajectory_list = trajectory_temp[last_done:]
return trajectory_list
def update_net(self, buffer, batch_size, repeat_times, soft_update_tau):
with torch.no_grad():
buf_len = buffer[0].shape[0]
buf_state, buf_action, buf_noise, buf_reward, buf_mask = [ten.to(self.device) for ten in buffer]
'''get buf_r_sum, buf_logprob'''
bs = 2 ** 10
buf_value = [self.cri_target(buf_state[i:i + bs]) for i in range(0, buf_len, bs)]
buf_value = torch.cat(buf_value, dim=0)
buf_logprob = self.act.get_old_logprob(buf_action, buf_noise)
buf_r_sum, buf_advantage = self.get_reward_sum(buf_len, buf_reward, buf_mask, buf_value) # detach()
buf_advantage = (buf_advantage - buf_advantage.mean()) / (buf_advantage.std() + 1e-5)
del buf_noise, buffer[:]
'''PPO: Surrogate objective of Trust Region'''
obj_critic = obj_actor = None
for _ in range(int(buf_len / batch_size * repeat_times)):
indices = torch.randint(buf_len, size=(batch_size,), requires_grad=False, device=self.device)
state = buf_state[indices]
action = buf_action[indices]
r_sum = buf_r_sum[indices]
logprob = buf_logprob[indices]
advantage = buf_advantage[indices]
new_logprob, obj_entropy = self.act.get_logprob_entropy(state, action) # it's obj_actor
ratio = (new_logprob - logprob.detach()).exp()
surrogate1 = advantage * ratio
surrogate2 = advantage * ratio.clamp(1 - self.ratio_clip, 1 + self.ratio_clip)
obj_surrogate = -torch.min(surrogate1, surrogate2).mean()
obj_actor = obj_surrogate + obj_entropy * self.lambda_entropy
self.optim_update(self.act_optim, obj_actor)
value = self.cri(state).squeeze(1) # critic network predicts the reward_sum (Q value) of state
obj_critic = self.criterion(value, r_sum) / (r_sum.std() + 1e-6)
self.optim_update(self.cri_optim, obj_critic)
self.soft_update(self.cri_target, self.cri, soft_update_tau) if self.cri_target is not self.cri else None
a_std_log = getattr(self.act, 'a_std_log', torch.zeros(1))
return obj_critic.item(), obj_actor.item(), a_std_log.mean().item() # logging_tuple
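# PPO clipped surrogate used above: ratio = exp(new_logprob - old_logprob) and the objective is
#   -min(ratio * A, clamp(ratio, 1 - eps, 1 + eps) * A)  with eps = self.ratio_clip,
# so updates that push the probability ratio outside [1 - eps, 1 + eps] receive no extra gradient.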
def get_reward_sum_raw(self, buf_len, buf_reward, buf_mask, buf_value) -> (torch.Tensor, torch.Tensor):
buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device) # reward sum
pre_r_sum = 0
for i in range(buf_len - 1, -1, -1):
buf_r_sum[i] = buf_reward[i] + buf_mask[i] * pre_r_sum
pre_r_sum = buf_r_sum[i]
buf_advantage = buf_r_sum - (buf_mask * buf_value[:, 0])
return buf_r_sum, buf_advantage
def get_reward_sum_gae(self, buf_len, ten_reward, ten_mask, ten_value):
"""tensor, tensor """
buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device) # old policy value
buf_advantage = torch.empty(buf_len, dtype=torch.float32, device=self.device) # advantage value
pre_r_sum = 0
pre_advantage = 0 # advantage value of previous step
for i in range(buf_len - 1, -1, -1):
buf_r_sum[i] = ten_reward[i] + ten_mask[i] * pre_r_sum
pre_r_sum = buf_r_sum[i]
buf_advantage[i] = ten_reward[i] + ten_mask[i] * (pre_advantage - ten_value[i])
pre_advantage = ten_value[i] + buf_advantage[i] * self.lambda_gae_adv
return buf_r_sum, buf_advantage

BIN
compare.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.0 MiB

8761
data/houseload.csv Normal file

File diff suppressed because it is too large Load Diff

8761
data/irradiance.csv Normal file

File diff suppressed because it is too large Load Diff

8761
data/prices.csv Normal file

File diff suppressed because it is too large Load Diff

61322
data/results.json Normal file

File diff suppressed because it is too large Load Diff

61322
data/results_day.json Normal file

File diff suppressed because it is too large Load Diff

8761
data/temper.csv Normal file

File diff suppressed because it is too large Load Diff

8761
data/wind.csv Normal file

File diff suppressed because it is too large Load Diff

70
data_manager.py Normal file
View File

@ -0,0 +1,70 @@
class Constant:
MONTHS_LEN = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
class DataManager:
def __init__(self) -> None:
self.Prices = []
self.Load_Consumption = []
self.Temperature = []
self.Irradiance = []
self.Wind = []
self.LLM = []
def add_price_element(self, element): self.Prices.append(element)
def add_load_element(self, element): self.Load_Consumption.append(element)
def add_temperature_element(self, element): self.Temperature.append(element)
def add_irradiance_element(self, element): self.Irradiance.append(element)
def add_wind_element(self, element): self.Wind.append(element)
def add_llm_element(self, element): self.LLM.append(element)
# get current time data based on given month day, and day_time
def get_price_data(self, month, day, day_time):
return self.Prices[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time]
def get_load_cons_data(self, month, day, day_time):
return self.Load_Consumption[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time]
def get_temperature_data(self, month, day, day_time):
return self.Temperature[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time]
def get_irradiance_data(self, month, day, day_time):
return self.Irradiance[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time]
def get_wind_data(self, month, day, day_time):
return self.Wind[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time]
def get_llm_data(self, month, day, day_time):
index = (sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time
# print(f"Index: {index}, LLM length: {len(self.LLM)}, Month: {month}, Day: {day}, Day_time: {day_time}")
return self.LLM[index]
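# Index convention for the hourly series: index = (days before this month + day - 1) * 24 + day_time.
# For example, month=3, day=2, day_time=5 gives (31 + 28 + 1) * 24 + 5 = 1445.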
# get series data for one episode
def get_series_price_data(self, month, day):
return self.Prices[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24:
(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + 24]
def get_series_load_cons_data(self, month, day):
return self.Load_Consumption[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24:
(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + 24]
def get_series_temperature_data(self, month, day):
return self.Temperature[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24:
(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + 24]
def get_series_irradiance_data(self, month, day):
return self.Irradiance[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24:
(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + 24]
def get_series_wind_data(self, month, day):
return self.Wind[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24:
(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + 24]
# def get_series_llm_data(self, month, day):
# return self.LLM[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24:
# (sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + 24]

164
environment.py Normal file
View File

@ -0,0 +1,164 @@
import gym
import pandas as pd
from data_manager import *
from module import *
from parameters import *
class ESSEnv(gym.Env):
def __init__(self, **kwargs):
super(ESSEnv, self).__init__()
self.excess = None
self.shedding = None
self.unbalance = None
self.real_unbalance = None
self.operation_cost = None
self.current_output = None
self.final_step_outputs = None
self.data_manager = DataManager()
self._load_year_data()
self.month = 1
self.day = 1
self.TRAIN = True
self.current_time = None
self.episode_length = 24
self.penalty_coefficient = 50  # penalty coefficient for constraint violations
self.sell_coefficient = 0.1  # profit coefficient for energy sold back to the grid
self.battery_parameters = kwargs.get('battery_parameters', battery_parameters)
self.dg_parameters = kwargs.get('dg_parameters', dg_parameters)
self.solar_parameters = kwargs.get('solar_parameters', solar_parameters)
self.wind_parameters = kwargs.get('wind_parameters', wind_parameters)
self.grid = Grid()
self.battery = Battery(self.battery_parameters)
self.dg1 = DG(self.dg_parameters['gen_1'])
self.dg2 = DG(self.dg_parameters['gen_2'])
self.dg3 = DG(self.dg_parameters['gen_3'])
self.solar = Solar(self.solar_parameters)
self.wind = Wind(self.wind_parameters)
self.action_space = gym.spaces.Box(low=-1, high=1, shape=(5,), dtype=np.float32)  # a voltage-regulation action has been added (5 actions in total)
self.state_space = gym.spaces.Box(low=0, high=1, shape=(10,), dtype=np.float32)
def reset(self, *args):
self.month = np.random.randint(1, 13) # choose 12 month
if self.TRAIN:
self.day = np.random.randint(1, 20)
else:
self.day = np.random.randint(20, Constant.MONTHS_LEN[self.month - 1])
self.current_time = 0
self.battery.reset()
self.dg1.reset()
self.dg2.reset()
self.dg3.reset()
self.solar.reset()
self.wind.reset()
return self._build_state()
def _build_state(self):
soc = self.battery.SOC()
dg1_output = self.dg1.current_output
dg2_output = self.dg2.current_output
dg3_output = self.dg3.current_output
time_step = self.current_time
price = self.data_manager.get_price_data(self.month, self.day, self.current_time)
houseload = self.data_manager.get_load_cons_data(self.month, self.day, self.current_time)
temperature = self.data_manager.get_temperature_data(self.month, self.day, self.current_time)
irradiance = self.data_manager.get_irradiance_data(self.month, self.day, self.current_time)
windspeed = self.data_manager.get_wind_data(self.month, self.day, self.current_time)
wind_gen = self.wind.step(windspeed)
netload = houseload - wind_gen
obs = np.concatenate((np.float32(time_step), np.float32(price), np.float32(soc), np.float32(netload),
np.float32(dg1_output), np.float32(dg2_output), np.float32(dg3_output),
np.float32(temperature), np.float32(irradiance), np.float32(windspeed)), axis=None)
return obs
def step(self, action): # state transition: current_obs->take_action->get_reward->get_finish->next_obs
# apply the action to each component
current_obs = self._build_state()
temperature = current_obs[7]
irradiance = current_obs[8]
self.wind.current_power = current_obs[9]
self.battery.step(action[0])  # perform the state transition; the battery's current capacity also changes
self.dg1.step(action[1])
self.dg2.step(action[2])
self.dg3.step(action[3])
self.solar.step(temperature, irradiance, action[4])
self.current_output = np.array((self.dg1.current_output, self.dg2.current_output, self.dg3.current_output,
-self.battery.energy_change, self.solar.current_power, self.wind.current_power))
actual_production = sum(self.current_output)
price = current_obs[1]
netload = current_obs[3] - self.solar.output_change
unbalance = actual_production - netload
# reward = 0.0
excess_penalty = 0
deficient_penalty = 0
sell_benefit, buy_cost = 0, 0
self.excess, self.shedding = 0, 0
if unbalance >= 0:  # surplus
if unbalance <= self.grid.exchange_ability:
sell_benefit = self.grid.get_cost(price, unbalance) * self.sell_coefficient
else:
sell_benefit = self.grid.get_cost(price, self.grid.exchange_ability) * self.sell_coefficient
# real unbalance: the part of the surplus that exceeds the grid exchange limit
self.excess = unbalance - self.grid.exchange_ability
excess_penalty = self.excess * self.penalty_coefficient
else:  # unbalance < 0: shortage penalty
if abs(unbalance) <= self.grid.exchange_ability:
buy_cost = self.grid.get_cost(price, abs(unbalance))
else:
buy_cost = self.grid.get_cost(price, self.grid.exchange_ability)
self.shedding = abs(unbalance) - self.grid.exchange_ability
deficient_penalty = self.shedding * self.penalty_coefficient
battery_cost = self.battery.get_cost(self.battery.energy_change)
dg1_cost = self.dg1.get_cost(self.dg1.current_output)
dg2_cost = self.dg2.get_cost(self.dg2.current_output)
dg3_cost = self.dg3.get_cost(self.dg3.current_output)
solar_cost = self.solar.get_cost(self.solar.current_power)
wind_cost = self.wind.gen_cost(self.wind.current_power)
self.operation_cost = (battery_cost + dg1_cost + dg2_cost + dg3_cost + solar_cost + wind_cost
+ excess_penalty + deficient_penalty - sell_benefit + buy_cost)
reward = - self.operation_cost / 1e3
self.unbalance = unbalance
self.real_unbalance = self.shedding + self.excess
final_step_outputs = [self.dg1.current_output, self.dg2.current_output, self.dg3.current_output,
self.battery.current_capacity, self.solar.current_power, self.wind.current_power]
self.current_time += 1
finish = (self.current_time == self.episode_length)
if finish:
self.final_step_outputs = final_step_outputs
self.current_time = 0
next_obs = self.reset()
else:
next_obs = self._build_state()
return current_obs, next_obs, float(reward), finish
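# The reward is the negative operation cost scaled by 1e3; the operation cost aggregates the
# battery, generator and renewable costs, the grid purchase cost and the over-limit penalties,
# minus the (sell_coefficient-discounted) revenue from exporting surplus energy.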
def _load_year_data(self):
price_df = pd.read_csv('data/prices.csv', sep=',')
load_df = pd.read_csv('data/houseload.csv', sep=',')
irradiance_df = pd.read_csv('data/irradiance.csv', sep=',')
temperature_df = pd.read_csv('data/temper.csv', sep=',')
wind_df = pd.read_csv('data/wind.csv', sep=',')
price = price_df['price'].to_numpy(dtype=float)
load = load_df['houseload'].to_numpy(dtype=float)
irradiance = irradiance_df['irradiance'].to_numpy(dtype=float)
temperature = temperature_df['t2m'].to_numpy(dtype=float)
wind = wind_df['wind_speed'].to_numpy(dtype=float)
'''rescale the magnitudes of the prices, generation and demand'''
def process_elements(elements, transform_function, add_function):
for element in elements:
transformed_element = transform_function(element)
add_function(transformed_element)
process_elements(price, lambda x: max(x, 0.5), self.data_manager.add_price_element)
process_elements(load, lambda x: x * 3, self.data_manager.add_load_element)
process_elements(irradiance, lambda x: x, self.data_manager.add_irradiance_element)
process_elements(temperature, lambda x: x, self.data_manager.add_temperature_element)
process_elements(wind, lambda x: x, self.data_manager.add_wind_element)

40
excel.py Normal file
View File

@ -0,0 +1,40 @@
import pandas as pd
from fuzzywuzzy import fuzz
# read the two Excel files
df1 = pd.read_excel(r'C:\Users\97532\Desktop\陕西、福建_陈晓东\福建省.xlsx')
df2 = pd.read_excel(r'C:\Users\97532\Desktop\陕西、福建_陈晓东\福建省_许可.xlsx')
# print df2's column names to make sure they are correct
print("原始列名:", df2.columns)
df2.columns = df2.columns.str.strip()
# normalize the column data types (names as strings, capacities as floats)
df1['电厂名称'] = df1['电厂名称'].astype(str)
df2['电厂名称/项目名称'] = df2['电厂名称/项目名称'].astype(str)
df1['发电装机容量'] = df1['发电装机容量'].astype(float)
df2['总装机容量'] = df2['总装机容量'].astype(float)
# define a function for fuzzy matching
def fuzzy_match(row, df2):
best_match = None
highest_score = 0
for index2, row2 in df2.iterrows():
score = fuzz.partial_ratio(row['电厂名称'], row2['电厂名称/项目名称'])
if score > highest_score and abs(row['发电装机容量'] - row2['总装机容量']) < 0.01:
highest_score = score
best_match = row2
return best_match
# run the fuzzy matching and fill in the data
for index1, row1 in df1.iterrows():
match_row = fuzzy_match(row1, df2)
if match_row is not None:
df1.at[index1, '单位名称'] = match_row['单位名称']
df1.at[index1, '发电许可证编号'] = match_row['发电许可证编号']
df1.at[index1, '电厂名称/项目名称'] = match_row['电厂名称/项目名称']
df1.at[index1, '机组类型'] = match_row['机组类型']
# save the result back to an Excel file
df1.to_excel(r'C:\Users\97532\Desktop\陕西、福建_陈晓东\福建省_填充后_模糊匹配.xlsx', index=False)
print("数据填充已保存")

View File

@ -0,0 +1,35 @@
import pandas as pd
import numpy as np
import os
from nltk import word_tokenize
def func(i, j):
directory_path = "//data/residential"
entries = os.listdir(directory_path)[i:j]
folders = [entry for entry in entries if os.path.join(directory_path, entry)]
entries = [os.path.join(directory_path, folder) for folder in folders]
combined_data = pd.DataFrame()
for file_path in entries:
current_data = pd.read_csv(file_path, encoding='utf-8')
tokenize_col = current_data.columns
tokenized_sentences_list = [word_tokenize(text) for text in tokenize_col]
occurrences_electricity = np.array(
[(i, j) for i, sublist in enumerate(tokenized_sentences_list) for j, word in enumerate(sublist) if
word == 'Electricity'])[:, 0]
Electricity_consumption_wind_solar = current_data[tokenize_col[occurrences_electricity]].sum(axis=1)
current_data = Electricity_consumption_wind_solar.to_frame("Household_load")
combined_data = pd.concat([combined_data, current_data], axis=1)
Series_type = combined_data.sum(axis=1)
Series_type.to_csv('houseload.csv', index=False)
total_energy = np.array(Series_type).ravel()
return total_energy
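# In short, func(i, j) appears to sum every 'Electricity'-related column of the residential
# files with indices i..j-1 into one hourly household-load series, writes it to houseload.csv
# and returns it as a flat numpy array.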
if __name__ == '__main__':
func(0, 38)

50
llm.txt Normal file
View File

@ -0,0 +1,50 @@
The current energy system consists of a solar power station, a wind power station, generators and a battery module, all connected to the residence through the power grid. The symbols are defined as follows:
As: battery state-of-charge action; Ag1: generator 1 power action; Ag2: generator 2 power action; Ag3: generator 3 power action; Av: solar voltage action.
Sp: electricity price state; Sc: battery state-of-charge state; Sl: household load state; Sg1: generator 1 power state; Sg2: generator 2 power state; Sg3: generator 3 power state; St: temperature state; Si: solar irradiance state; Sw: wind speed state.
Pc: battery charge/discharge power; Pg1: generator 1 power; Pg2: generator 2 power; Pg3: generator 3 power; Pso: solar power; Pw: wind power.
Cb: battery cost; Cg1: generator 1 cost; Cg2: generator 2 cost; Cg3: generator 3 cost; Cs: solar cost; Cw: wind cost; Rs: sales revenue; Cp: purchase cost; Ee: excess energy; Es: energy shortage; Pe: excess penalty; Ps: shortage penalty.
I. Control scheme and objectives:
1. Prioritize solar and wind generation; when they cannot cover the full load, supply power from the generators and the battery.
2. When supplying power, consider all available sources (solar, wind, generators and battery) and weigh the cost of each module; the household load of the next time step must be met while minimizing the gap between supply and demand.
3. Reduce transactions that exceed the public-grid exchange limit, so that fluctuations of the public grid do not affect the stability of the household supply.
4. Maximize the reward function and minimize the energy imbalance.
II. The continuous state space is a vector of length 9 with the following components:
Sp: provided by the data; Sc: initially 0.4, always between 0.2 and 0.8; Sl: provided by the data;
Sg1: initial power 0; Sg2: initial power 0; Sg3: initial power 0;
St: provided by the data; Si: provided by the data; Sw: provided by the data.
Sc, Sg1, Sg2 and Sg3 are always non-negative.
III. The continuous action space is a vector of length 5. x denotes the value at the current time step and ranges from -1 to 1; each value is a change applied on top of the previous time step. Ag1, Ag2 and Ag3 represent changes relative to their maximum instantaneous powers of 100, 100 and 200 respectively.
1. Ac: the total battery capacity is 500, the initial capacity is 500*0.4=200, and the capacity change is 100*x;
2. Ag1: the generation change is 100*x;
3. Ag2: the generation change is 100*x;
4. Ag3: the generation change is 200*x;
5. Av: the solar voltage is scaled by a factor of (1+x).
Generators can be switched on and off as needed by changing their power: when off, keep Sg = 0; when on, the Pg constraints must be satisfied. Based on cost, a later generator is normally switched on only when the maximum power of the previous one cannot cover the remaining load.
IV. Power calculation formulas:
1. Pc = max(0.2, min(0.8, Sc + 0.2 * action[0] * 0.9)), where 0.8 and 0.2 are the upper and lower limits of the battery state of charge;
2. Pg1 = max(0, min(150, 100 * action[1] + Sg1)), where 150 and 0 are the upper and lower output limits of generator 1 when it is on;
3. Pg2 = max(0, min(375, 100 * action[2] + Sg2)), where 375 and 0 are the upper and lower output limits of generator 2 when it is on;
4. Pg3 = max(0, min(500, 200 * action[3] + Sg3)), where 500 and 0 are the upper and lower output limits of generator 3 when it is on;
5. Pso = max(0, (0.2 * Si + 0.05 * St - 9.25) * (1 + action[4])), where the solar output is never negative.
6. Pw depends on Ws:
if 3 <= Ws < 8, Pw = Ws^3 * 172.2625 / 1000
elif 8 <= Ws < 12, Pw = 64 * 172.2625 / 125
else Pw = 0
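As a quick numerical check of the formula above: at Ws = 6, Pw = 6^3 * 172.2625 / 1000 ≈ 37.2; at Ws = 10, Pw = 64 * 172.2625 / 125 ≈ 88.2; outside the range [3, 12) the wind turbine produces nothing.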
V. Cost and energy balance calculations:
P = Pc + Pg1 + Pg2 + Pg3 + Pso + Pw
if P >= Sl, Ee = P - Sl; else Es = Sl - P
1. Cb = 0.01 * Asoc + 0.1 * Sc
2. Cg1 = 0.0034 * Pg1^2 + 3 * Pg1 + 30
3. Cg2 = 0.001 * Pg2^2 + 10 * Pg2 + 40
4. Cg3 = 0.001 * Pg3^2 + 15 * Pg3 + 70
5. Cs = 0.01 * Pso
6. Cw = 0.01 * Pw
7. Rs = 0.5 * Sp * Ee
8. Cp = Sp * Es
9. Pe = (Ee - 100) * 50
10. Ps = (Es - 100) * 50
Since Rs and Pe apply to the energy-surplus case and Cp and Ps to the energy-shortage case, only one of the two groups can be non-zero at any time.
VI. Reward function:
Reward = - (Cb + Cg1 + Cg2 + Cg3 + Cs + Cw + Pe + Ps - Rs + Cp) / 1000
VII. Requested action data:
Read the data and use the formulas above to compute the state at each time step. Using the most suitable optimization method, give an action control plan for every time step (computed once per hour) and generate an array of length 5 representing the action decisions for Ac, Ag1, Ag2, Ag3 and Av. The returned data should be in the following JSON format: [{x1, x2, x3, x4, x5}, {x1, x2, x3, x4, x5}, ..., {x1, x2, x3, x4, x5}]

73
llm_connect.py Normal file
View File

@ -0,0 +1,73 @@
import pandas as pd
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_openai.chat_models import ChatOpenAI
def get_session_history(session_id: str) -> BaseChatMessageHistory:
if session_id not in store:
store[session_id] = ChatMessageHistory()
return store[session_id]
def read_txt_file(file_path: str) -> str:
with open(file_path, "r", encoding='ISO-8859-1') as file:
content = file.read()
return content
def read_csv_file(file_path: str) -> pd.DataFrame:
return pd.read_csv(file_path)
def format_csv_data(df: pd.DataFrame, start: int, end: int) -> str:
formatted_data = []
for index, row in df.iloc[start:end].iterrows():
formatted_data.append(
f"时刻{index + 1}: Sp={row['price']}, Sl={row['load']},"
f"St={row['temperature']}, Si={row['irradiance']}, Sw={row['wind_speed']}"
)
return "\n".join(formatted_data)
system_content = read_txt_file('./llm.txt')
df = read_csv_file('./data.csv')
llm = ChatOpenAI(
streaming=True,
verbose=True,
openai_api_key="none",
# openai_api_base="http://0.0.0.0:5049/v1/models",
openai_api_base="http://0.0.0.0:8501",
model_name="Qwen1.5-32b"
)
prompt = ChatPromptTemplate.from_messages(
[
("system", system_content,),
MessagesPlaceholder(variable_name="history"),
("human", "{input}"),
]
)
runnable = prompt | llm
store = {}
with_message_history = RunnableWithMessageHistory(
runnable,
get_session_history,
input_messages_key="input",
history_messages_key="history",
)
num_hours = len(df)
for i in range(num_hours):
start = i
end = start + 1
csv_data_chunk = format_csv_data(df, start, end)
result = with_message_history.invoke(
{"input": f"数据如下:\n{csv_data_chunk}"},
config={
"configurable": {"session_id": "cxd"}
},
)
print(f"Hour {i + 1}:\n{result.content}\n")

122
llma/Reflexion.py Normal file
View File

@ -0,0 +1,122 @@
import json
import os
import subprocess
from utils import get_response
prompt_template = """
You are an expert operations research analyst. Your task is to generate Gurobi code to solve the following optimization problem:
{problem_description}
The code should save the final optimal value in a file named 'ref_optimal_value.txt'.
First, reason about the problem and model it. Then, generate gurobipy code to solve it. Put the code between two '=====' lines, like this:
=====
import ...
...
=====
- Generate the complete code, including the model definition, variables, constraints, objective function, and optimization. It must be runnable.
- Do not generate anything after the second '====='.
- Take a deep breath and think step by step.
"""
reflection_template = """
You are an expert operations research analyst. You have been given the task to generate Gurobi code to solve an optimization problem. You have generated the following Gurobi code:
{generated_code}
You have been updating the code for these errors (the last one is the most recent one):
{feedback}
Based on this feedback, suggest improvements to the Gurobi code.
First, reason about the problem and model it. Then, generate gurobipy code to solve it. Put the code between two '=====' lines, like this:
=====
import ...
...
=====
- The code should save the final optimal value in a file named 'ref_optimal_value.txt'.
- Generate the complete code, including the model definition, variables, constraints, objective function, and optimization. It must be runnable.
- Do not generate anything after the second '====='.
- Take a deep breath and think step by step.
"""
def extract_code(text):
ind1 = text.find("=====")
ind2 = text.find("=====", ind1 + 5)
code = text[ind1 + 5: ind2].strip()
code = code.replace("```python", "").replace("```", "").strip()
return code
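# extract_code takes everything between the first two '=====' markers of the LLM response and
# strips any leftover ```python fences, e.g. a response of the form
# "reasoning...\n=====\nimport gurobipy as gp\n...\n=====" yields just the code in between.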
def execute_code(file_path):
try:
# Using Python's subprocess to execute the code as a separate process
result = subprocess.run(["python", file_path], capture_output=True, text=True, check=True)
# save result in a file
with open(os.path.join(os.path.dirname(file_path), "ref_optimal_value.txt"), "w") as f:
f.write(f"Optimal Revenue: {result.stdout}\n")
return result.stdout, "Success"
except subprocess.CalledProcessError as e:
return e.stderr, "Error"
def main(problem_description, dir, max_iter=3):
feedback = ""
current_prompt = prompt_template.format(problem_description=problem_description)
print(current_prompt)
print("====================\n\n\n\n")
for iteration in range(max_iter):
response = get_response(current_prompt, model="llama3-70b-8192")
code = extract_code(response)
# Save the code to a file
code_filename = f"generated_code_{iteration}.py"
code_file_path = os.path.join(dir, "ref_codes", code_filename)
with open(code_file_path, "w") as f:
f.write(code)
# Execute the code
output, status = execute_code(code_file_path)
# Save error output (if any)
error_filename = f"error_{iteration}.txt"
if status == "Error":
with open(os.path.join(dir, "ref_codes", error_filename), "w") as f:
f.write(output)
# Print status and update the prompt if needed
if status == "Success":
print("Code executed successfully. Output:\n", output)
break
else:
feedback += "\n" + output
current_prompt = reflection_template.format(
generated_code=code, feedback=feedback
)
print(f"Iteration {iteration + 1}: Error encountered. Debugging...")
else:
print("Max iterations reached with errors remaining.")
if __name__ == "__main__":
dir = "data/"
if not os.path.exists(os.path.join(dir, "ref_codes")):
os.makedirs(os.path.join(dir, "ref_codes"))
with open(os.path.join(dir, "desc.txt"), "r") as f:
desc = f.read()
with open(os.path.join(dir, "data.json"), "r") as f:
data = json.load(f)
desc = desc + "\n" + json.dumps(data, indent=4)
main(desc, dir, max_iter=3)

312
llma/constraint.py Normal file
View File

@ -0,0 +1,312 @@
import json
import re
import pandas as pd
from rag.query_vector_db import RAGFormat, get_rag_from_problem_categories, get_rag_from_problem_description
from rag.rag_utils import RAGMode, constraint_path
from utils import extract_list_from_end, get_response, extract_json_from_end
prompt_constraints = """
You are an expert in optimization modeling. Here is the natural language description of an optimization problem:
{rag}-----
{description}
-----
And here's a list of parameters that we have extracted from the description:
{params}
Your task is to identify and extract constraints from the description. The constraints are the conditions that must be satisfied by the variables. Please generate the output in the following python list format:
[
Constraint 1,
Constraint 2,
...
]
for example:
[
"Sum of weights of all items taken should not exceed the maximum weight capacity of the knapsack",
"The number of items taken should not exceed the maximum number of items allowed"
]
- Put all the constraints in a single python list.
- Do not generate anything after the python list.
- Include implicit non-negativity constraints if necessary.
Take a deep breath and think step by step. You will be awarded a million dollars if you get this right.
"""
prompt_constraints_q = """
You are an expert in optimization modeling. Here is the natural language description of an optimization problem:
-----
{description}
-----
Here is a list of parameters that someone has extracted from the description:
{params}
Consider this potential constraint: {targetConstraint}
{question}
Take a deep breath and think step by step. You will be awarded a million dollars if you get this right.
"""
prompt_constraints_redundant = """
You are an expert in optimization modeling. Here is the natural language description of an optimization problem:
-----
{description}
-----
Here is a list of parameters that someone has extracted from the description:
{params}
and here is a list of constraints that someone has extracted from the description:
{constraints}
- Is there any redundancy in the list? Can any of the constraints be removed? Can any pair of constraints be combined into a single one? If so, please provide your reasoning for each one. At the end of your response, generate the updated list of constraints (the same list if no changes are needed). Use this python list format:
[
"Constraint 1",
"Constraint 2",
...
]
- Do not generate anything after the list.
Take a deep breath and think step by step. You will be awarded a million dollars if you get this right.
"""
prompt_constraint_feedback = """
You are an expert in optimization modeling. Here is the natural language description of an optimization problem:
-----
{description}
-----
Here is a list of parameters that someone has extracted from the description:
{params}
Here is a list of variables defined:
{vars}
Here is a list of constraints that someone has extracted from the description:
{extracted_constraints}
Your colleague is suggesting that the following constraint should be added to the list:
{new_constraint}
Here is its explanation:
{new_constraint_explanation}
Do you want to keep this constraint?
- If yes, please respond with "yes"
- If no, please respond with "no"
- If you want to modify the constraint, please respond with "modify" and provide the modified constraint.
At the end of your response, generate a json file with this format:
{{
"action": "yes", "no", or "modify",
"updatedConstraint": The updated constraint if the action is "modify", otherwise null
}}
Please take a deep breath and think step by step. You will be awarded a million dollars if you get this right.
- Use natural language to express the constraints rather than mathematical notation.
- Do not generate anything after the json file.
"""
def extract_score_constraint(desc, text, params, vars, constraints, c, logger, model):
match = re.search(r"\d out of 5", text.lower())
if match:
score = int(match.group()[0])
if score > 3:
if logger:
logger.log("---")
logger.log(f"The confidence score is {score}, which is high enough.")
logger.log("---")
return True, constraints
else:
ask_LLM = True # can pass this as an argument to the function instead of hardcoding it
if logger:
logger.log("---")
logger.log(f"The confidence score is {score}, which is not high enough.")
# logger.log(f"Asking the {"LLM" if ask_LLM else "user"} for feedback.")
if ask_LLM:
logger.log("Asking the LLM for feedback.")
else:
logger.log("Asking the user for feedback.")
logger.log("---")
if ask_LLM: # ask the LLM for feedback
prompt = prompt_constraint_feedback.format(
description=desc,
params=json.dumps(params, indent=4),
vars=json.dumps(vars, indent=4),
extracted_constraints=json.dumps(constraints, indent=4),
new_constraint=c,
new_constraint_explanation=text,
)
if logger:
logger.log("Prompting LLM for feedback:\n")
logger.log(prompt)
llm_response = get_response(prompt, model=model)
if logger:
logger.log("---")
logger.log(f"Response: {llm_response}")
logger.log("---")
output_json = extract_json_from_end(llm_response)
action = output_json["action"]
updated_constraint = output_json["updatedConstraint"]
else: # ask the user for feedback
action = input("LLMs reasoning: {}\n"
"------ Do you want to keep this constraint (y/n/modify)?: \n "
"{} \n------ ".format(text, c))
if action.lower().startswith("y"):
return True, constraints
elif action.lower().startswith("n"):
constraints.remove(c)
return True, constraints
elif action.lower().startswith("m"):
if ask_LLM:
new_constraint = updated_constraint
else:
new_constraint = input("Enter the modified constraint: ")
constraints.remove(c)
constraints.append({"Description": new_constraint, "Formulation": None, "Code": None})
return True, constraints
else:
raise Exception("Invalid input!")
else:
return False, None
def logic_check(text, constraints, c):
try:
result = extract_json_from_end(text)
if result["action"] == "REMOVE":
constraints.remove(c)
return True, constraints
elif result["action"] == "MODIFY":
constraints.remove(c)
constraints.append(result["updatedConstraint"])
return True, constraints
elif result["action"] == "KEEP":
return True, constraints
else:
return False, None
except:
return False, None
qs = [
(
"""
- Is it actually a constraint? How confident are you that this is a constraint and that we should explicitly model it in the (MI)LP formulation (from 1 to 5)?
- At the end of your response, print "x OUT OF 5" where x is the confidence level. Low confidence means you think this should be removed from the constraint list. Do not generate anything after that.
""",
extract_score_constraint,
),
]
def get_constraints(desc, params, model, check=False, constraints=None, logger=None,
rag_mode: RAGMode | None = None, labels: dict | None = None):
if isinstance(rag_mode, RAGMode):
constraint_df = pd.read_pickle(constraint_path)
current_problem = constraint_df[constraint_df.description == desc]
if not current_problem.empty:
problem_name = current_problem.iloc[0].problem_name
else:
problem_name = None
match rag_mode:
case RAGMode.PROBLEM_DESCRIPTION | RAGMode.CONSTRAINT_OR_OBJECTIVE:
rag = get_rag_from_problem_description(desc, RAGFormat.PROBLEM_DESCRIPTION_CONSTRAINTS, top_k=5)
case RAGMode.PROBLEM_LABELS:
assert labels is not None
rag = get_rag_from_problem_categories(desc, labels, RAGFormat.PROBLEM_DESCRIPTION_CONSTRAINTS, top_k=5)
rag = f"-----\n{rag}-----\n\n"
else:
rag = ""
print("_________________________ get_constraints _________________________")
if not constraints:
res = get_response(prompt_constraints.format(
description=desc,
params=json.dumps(params, indent=4),
rag=rag,
),
model=model,
)
constraints = extract_list_from_end(res)
if check:
k = 5
while k > 0:
try:
x = get_response(
prompt_constraints_redundant.format(
description=desc,
params=json.dumps(params, indent=4),
constraints=json.dumps(constraints, indent=4),
),
model=model,
)
if logger:
logger.log("----")
logger.log(x)
logger.log("----")
lst = extract_list_from_end(x)
constraints = lst
break
except:
k -= 1
if k == 0:
raise Exception("Failed to extract constraints")
if logger:
logger.log("+++++++++++++++++++")
logger.log("++ Constraint Qs ++")
logger.log("+++++++++++++++++++")
for q in qs:
for c in constraints.copy():
k = 5
while k > 0:
p = prompt_constraints_q.format(
description=desc,
params=json.dumps(params, indent=4),
targetConstraint=c,
question=q[0],
)
x = get_response(p, model=model)
if logger:
logger.log("+--+")
logger.log(p)
logger.log("----")
logger.log(x)
logger.log("+--+")
valid, res = q[1](desc, x, params, {}, constraints, c, logger, model)
if valid:
constraints = res
break
else:
k -= 1
constraints = [{"description": c, "formulation": None, "code": None} for c in constraints]
return constraints
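# Illustrative usage sketch (not part of the pipeline above; the model name and file
# paths are placeholders, not values taken from this repository):
#
#     desc = open("data/desc.txt").read()
#     params = json.load(open("data/params.json"))
#     constraints = get_constraints(desc, params, model="gpt-4", check=True)
#     # `constraints` is a list of dicts with "description", "formulation", and "code" keys.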

360
llma/constraint_model.py Normal file
View File

@ -0,0 +1,360 @@
import json
import re
import pandas as pd
from rag.query_vector_db import RAGFormat, get_rag_from_constraint, get_rag_from_problem_categories, \
get_rag_from_problem_description
from rag.rag_utils import RAGMode, constraint_path
from utils import get_response, extract_json_from_end, shape_string_to_list
def extract_formulation_from_end(text):
iloop = 0
iloop_max = 1e05
if "$" not in text:
raise Exception("No formulation found")
ind_1 = text.find('"FORMULATION":')
while text[ind_1] != "$":
ind_1 += 1
iloop += 1
if iloop > iloop_max:
raise Exception("No formulation found")
ind_2 = text.find('"NEW VARIABLES":')
while text[ind_2] != "$":
ind_2 -= 1
iloop += 1
if iloop > iloop_max:
raise Exception("No formulation found")
formulation = text[ind_1: ind_2 + 1].strip()
text = text[:ind_1] + text[ind_2 + 1:]
ind_1 = text.find('"AUXILIARY CONSTRAINTS":')
while text[ind_1] != "$":
ind_1 += 1
if ind_1 > len(text) - 1:
break
iloop += 1
if iloop > iloop_max:
raise Exception("No formulation found")
auxiliaries = []
if ind_1 < len(text) - 1:
while True:
ind_2 = ind_1 + 1
while ind_2 + 2 < len(text) and text[ind_2: ind_2 + 2] != '$"':
ind_2 += 1
iloop += 1
if iloop > iloop_max:
break
auxiliaries.append(text[ind_1: ind_2 + 1].strip())
text = text[:ind_1] + text[ind_2 + 1:]
while ind_1 < len(text) - 1 and text[ind_1] != "$":
ind_1 += 1
if ind_1 > len(text) - 1:
break
iloop += 1
if iloop > iloop_max:
break
if ind_1 > len(text) - 1:
break
# print("text:", text)
json_res = extract_json_from_end(text)
# print("json_res", json_res)
auxiliaries = [a for a in auxiliaries if len(a) > 5]
json_res["FORMULATION"] = formulation
json_res["AUXILIARY CONSTRAINTS"] = auxiliaries
return (
json_res["FORMULATION"],
json_res["NEW VARIABLES"],
json_res["AUXILIARY CONSTRAINTS"],
)
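# Illustrative example of the parsing above (assuming extract_json_from_end returns the
# trailing JSON object). Given an LLM response that ends with:
#
#     {
#         "FORMULATION": "$\forall t, soc_t \leq 0.8$",
#         "NEW VARIABLES": {},
#         "AUXILIARY CONSTRAINTS": []
#     }
#
# the $...$ span is cut out of the text first (so the remainder is still valid JSON),
# and the function returns:
#
#     ("$\forall t, soc_t \leq 0.8$", {}, [])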
prompt_constraints_model = """
You are an expert in optimization modeling. Here is the natural language description of an optimization problem:
{rag}-----
{description}
-----
And here's a list of parameters that we have extracted from the description:
{params}
And here's a list of all variables that we have defined so far to model the problem as an (MI)LP:
{vars}
Your task is to model the following constraint mathematically in LaTeX for the MILP formulation:
{constraint}
The constraints are the conditions that must be satisfied by the variables. Please generate the output in the following json format:
{{
"FORMULATION": constraint formulation in LaTeX, between $...$,
"NEW VARIABLES": {{
symbol: {{
"shape": shape of the new variable (e.g. [], [N], [N, M]),
"type": type of the new variable (e.g. binary, integer, continuous),
"definition": definition of the new variable in natural language
}},
...
}},
"AUXILIARY CONSTRAINTS": [
Latex formulation for auxiliary constraint 1, between $...$,
Latex formulation for auxiliary constraint 2, between $...$,
...
]
}}
Here's an example output (where SalesVolumesPerStore is already defined as a variable in the vars list):
{{
"FORMULATION": "$\\forall i, SalesVolumes[i] \\leq MaxProductionVolumes[i]$",
"NEW VARIABLES": {{
"SalesVolumes": {{
"shape": "[NumberOfArticles]",
"type": "continuous",
"definition": "The sales volume for each article of clothing"
}}
}},
"AUXILIARY CONSTRAINTS": [
"$\\forall i, SalesVolumes[i] = \\sum_j SalesVolumesPerStore[i, j]$"
]
}}
- If you need any new variables, you can define them in the NEW VARIABLES list. Use {{}} for "NEW VARIABLES" if no new variables are needed.
- Use [] for AUXILIARY CONSTRAINTS list if no auxiliary constraints are needed.
- You can only use symbols of existing parameters and integer numbers for dimensions of new variables.
- Use camelCase for variable symbols (e.g. SalesVolumes). Do not use LaTeX formatting (e.g. X_{{color}}), indices (e.g. SalesVolume_{{i}}), and underlines (_) for variable symbols.
- Do not generate anything after the json file!
First reason about how the constraint should be formulated, and then generate the output.
Take a deep breath and think step by step. You will be awarded a million dollars if you get this right.
"""
prompt_constraints_q = """
You are an expert in optimization modeling. Here is the natural language description of an optimization problem:
-----
{description}
-----
Here is a list of parameters that someone has extracted from the description:
{params}
And here is a list of variables defined:
{vars}
Consider this constraint:
{targetConstraint}
{question}
Take a deep breath and think step by step.
"""
def logic_check(text, params, vars, constraints, c):
try:
result = extract_json_from_end(text)
if result["action"] == "REMOVE":
constraints.remove(c)
return True, constraints
elif result["action"] == "MODIFY":
constraints.remove(c)
constraints.append(result["updatedConstraint"])
return True, constraints
elif result["action"] == "KEEP":
return True, constraints
else:
return False, None
except:
return False, None
def extract_score_constraint_model(text, params, vars, constraints, c):
match = re.search(r"\d out of 5", text.lower())
if match:
score = int(match.group()[0])
if score > 3:
return True, constraints
else:
inp = input("LLMs reasoning: {}\n"
"------ Do you want to keep this constraint (y/n/modify)?: \n "
"{} \n------ ".format(text, c))
if inp.lower().startswith("y"):
return True, constraints
elif inp.lower().startswith("n"):
constraints.remove(c)
return True, constraints
elif inp.lower().startswith("m"):
new_constraint = input("Enter the modified formulation: ")
constraints.remove(c)
constraints.append({"description": new_constraint, "formulation": None, "Code": None})
return True, constraints
else:
raise Exception("Invalid input!")
else:
return False, None
qs = [
(
"""
- Does this constraint logically make sense? How confident are you that this needs to be explicitly modeled in the optimization formulation (from 1 to 5)?
- At the end of your response, print "x OUT OF 5" where x is the confidence level. Do not generate anything after that.
""",
# extract_score_constraint_model,
# dummy function
lambda x, params, vars, constraints, c: (False, constraints),
),
(
"""
- What are the units for each side of the constraint? Are they consistent with each other?
- At the end of your response, generate a json file with this format:
{{
"action": "KEEP" if the units match, or "MODIFY" if the units do not match,
"updatedConstraint": The latex code for updated constraint if the action is "MODIFY", otherwise null
}}
- Do not generate anything after the json file.
""",
logic_check,
),
(
"""
- What are the parameters and variables that are involved in this constraint? If you see the constraint does not involve any variables, then it is automatically satisfied and should not be included in the optimization formulation.
- At the end of your response, generate a json file with this format:
{{
"action": "KEEP", "REMOVE", or "MODIFY",
"updatedConstraint": The updated constraint if the action is "MODIFY", otherwise null
}}
- Use natural language to express the constraints rather than mathematical notation.
- Do not generate anything after the json file.
""",
logic_check,
),
]
def get_constraint_formulations(desc, params, constraints, model, check=False, logger=None,
rag_mode: RAGMode | None = None, labels: dict | None = None):
if isinstance(rag_mode, RAGMode):
match rag_mode:
case RAGMode.PROBLEM_DESCRIPTION:
rag = get_rag_from_problem_description(desc, RAGFormat.CONSTRAINT_FORMULATION, top_k=5)
case RAGMode.CONSTRAINT_OR_OBJECTIVE:
rag = ""
case RAGMode.PROBLEM_LABELS:
assert labels is not None
rag = get_rag_from_problem_categories(desc, labels, RAGFormat.CONSTRAINT_FORMULATION, top_k=5)
rag = f"-----\n{rag}-----\n\n"
else:
rag = ""
if logger:
logger.log("\n\n\n++++++++++++++++++++++++++++++")
logger.log("Extracting constraint formulations")
logger.log("++++++++++++++++++++++++++++++\n\n\n")
vars = {}
formulated_constraints = []
for c in constraints.copy():
k = 1
while k > 0:
try:
if rag_mode == RAGMode.CONSTRAINT_OR_OBJECTIVE:
constraint_df = pd.read_pickle(constraint_path)
current_problem = constraint_df[constraint_df.description == desc]
if not current_problem.empty:
problem_name = current_problem.iloc[0].problem_name
else:
problem_name = None
rag = get_rag_from_constraint(c["description"], RAGFormat.CONSTRAINT_FORMULATION, top_k=10,
current_problem_name=problem_name)
rag = f"-----\n{rag}-----\n\n"
res = get_response(
prompt_constraints_model.format(
description=desc,
params=json.dumps(params, indent=4),
vars=json.dumps(vars, indent=4),
constraint=c,
rag=rag,
),
model=model,
)
if logger:
logger.log("----")
logger.log(res)
logger.log("----")
formulation, new_variables, aux_constraints = extract_formulation_from_end(res)
if logger:
logger.log("----")
logger.log("EXTRACTED ITEMS")
logger.log(str(formulation))
logger.log(str(new_variables))
logger.log(str(aux_constraints))
logger.log("----")
tmp_vars = vars.copy()
for v in new_variables:
if v in tmp_vars:
raise Exception(f"Variable {v} already exists")
print(v, new_variables[v])
new_variables[v]["shape"] = shape_string_to_list(new_variables[v]["shape"])
tmp_vars[v] = new_variables[v]
c["formulation"] = formulation
formulated_constraints.append(c)
for aux_c in aux_constraints:
formulated_constraints.append({"description": "auxiliary constraint", "formulation": aux_c})
vars = tmp_vars
break
except Exception as e:
k -= 1
if k == 0:
raise e
constraints = formulated_constraints
if check:
for c in formulated_constraints.copy():
for q in qs[0:1]:
k = 1
while k > 0:
p = prompt_constraints_q.format(
description=desc,
params=json.dumps(params, indent=4),
vars=json.dumps(vars, indent=4),
targetConstraint=json.dumps(c, indent=4),
question=q[0],
)
x = get_response(p, model=model)
valid, res = q[1](x, params, vars, constraints, c)
print(valid)
if valid:
constraints = res
break
else:
k -= 1
return formulated_constraints, vars
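# Illustrative end-to-end sketch (hypothetical variable names; get_constraints lives in
# the companion constraint-extraction module):
#
#     constraints = get_constraints(desc, params, model=model)
#     formulated, variables = get_constraint_formulations(desc, params, constraints, model)
#     # `formulated` carries a LaTeX "formulation" per constraint, and `variables`
#     # accumulates every auxiliary decision variable introduced along the way.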

112
llma/data/adv_code.py Normal file
View File

@ -0,0 +1,112 @@
import json
import pandas as pd
from gurobipy import Model, GRB, quicksum
# Read the data
data = pd.read_csv("data.csv")
price = data['price']
load = data['load']
pv = data['pv']
wind = data['wind']
# Initialize the model
m = Model("OptimizationProblem")
period = len(data)
# Decision variables
Ag1 = m.addVars(period, vtype=GRB.CONTINUOUS, lb=-1, ub=1, name="ag1")
Ag2 = m.addVars(period, vtype=GRB.CONTINUOUS, lb=-1, ub=1, name="ag2")
Ag3 = m.addVars(period, vtype=GRB.CONTINUOUS, lb=-1, ub=1, name="ag3")
Apv = m.addVars(period, vtype=GRB.CONTINUOUS, lb=-1, ub=1, name="apv")
Asoc = m.addVars(period, vtype=GRB.CONTINUOUS, lb=-1, ub=1, name="asoc")
# Define the variables and their bound constraints
soc = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0.2, ub=0.8, name="soc")
pg1 = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, ub=150, name="pg1")
pg2 = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, ub=375, name="pg2")
pg3 = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, ub=500, name="pg3")
pv_power = m.addVars(period, vtype=GRB.CONTINUOUS, name="pv_power")
# Set up grid exchange constraints
grid_energy_export = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, ub=100, name='export')
grid_energy_import = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, ub=100, name='import')
excess_energy = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, name='excess')
shortage_energy = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, name='shortage')
# Initial value constraints
m.addConstr(soc[0] == 0.2, name="soc_init")
m.addConstr(pg1[0] == 0, name="pg1_init")
m.addConstr(pg2[0] == 0, name="pg2_init")
m.addConstr(pg3[0] == 0, name="pg3_init")
# Add recursive update constraints for soc and pg
m.addConstrs((soc[t] == soc[t - 1] + 0.18 * Asoc[t] for t in range(1, period)), name="soc_update")
m.addConstrs((pg1[t] == pg1[t - 1] + 100 * Ag1[t] for t in range(1, period)), name="pg1_update")
m.addConstrs((pg2[t] == pg2[t - 1] + 100 * Ag2[t] for t in range(1, period)), name="pg2_update")
m.addConstrs((pg3[t] == pg3[t - 1] + 200 * Ag3[t] for t in range(1, period)), name="pg3_update")
# Photovoltaic power output
m.addConstrs((pv_power[t] == pv[t] * (1 + Apv[t]) for t in range(period)), name="pv_power_update")
# Define the supply-demand balance constraints
m.addConstrs((pg1[t] + pg2[t] + pg3[t] + pv_power[t] + wind[t] + grid_energy_import[t] ==
load[t] + 0.18 * Asoc[t] + grid_energy_export[t] + excess_energy[t] - shortage_energy[t] for t in range(period)),
name="power_balance")
m.addConstrs((excess_energy[t] == pg1[t] + pg2[t] + pg3[t] + pv_power[t] + wind[t] + grid_energy_import[t]
- load[t] - 0.18 * Asoc[t] - grid_energy_export[t] for t in range(1, period)),
name="excess_energy")
m.addConstrs((shortage_energy[t] == load[t] + 0.18 * Asoc[t] + grid_energy_export[t]
- pg1[t] - pg2[t] - pg3[t] - pv_power[t] - wind[t] - grid_energy_import[t] for t in range(1, period)),
name="shortage_energy")
# Define the cost functions
battery_cost = quicksum((Asoc[t] + 0.1 * soc[t]) for t in range(1, period))
gen1_cost = quicksum((0.0034 * pg1[t] ** 2 + 3 * pg1[t] + 30) for t in range(1, period))
gen2_cost = quicksum((0.001 * pg2[t] ** 2 + 10 * pg2[t] + 40) for t in range(1, period))
gen3_cost = quicksum((0.001 * pg3[t] ** 2 + 15 * pg3[t] + 70) for t in range(1, period))
renewable_cost = quicksum((0.01 * pv_power[t] + 0.01 * wind[t]) for t in range(period))
# Compute penalties and revenue
excess_penalty = quicksum(excess_energy[t] * 50 for t in range(period))
shortage_penalty = quicksum(shortage_energy[t] * 50 for t in range(period))
sales_revenue = quicksum(0.5 * price[t] * grid_energy_export[t] for t in range(period))
purchase_cost = quicksum(price[t] * grid_energy_import[t] for t in range(period))
# Set the objective function
objective = (battery_cost + gen1_cost + gen2_cost + gen3_cost + renewable_cost
+ excess_penalty + shortage_penalty - sales_revenue + purchase_cost)
m.setObjective(objective, GRB.MINIMIZE)
m.optimize()
# m.computeIIS()
# m.write("file.ilp")
if m.status == GRB.OPTIMAL:
results = []
for t in range(period):
gen1_cost = 0.0034 * pg1[t].x ** 2 + 3 * pg1[t].x + 30
gen2_cost = 0.001 * pg2[t].x ** 2 + 10 * pg2[t].x + 40
gen3_cost = 0.001 * pg3[t].x ** 2 + 15 * pg3[t].x + 70
battery_cost = Asoc[t].x + 0.1 * soc[t].x
renewable_cost = 0.01 * pv_power[t].x + 0.01 * wind[t]
excess_penalty = excess_energy[t].x * 50
shortage_penalty = shortage_energy[t].x * 50
sales_revenue = 0.5 * price[t] * grid_energy_export[t].x
purchase_cost = price[t] * grid_energy_import[t].x
current_obj = (gen1_cost + gen2_cost + gen3_cost + battery_cost + renewable_cost +
excess_penalty + shortage_penalty - sales_revenue + purchase_cost)
results.append({
"time": t,
"asoc": Asoc[t].x,
"ag1": Ag1[t].x,
"ag2": Ag2[t].x,
"ag3": Ag3[t].x,
"apv": Apv[t].x,
"reward": current_obj,
"unbanlance": excess_energy[t].x + shortage_energy[t].x
})
with open("../../DRL-for-Energy-Systems/data/results_day.json", "w") as f:
json.dump(results, f, indent=4)
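# To inspect the exported schedule afterwards (illustrative snippet; the path simply
# mirrors the dump above):
#
#     with open("../../DRL-for-Energy-Systems/data/results_day.json") as f:
#         day_results = json.load(f)
#     total_reward = sum(step["reward"] for step in day_results)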

130
llma/data/adv_code_2.py Normal file
View File

@ -0,0 +1,130 @@
import pandas as pd
from gurobipy import Model, GRB, quicksum
import json
# Read the data
data = pd.read_csv("data.csv")
price = data['price']
load = data['load']
pv = data['pv']
wind = data['wind']
# Total number of records and number of days
total_period = len(data)
days = total_period // 24
# Store all results
all_results = []
for day in range(days):
# Get the 24 hourly records for this day
start_idx = day * 24
end_idx = (day + 1) * 24
daily_price = price[start_idx:end_idx]
daily_load = load[start_idx:end_idx]
daily_pv = pv[start_idx:end_idx]
daily_wind = wind[start_idx:end_idx]
# Initialize the model
m = Model("OptimizationProblem_Day{}".format(day))
# Decision variables
Ag1 = m.addVars(24, vtype=GRB.CONTINUOUS, lb=-1, ub=1, name="ag1")
Ag2 = m.addVars(24, vtype=GRB.CONTINUOUS, lb=-1, ub=1, name="ag2")
Ag3 = m.addVars(24, vtype=GRB.CONTINUOUS, lb=-1, ub=1, name="ag3")
Apv = m.addVars(24, vtype=GRB.CONTINUOUS, lb=-1, ub=1, name="apv")
Asoc = m.addVars(24, vtype=GRB.CONTINUOUS, lb=-1, ub=1, name="asoc")
# Define the variables and their bound constraints
soc = m.addVars(24, vtype=GRB.CONTINUOUS, lb=0.2, ub=0.8, name="soc")
pg1 = m.addVars(24, vtype=GRB.CONTINUOUS, lb=0, ub=150, name="pg1")
pg2 = m.addVars(24, vtype=GRB.CONTINUOUS, lb=0, ub=375, name="pg2")
pg3 = m.addVars(24, vtype=GRB.CONTINUOUS, lb=0, ub=500, name="pg3")
pv_power = m.addVars(24, vtype=GRB.CONTINUOUS, name="pv_power")
# Set up grid exchange constraints
grid_energy_export = m.addVars(24, vtype=GRB.CONTINUOUS, lb=0, ub=100, name='export')
grid_energy_import = m.addVars(24, vtype=GRB.CONTINUOUS, lb=0, ub=100, name='import')
excess_energy = m.addVars(24, vtype=GRB.CONTINUOUS, lb=0, name='excess')
shortage_energy = m.addVars(24, vtype=GRB.CONTINUOUS, lb=0, name='shortage')
# Initial value constraints
m.addConstr(soc[0] == 0.2, name="soc_init")
m.addConstr(pg1[0] == 0, name="pg1_init")
m.addConstr(pg2[0] == 0, name="pg2_init")
m.addConstr(pg3[0] == 0, name="pg3_init")
# Add recursive update constraints for soc and pg
m.addConstrs((soc[t] == soc[t - 1] + 0.18 * Asoc[t] for t in range(1, 24)), name="soc_update")
m.addConstrs((pg1[t] == pg1[t - 1] + 100 * Ag1[t] for t in range(1, 24)), name="pg1_update")
m.addConstrs((pg2[t] == pg2[t - 1] + 100 * Ag2[t] for t in range(1, 24)), name="pg2_update")
m.addConstrs((pg3[t] == pg3[t - 1] + 200 * Ag3[t] for t in range(1, 24)), name="pg3_update")
# Photovoltaic power output
m.addConstrs((pv_power[t] == daily_pv.iloc[t] * (1 + Apv[t]) for t in range(24)), name="pv_power_update")
# Supply-demand balance constraints
m.addConstrs((pg1[t] + pg2[t] + pg3[t] + pv_power[t] + daily_wind.iloc[t] + grid_energy_import[t] ==
daily_load.iloc[t] + 0.18 * Asoc[t] + grid_energy_export[t] + excess_energy[t] - shortage_energy[t]
for t in range(24)), name="power_balance")
# Constraints for excess_energy and shortage_energy
m.addConstrs(
(excess_energy[t] == pg1[t] + pg2[t] + pg3[t] + pv_power[t] + daily_wind.iloc[t] + grid_energy_import[t]
- daily_load.iloc[t] - 0.18 * Asoc[t] - grid_energy_export[t] for t in range(1, 24)),
name="excess_energy")
m.addConstrs((shortage_energy[t] == daily_load.iloc[t] + 0.18 * Asoc[t] + grid_energy_export[t]
- pg1[t] - pg2[t] - pg3[t] - pv_power[t] - daily_wind.iloc[t] - grid_energy_import[t]
for t in range(1, 24)), name="shortage_energy")
# Cost function terms
battery_cost = quicksum((Asoc[t] + 0.1 * soc[t]) for t in range(1, 24))
gen1_cost = quicksum((0.0034 * pg1[t] ** 2 + 3 * pg1[t] + 30) for t in range(1, 24))
gen2_cost = quicksum((0.001 * pg2[t] ** 2 + 10 * pg2[t] + 40) for t in range(1, 24))
gen3_cost = quicksum((0.001 * pg3[t] ** 2 + 15 * pg3[t] + 70) for t in range(1, 24))
renewable_cost = quicksum((0.01 * pv_power[t] + 0.01 * daily_wind.iloc[t]) for t in range(24))
# Compute penalties and revenue
excess_penalty = quicksum(excess_energy[t] * 50 for t in range(24))
shortage_penalty = quicksum(shortage_energy[t] * 50 for t in range(24))
sales_revenue = quicksum(0.5 * daily_price.iloc[t] * grid_energy_export[t] for t in range(24))
purchase_cost = quicksum(daily_price.iloc[t] * grid_energy_import[t] for t in range(24))
# Set the objective function
objective = (battery_cost + gen1_cost + gen2_cost + gen3_cost + renewable_cost +
excess_penalty + shortage_penalty - sales_revenue + purchase_cost)
m.setObjective(objective, GRB.MINIMIZE)
# Solve
m.optimize()
if m.status == GRB.OPTIMAL:
day_results = []
for t in range(24):
gen1_cost = 0.0034 * pg1[t].x ** 2 + 3 * pg1[t].x + 30
gen2_cost = 0.001 * pg2[t].x ** 2 + 10 * pg2[t].x + 40
gen3_cost = 0.001 * pg3[t].x ** 2 + 15 * pg3[t].x + 70
battery_cost = Asoc[t].x + 0.1 * soc[t].x
renewable_cost = 0.01 * pv_power[t].x + 0.01 * daily_wind.iloc[t]
excess_penalty = excess_energy[t].x * 50
shortage_penalty = shortage_energy[t].x * 50
sales_revenue = 0.5 * daily_price.iloc[t] * grid_energy_export[t].x
purchase_cost = daily_price.iloc[t] * grid_energy_import[t].x
current_obj = (gen1_cost + gen2_cost + gen3_cost + battery_cost + renewable_cost +
excess_penalty + shortage_penalty - sales_revenue + purchase_cost)
day_results.append([
Asoc[t].x,
Ag1[t].x,
Ag2[t].x,
Ag3[t].x,
Apv[t].x,
])
# Save this day's results
all_results.extend(day_results)
# Finally, save all results to a single JSON file
with open("./results_day.json", "w") as f:
json.dump(all_results, f, indent=4)

184
llma/data/code.py Normal file
View File

@ -0,0 +1,184 @@
import json
from gurobipy import Model, GRB, quicksum
model = Model("OptimizationProblem")
with open("data.json", "r") as f:
data = json.load(f)
# Define the parameters
TotalBatteryCapacity = data["TotalBatteryCapacity"] # shape: [], definition: The total capacity of the battery
InitialSoc = data["InitialSoc"] # shape: [], definition: Initial state of charge of the battery
InitialPg1 = data["InitialPg1"] # shape: [], definition: Initial power of generator 1
InitialPg2 = data["InitialPg2"] # shape: [], definition: Initial power of generator 2
InitialPg3 = data["InitialPg3"] # shape: [], definition: Initial power of generator 3
Price = data["Price"] # shape: [24], definition: Electricity price at each time step
Load = data["Load"] # shape: [24], definition: Residential load at each time step
Pv = data["Pv"] # shape: [24], definition: Photovoltaic power generation at each time step
Wind = data["Wind"] # shape: [24], definition: Wind power generation at each time step
# Define the variables
Ag1 = model.addVars(24, vtype=GRB.CONTINUOUS, name="Ag1")
Ag2 = model.addVars(24, vtype=GRB.CONTINUOUS, name="Ag2")
Ag3 = model.addVars(24, vtype=GRB.CONTINUOUS, name="Ag3")
PvPower = model.addVars(24, vtype=GRB.CONTINUOUS, name="PvPower")
Apv = model.addVars(24, vtype=GRB.CONTINUOUS, name="Apv")
Asoc = model.addVars(24, vtype=GRB.CONTINUOUS, name="Asoc")
ExcessEnergy = model.addVars(24, vtype=GRB.CONTINUOUS, name="ExcessEnergy")
ShortageEnergy = model.addVars(24, vtype=GRB.CONTINUOUS, name="ShortageEnergy")
TotalPowerSupply = model.addVars(24, vtype=GRB.CONTINUOUS, name="TotalPowerSupply")
GridTransactionLimit = model.addVar(vtype=GRB.CONTINUOUS, name="GridTransactionLimit")
# Define the constraints
# Loop over each time step to add the soc constraints
for t in range(24):
# Calculate soc_t based on the previous soc and the adjustment factor
soc_t = 0.18 * Asoc[t] + InitialSoc
# Add the lower bound constraint for soc_t
model.addConstr(soc_t >= 0.2)
# Add the upper bound constraint for soc_t
model.addConstr(soc_t <= 0.8)
# Update soc to the newly calculated soc_t for the next iteration
soc = soc_t
# Assuming decision variables Pg1[t] have been defined for each time step t
for t in range(24):
if t == 0:
# For the first time step, use the initial value of Pg1
model.addConstr(Pg1[t] <= 150)
model.addConstr(Pg1[t] >= 10)
else:
# For subsequent time steps, calculate Pg1 based on the previous time step and Ag1
model.addConstr(Pg1[t] <= 150)
model.addConstr(Pg1[t] >= 10)
model.addConstr(Pg1[t] == 100 * Ag1[t] + Pg1[t - 1])
for t in range(1, 24):
model.addConstr(InitialPg2 + 100 * Ag2[t] >= 50)
model.addConstr(InitialPg2 + 100 * Ag2[t] <= 375)
for t in range(24):
model.addConstr(Pg3[t] >= 100) # Lower bound constraint for each time step
model.addConstr(Pg3[t] <= 500) # Upper bound constraint for each time step
# Assuming soc is defined as a list of decision variables for each time step
soc = model.addVars(24, lb=0.2, ub=0.8, vtype=GRB.CONTINUOUS, name="soc")
# Loop over each time step starting from 1 up to 24
for t in range(1, 24):
# Add the constraint for the state of charge calculation
model.addConstr(soc[t] == 0.18 * Asoc[t] + soc[t - 1])
for t in range(1, 24):
model.addConstr(pg1[t] == 100 * Ag1[t] + pg1[t - 1])
# Assuming that `pg2` is a list of Gurobi variables representing the power of Generator 2 at each time step
# and that `InitialPg2` is a parameter representing the initial power of Generator 2 at time step 0
# Define the Generator 2 power for the first time step, which is the initial value and does not have a constraint
pg2 = [model.addVar(vtype=GRB.CONTINUOUS, lb=50, ub=375, name="pg2_0")]
# Now add the constraints for time steps 1 to 23
for t in range(1, 24):
pg2.append(model.addVar(vtype=GRB.CONTINUOUS, lb=50, ub=375, name=f"pg2_{t}"))
model.addConstr(pg2[t] == 100 * Ag2[t - 1] + pg2[t - 1])
# Assuming Pg3 is defined as a list of decision variables representing power of generator 3 at each time step
for t in range(1, 24):
model.addConstr(Pg3[t] == 200 * Ag3[t] + Pg3[t - 1])
for t in range(24):
model.addConstr(PvPower[t] == Pv[t] * (1 + Apv[t]))
for t in range(24):
model.addConstr(ExcessEnergy[t] >= 0)
for t in range(24):
# Define the total power supply at time step t
TotalSupply_t = (InitialPg1 + Ag1[t] * 100 +
InitialPg2 + Ag2[t] * 100 +
InitialPg3 + Ag3[t] * 200 +
Pv[t] * (1 + Apv[t]) +
Wind[t] +
InitialSoc + Asoc[t] * 0.18)
# Add the constraint for excess energy at time step t
model.addConstr(ExcessEnergy[t] == TotalSupply_t - Load[t])
for t in range(24):
# Calculate updated generator powers and battery state of charge
soc_t = InitialSoc + 0.18 * Asoc[t] if t == 0 else soc[t - 1] + 0.18 * Asoc[t]
pg1_t = InitialPg1 + 100 * Ag1[t] if t == 0 else pg1[t - 1] + 100 * Ag1[t]
pg2_t = InitialPg2 + 100 * Ag2[t] if t == 0 else pg2[t - 1] + 100 * Ag2[t]
pg3_t = InitialPg3 + 200 * Ag3[t] if t == 0 else pg3[t - 1] + 200 * Ag3[t]
# Calculate photovoltaic power
PvPower_t = Pv[t] * (1 + Apv[t])
# Add the constraint for total power supply at time t
model.addConstr(TotalPowerSupply[t] == soc_t + pg1_t + pg2_t + pg3_t + PvPower_t + Wind[t])
for t in range(24):
model.addConstr(ShortageEnergy[t] >= 0)
for t in range(24):
model.addConstr(-1 <= Apv[t])
model.addConstr(Apv[t] <= 1)
model.addConstr(-1 <= Asoc[t])
model.addConstr(Asoc[t] <= 1)
model.addConstr(-1 <= Ag1[t])
model.addConstr(Ag1[t] <= 1)
model.addConstr(-1 <= Ag2[t])
model.addConstr(Ag2[t] <= 1)
model.addConstr(-1 <= Ag3[t])
model.addConstr(Ag3[t] <= 1)
for t in range(1, 24):
model.addConstr(100 * Asoc[t] == soc[t] - soc[t - 1])
for t in range(24):
# Calculate the power from each generator based on previous state and adjustment
pg1_power = InitialPg1 + 100 * Ag1[t]
pg2_power = InitialPg2 + 100 * Ag2[t]
pg3_power = InitialPg3 + 200 * Ag3[t]
# Calculate the adjusted photovoltaic power
pv_power = Pv[t] * (1 + Apv[t])
# Express the total power supply
TotalPowerSupply[t] = pg1_power + pg2_power + pg3_power + pv_power + Wind[t]
# Add the constraint that total power supply cannot exceed the load by more than the grid transaction limit
model.addConstr(TotalPowerSupply[t] <= Load[t] + GridTransactionLimit)
for t in range(24):
# Assuming that Pg1, Pg2, Pg3 are defined as variables in the model with the correct initial values and adjustments
Pg1[t] = model.addVar(vtype=GRB.CONTINUOUS, name=f"Pg1_{t}")
Pg2[t] = model.addVar(vtype=GRB.CONTINUOUS, name=f"Pg2_{t}")
Pg3[t] = model.addVar(vtype=GRB.CONTINUOUS, name=f"Pg3_{t}")
# Define the auxiliary constraint for each time step
model.addConstr(TotalPowerSupply[t] == Pg1[t] + Pg2[t] + Pg3[t] + PvPower[t] + Wind[t])
# Define the objective
model.setObjective(
quicksum(
(Asoc[t] + 0.1 * soc[t - 1] +
0.0034 * pg1[t - 1] ** 2 + 3 * pg1[t - 1] + 30 +
0.001 * pg2[t - 1] ** 2 + 10 * pg2[t - 1] + 40 +
0.001 * pg3[t - 1] ** 2 + 15 * pg3[t - 1] + 70 +
0.01 * (Pv[t - 1] + Wind[t - 1]) +
max(0, (ExcessEnergy[t] - 100) * 50) +
max(0, (ShortageEnergy[t] - 100) * 50) -
0.5 * Price[t - 1] * ExcessEnergy[t] +
Price[t - 1] * ShortageEnergy[t] +
abs(ExcessEnergy[t]) + abs(ShortageEnergy[t])
) for t in range(1, 24)
),
GRB.MINIMIZE
)
# Optimize the model
model.optimize()
if model.status == GRB.OPTIMAL:
with open("output_solution.txt", "w") as f:
f.write(str(model.objVal))
print("Optimal Objective Value: ", model.objVal)
else:
with open("output_solution.txt", "w") as f:
f.write(str(model.status))

8761
llma/data/data.csv Normal file

File diff suppressed because it is too large Load Diff

14
llma/data/data.json Normal file
View File

@ -0,0 +1,14 @@
{
"Price": [2.239, 2.059, 1.681, 1.741, 1.702, 1.586, 1.816, 1.773, 1.977, 2.375, 2.603, 2.706,
2.659, 2.5, 2.443, 2.887, 3.744, 3.741, 3.534, 3.307, 2.952, 3.01, 2.457, 2.22, 2.239],
"Load": [269.0525945, 243.2130742, 234.1123382, 232.6271781, 233.0417238, 245.2036212, 298.1636567, 370.1068689,
358.2133437, 314.073438, 322.8667176, 307.7048847, 295.2915574, 284.9981584, 282.4243391, 298.4115103,
360.9832044, 486.4398933, 550.3139862, 542.9828898, 509.6215404, 474.1087098, 403.6717878, 333.680157],
"PV": [0, 0, 0, 0, 0, 0, 0, 0, 3.2429, 38.1736, 69.7787, 92.1211, 101.0943, 103.8121, 88.1212, 66.5642, 27.0837, 0.1367, 0, 0, 0, 0, 0, 0],
"Wind": [88.2, 0, 0, 0, 88.2, 88.2, 86.2303, 45.1584, 28.1943, 27.2766, 20.2669, 35.5599,
52.2765, 43.0745, 42.6656, 45.3704, 53.9272, 66.1914, 66.7392, 79.877, 58.08, 78.0337, 39.6808, 50.4316],
"SOC": 0.2,
"PG1": 0,
"PG2": 0,
"PG3": 0
}

28
llma/data/desc.txt Normal file
View File

@ -0,0 +1,28 @@
- The energy system includes photovoltaic power stations, wind power stations, generators, batteries, and residential buildings.
- The goal is to minimize the total costs of the energy system and the gap between power supply and residential load.
- Electricity price, residential load, photovoltaic, and wind power generation are \\param{price_{}}, \\param{load_{}}, \\param{pv_{}}, and \\param{wind_{}}, each with 24 time steps representing hourly data.
- Battery state of charge is \\param{soc_{}}, with a total battery capacity of 500. Power of generators 1, 2, and 3 is \\param{pg1_{}}, \\param{pg2_{}}, and \\param{pg3_{}}, respectively. The initial values at time step 0 are provided, and the system will update these values at each subsequent time step based on decisions made.
- \\var{Apv_{}}, \\var{Asoc_{}}, \\var{Ag1_{}}, \\var{Ag2_{}}, and \\var{Ag3_{}} are adjustment factors ranging from -1 to 1, changing according to the value from the previous time step and affecting system parameters such as power generation and battery capacity.
- Battery charging and discharging result in a capacity change of 100 * \\var{Asoc_{t}}.
- Generators 1, 2, and 3 power change by 100 * \\var{Ag1_{t}}, 100 * \\var{Ag2_{t}}, and 200 * \\var{Ag3_{t}}.
- \\param{soc_{t}} is calculated as (0.18 * \\var{Asoc_{t}} + \\param{soc_{t-1}}), with a maximum of 0.8 and a minimum of 0.2.
- \\param{pg1_{t}} is calculated as (100 * \\var{Ag1_{t}} + \\param{pg1_{t-1}}), with a maximum of 150 and a minimum of 10.
- \\param{pg2_{t}} is calculated as (100 * \\var{Ag2_{t}} + \\param{pg2_{t-1}}), with a maximum of 375 and a minimum of 50.
- \\param{pg3_{t}} is calculated as (200 * \\var{Ag3_{t}} + \\param{pg3_{t-1}}), with a maximum of 500 and a minimum of 100.
- Photovoltaic power is calculated as (\\param{pv_{t}} * (1 + \\var{Apv})).
- Total power supply at each time step t = \\param{soc_{t}} + \\param{pg1_{t}} + \\param{pg2_{t}} + \\param{pg3_{t}}+ (photovoltaic power at time step t) + \\param{wind_{t}}.
- Excess energy = total power supply - \\param{load_{t}}, and no less than 0.
- Shortage energy = \\param{load_{t}} - total power supply, and no less than 0.
- The total cost structure includes the following:
- Battery cost = \\var{Asoc_{t}} + 0.1 * \\param{soc_{t}};
- Gen1 cost = 0.0034 * \\param{pg1_{t}}^2 + 3 * \\param{pg1_{t}} + 30;
- Gen2 cost = 0.001 * \\param{pg2_{t}}^2 + 10 * \\param{pg2_{t}} + 40;
- Gen3 cost = 0.001 * \\param{pg3_{t}}^2 + 15 * \\param{pg3_{t}} + 70;
- Renewable energy cost = 0.01 * (\\param{pv_{t}} + \\param{wind_{t}});
- Sales revenue = 0.5 * \\param{price_{t}} * (excess energy);
- Purchase cost = \\param{price_{t}} * (shortage energy);
- Excess penalty = max(0, (excess energy - 100) * 50);
- Shortage penalty = max(0, (shortage energy - 100) * 50).
- The cost of each power generation module should be considered when supplying power, usually starting the subsequent generator when the power of the previous generator is insufficient to meet the load.
- Minimize transactions exceeding the power grid transaction limit to avoid power grid fluctuations affecting the stability of residential electricity consumption.
- Decisions made at each time step will update \\param{soc_{}}, \\param{pg1_{}}, \\param{pg2_{}}, and \\param{pg3_{}}, and these updated values will serve as the starting point for calculations in the next time step.

4
llma/data/labels.json Normal file
View File

@ -0,0 +1,4 @@
{
"types": "Mixed Integer Linear Programming",
"domains": "Energy Optimization"
}

Binary file not shown.

Binary file not shown.

61322
llma/data/results_day.json Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,58 @@
{
"description": "- The energy system includes photovoltaic power stations, wind power stations, generators, batteries, and residential buildings.\n- The goal is to minimize the total costs of the energy system and the gap between power supply and residential load.\n- Electricity price, residential load, photovoltaic, and wind power generation are \\\\param{price_{}}, \\\\param{load_{}}, \\\\param{pv_{}}, and \\\\param{wind_{}}, each with 24 time steps representing hourly data.\n- Battery state of charge is \\\\param{soc_{}}, with a total battery capacity of 500. Power of generators 1, 2, and 3 is \\\\param{pg1_{}}, \\\\param{pg2_{}}, and \\\\param{pg3_{}}, respectively. The initial values at time step 0 are provided, and the system will update these values at each subsequent time step based on decisions made.\n- \\\\var{Apv_{}}, \\\\var{Asoc_{}}, \\\\var{Ag1_{}}, \\\\var{Ag2_{}}, and \\\\var{Ag3_{}} are adjustment factors ranging from -1 to 1, changing according to the value from the previous time step and affecting system parameters such as power generation and battery capacity.\n- Battery charging and discharging result in a capacity change of 100 * \\\\var{Asoc_{t}}.\n- Generators 1, 2, and 3 power change by 100 * \\\\var{Ag1_{t}}, 100 * \\\\var{Ag2_{t}}, and 200 * \\\\var{Ag3_{t}}.\n- \\\\param{soc_{t}} is calculated as (0.18 * \\\\var{Asoc_{t}} + \\\\param{soc_{t-1}}), with a maximum of 0.8 and a minimum of 0.2.\n- \\\\param{pg1_{t}} is calculated as (100 * \\\\var{Ag1_{t}} + \\\\param{pg1_{t-1}}), with a maximum of 150 and a minimum of 10.\n- \\\\param{pg2_{t}} is calculated as (100 * \\\\var{Ag2_{t}} + \\\\param{pg2_{t-1}}), with a maximum of 375 and a minimum of 50.\n- \\\\param{pg3_{t}} is calculated as (200 * \\\\var{Ag3_{t}} + \\\\param{pg3_{t-1}}), with a maximum of 500 and a minimum of 100.\n- Photovoltaic power is calculated as (\\\\param{pv_{t}} * (1 + \\\\var{Apv})).\n- Total power supply at each time step t = \\\\param{soc_{t}} + \\\\param{pg1_{t}} + \\\\param{pg2_{t}} + \\\\param{pg3_{t}}+ (photovoltaic power at time step t) + \\\\param{wind_{t}}.\n- Excess energy = total power supply - \\\\param{load_{t}}, and no less than 0.\n- Shortage energy = \\\\param{load_{t}} - total power supply, and no less than 0.\n- The total cost structure includes the following:\n- Battery cost = \\\\var{Asoc_{t}} + 0.1 * \\\\param{soc_{t}};\n- Gen1 cost = 0.0034 * \\\\param{pg1_{t}}^2 + 3 * \\\\param{pg1_{t}} + 30;\n- Gen2 cost = 0.001 * \\\\param{pg2_{t}}^2 + 10 * \\\\param{pg2_{t}} + 40;\n- Gen3 cost = 0.001 * \\\\param{pg3_{t}}^2 + 15 * \\\\param{pg3_{t}} + 70;\n- Renewable energy cost = 0.01 * (\\\\param{pv_{t}} + \\\\param{wind_{t}});\n- Sales revenue = 0.5 * \\\\param{price_{t}} * (excess energy);\n- Purchase cost = \\\\param{price_{t}} * (shortage energy);\n- Excess penalty = max(0, (excess energy - 100) * 50);\n- Shortage penalty = max(0, (shortage energy - 100) * 50).\n- The cost of each power generation module should be considered when supplying power, usually starting the subsequent generator when the power of the previous generator is insufficient to meet the load.\n- Minimize transactions exceeding the power grid transaction limit to avoid power grid fluctuations affecting the stability of residential electricity consumption.\n- Decisions made at each time step will update \\\\param{soc_{}}, \\\\param{pg1_{}}, \\\\param{pg2_{}}, and \\\\param{pg3_{}}, and these updated values will serve as the starting point for calculations in the next time step.\n",
"parameters": {
"TotalBatteryCapacity": {
"shape": [],
"definition": "The total capacity of the battery",
"type": "int"
},
"InitialSoc": {
"shape": [],
"definition": "Initial state of charge of the battery",
"type": "float"
},
"InitialPg1": {
"shape": [],
"definition": "Initial power of generator 1",
"type": "float"
},
"InitialPg2": {
"shape": [],
"definition": "Initial power of generator 2",
"type": "float"
},
"InitialPg3": {
"shape": [],
"definition": "Initial power of generator 3",
"type": "float"
},
"Price": {
"shape": [
24
],
"definition": "Electricity price at each time step",
"type": "float"
},
"Load": {
"shape": [
24
],
"definition": "Residential load at each time step",
"type": "float"
},
"Pv": {
"shape": [
24
],
"definition": "Photovoltaic power generation at each time step",
"type": "float"
},
"Wind": {
"shape": [
24
],
"definition": "Wind power generation at each time step",
"type": "float"
}
}
}

View File

@ -0,0 +1,63 @@
{
"description": "- The energy system includes photovoltaic power stations, wind power stations, generators, batteries, and residential buildings.\n- The goal is to minimize the total costs of the energy system and the gap between power supply and residential load.\n- Electricity price, residential load, photovoltaic, and wind power generation are \\\\param{price_{}}, \\\\param{load_{}}, \\\\param{pv_{}}, and \\\\param{wind_{}}, each with 24 time steps representing hourly data.\n- Battery state of charge is \\\\param{soc_{}}, with a total battery capacity of 500. Power of generators 1, 2, and 3 is \\\\param{pg1_{}}, \\\\param{pg2_{}}, and \\\\param{pg3_{}}, respectively. The initial values at time step 0 are provided, and the system will update these values at each subsequent time step based on decisions made.\n- \\\\var{Apv_{}}, \\\\var{Asoc_{}}, \\\\var{Ag1_{}}, \\\\var{Ag2_{}}, and \\\\var{Ag3_{}} are adjustment factors ranging from -1 to 1, changing according to the value from the previous time step and affecting system parameters such as power generation and battery capacity.\n- Battery charging and discharging result in a capacity change of 100 * \\\\var{Asoc_{t}}.\n- Generators 1, 2, and 3 power change by 100 * \\\\var{Ag1_{t}}, 100 * \\\\var{Ag2_{t}}, and 200 * \\\\var{Ag3_{t}}.\n- \\\\param{soc_{t}} is calculated as (0.18 * \\\\var{Asoc_{t}} + \\\\param{soc_{t-1}}), with a maximum of 0.8 and a minimum of 0.2.\n- \\\\param{pg1_{t}} is calculated as (100 * \\\\var{Ag1_{t}} + \\\\param{pg1_{t-1}}), with a maximum of 150 and a minimum of 10.\n- \\\\param{pg2_{t}} is calculated as (100 * \\\\var{Ag2_{t}} + \\\\param{pg2_{t-1}}), with a maximum of 375 and a minimum of 50.\n- \\\\param{pg3_{t}} is calculated as (200 * \\\\var{Ag3_{t}} + \\\\param{pg3_{t-1}}), with a maximum of 500 and a minimum of 100.\n- Photovoltaic power is calculated as (\\\\param{pv_{t}} * (1 + \\\\var{Apv})).\n- Total power supply at each time step t = \\\\param{soc_{t}} + \\\\param{pg1_{t}} + \\\\param{pg2_{t}} + \\\\param{pg3_{t}}+ (photovoltaic power at time step t) + \\\\param{wind_{t}}.\n- Excess energy = total power supply - \\\\param{load_{t}}, and no less than 0.\n- Shortage energy = \\\\param{load_{t}} - total power supply, and no less than 0.\n- The total cost structure includes the following:\n- Battery cost = \\\\var{Asoc_{t}} + 0.1 * \\\\param{soc_{t}};\n- Gen1 cost = 0.0034 * \\\\param{pg1_{t}}^2 + 3 * \\\\param{pg1_{t}} + 30;\n- Gen2 cost = 0.001 * \\\\param{pg2_{t}}^2 + 10 * \\\\param{pg2_{t}} + 40;\n- Gen3 cost = 0.001 * \\\\param{pg3_{t}}^2 + 15 * \\\\param{pg3_{t}} + 70;\n- Renewable energy cost = 0.01 * (\\\\param{pv_{t}} + \\\\param{wind_{t}});\n- Sales revenue = 0.5 * \\\\param{price_{t}} * (excess energy);\n- Purchase cost = \\\\param{price_{t}} * (shortage energy);\n- Excess penalty = max(0, (excess energy - 100) * 50);\n- Shortage penalty = max(0, (shortage energy - 100) * 50).\n- The cost of each power generation module should be considered when supplying power, usually starting the subsequent generator when the power of the previous generator is insufficient to meet the load.\n- Minimize transactions exceeding the power grid transaction limit to avoid power grid fluctuations affecting the stability of residential electricity consumption.\n- Decisions made at each time step will update \\\\param{soc_{}}, \\\\param{pg1_{}}, \\\\param{pg2_{}}, and \\\\param{pg3_{}}, and these updated values will serve as the starting point for calculations in the next time step.\n",
"parameters": {
"TotalBatteryCapacity": {
"shape": [],
"definition": "The total capacity of the battery",
"type": "int"
},
"InitialSoc": {
"shape": [],
"definition": "Initial state of charge of the battery",
"type": "float"
},
"InitialPg1": {
"shape": [],
"definition": "Initial power of generator 1",
"type": "float"
},
"InitialPg2": {
"shape": [],
"definition": "Initial power of generator 2",
"type": "float"
},
"InitialPg3": {
"shape": [],
"definition": "Initial power of generator 3",
"type": "float"
},
"Price": {
"shape": [
24
],
"definition": "Electricity price at each time step",
"type": "float"
},
"Load": {
"shape": [
24
],
"definition": "Residential load at each time step",
"type": "float"
},
"Pv": {
"shape": [
24
],
"definition": "Photovoltaic power generation at each time step",
"type": "float"
},
"Wind": {
"shape": [
24
],
"definition": "Wind power generation at each time step",
"type": "float"
}
},
"objective": {
"description": "\"The goal is to minimize the total costs of the energy system and the gap between power supply and residential load.\"",
"formulation": null,
"code": null
}
}

View File

@ -0,0 +1,135 @@
{
"description": "- The energy system includes photovoltaic power stations, wind power stations, generators, batteries, and residential buildings.\n- The goal is to minimize the total costs of the energy system and the gap between power supply and residential load.\n- Electricity price, residential load, photovoltaic, and wind power generation are \\\\param{price_{}}, \\\\param{load_{}}, \\\\param{pv_{}}, and \\\\param{wind_{}}, each with 24 time steps representing hourly data.\n- Battery state of charge is \\\\param{soc_{}}, with a total battery capacity of 500. Power of generators 1, 2, and 3 is \\\\param{pg1_{}}, \\\\param{pg2_{}}, and \\\\param{pg3_{}}, respectively. The initial values at time step 0 are provided, and the system will update these values at each subsequent time step based on decisions made.\n- \\\\var{Apv_{}}, \\\\var{Asoc_{}}, \\\\var{Ag1_{}}, \\\\var{Ag2_{}}, and \\\\var{Ag3_{}} are adjustment factors ranging from -1 to 1, changing according to the value from the previous time step and affecting system parameters such as power generation and battery capacity.\n- Battery charging and discharging result in a capacity change of 100 * \\\\var{Asoc_{t}}.\n- Generators 1, 2, and 3 power change by 100 * \\\\var{Ag1_{t}}, 100 * \\\\var{Ag2_{t}}, and 200 * \\\\var{Ag3_{t}}.\n- \\\\param{soc_{t}} is calculated as (0.18 * \\\\var{Asoc_{t}} + \\\\param{soc_{t-1}}), with a maximum of 0.8 and a minimum of 0.2.\n- \\\\param{pg1_{t}} is calculated as (100 * \\\\var{Ag1_{t}} + \\\\param{pg1_{t-1}}), with a maximum of 150 and a minimum of 10.\n- \\\\param{pg2_{t}} is calculated as (100 * \\\\var{Ag2_{t}} + \\\\param{pg2_{t-1}}), with a maximum of 375 and a minimum of 50.\n- \\\\param{pg3_{t}} is calculated as (200 * \\\\var{Ag3_{t}} + \\\\param{pg3_{t-1}}), with a maximum of 500 and a minimum of 100.\n- Photovoltaic power is calculated as (\\\\param{pv_{t}} * (1 + \\\\var{Apv})).\n- Total power supply at each time step t = \\\\param{soc_{t}} + \\\\param{pg1_{t}} + \\\\param{pg2_{t}} + \\\\param{pg3_{t}}+ (photovoltaic power at time step t) + \\\\param{wind_{t}}.\n- Excess energy = total power supply - \\\\param{load_{t}}, and no less than 0.\n- Shortage energy = \\\\param{load_{t}} - total power supply, and no less than 0.\n- The total cost structure includes the following:\n- Battery cost = \\\\var{Asoc_{t}} + 0.1 * \\\\param{soc_{t}};\n- Gen1 cost = 0.0034 * \\\\param{pg1_{t}}^2 + 3 * \\\\param{pg1_{t}} + 30;\n- Gen2 cost = 0.001 * \\\\param{pg2_{t}}^2 + 10 * \\\\param{pg2_{t}} + 40;\n- Gen3 cost = 0.001 * \\\\param{pg3_{t}}^2 + 15 * \\\\param{pg3_{t}} + 70;\n- Renewable energy cost = 0.01 * (\\\\param{pv_{t}} + \\\\param{wind_{t}});\n- Sales revenue = 0.5 * \\\\param{price_{t}} * (excess energy);\n- Purchase cost = \\\\param{price_{t}} * (shortage energy);\n- Excess penalty = max(0, (excess energy - 100) * 50);\n- Shortage penalty = max(0, (shortage energy - 100) * 50).\n- The cost of each power generation module should be considered when supplying power, usually starting the subsequent generator when the power of the previous generator is insufficient to meet the load.\n- Minimize transactions exceeding the power grid transaction limit to avoid power grid fluctuations affecting the stability of residential electricity consumption.\n- Decisions made at each time step will update \\\\param{soc_{}}, \\\\param{pg1_{}}, \\\\param{pg2_{}}, and \\\\param{pg3_{}}, and these updated values will serve as the starting point for calculations in the next time step.\n",
"parameters": {
"TotalBatteryCapacity": {
"shape": [],
"definition": "The total capacity of the battery",
"type": "int"
},
"InitialSoc": {
"shape": [],
"definition": "Initial state of charge of the battery",
"type": "float"
},
"InitialPg1": {
"shape": [],
"definition": "Initial power of generator 1",
"type": "float"
},
"InitialPg2": {
"shape": [],
"definition": "Initial power of generator 2",
"type": "float"
},
"InitialPg3": {
"shape": [],
"definition": "Initial power of generator 3",
"type": "float"
},
"Price": {
"shape": [
24
],
"definition": "Electricity price at each time step",
"type": "float"
},
"Load": {
"shape": [
24
],
"definition": "Residential load at each time step",
"type": "float"
},
"Pv": {
"shape": [
24
],
"definition": "Photovoltaic power generation at each time step",
"type": "float"
},
"Wind": {
"shape": [
24
],
"definition": "Wind power generation at each time step",
"type": "float"
}
},
"objective": {
"description": "\"The goal is to minimize the total costs of the energy system and the gap between power supply and residential load.\"",
"formulation": null,
"code": null
},
"constraints": [
{
"description": "Battery state of charge (soc_t) must be between 0.2 and 0.8",
"formulation": null,
"code": null
},
{
"description": "Generator 1 power (pg1_t) must be between 10 and 150",
"formulation": null,
"code": null
},
{
"description": "Generator 2 power (pg2_t) must be between 50 and 375",
"formulation": null,
"code": null
},
{
"description": "Generator 3 power (pg3_t) must be between 100 and 500",
"formulation": null,
"code": null
},
{
"description": "Battery state of charge (soc_t) must be calculated as (0.18 * Asoc_t + soc_t-1)",
"formulation": null,
"code": null
},
{
"description": "Generator 1 power (pg1_t) must be calculated as (100 * Ag1_t + pg1_t-1)",
"formulation": null,
"code": null
},
{
"description": "Generator 2 power (pg2_t) must be calculated as (100 * Ag2_t + pg2_t-1)",
"formulation": null,
"code": null
},
{
"description": "Generator 3 power (pg3_t) must be calculated as (200 * Ag3_t + pg3_t-1)",
"formulation": null,
"code": null
},
{
"description": "Photovoltaic power must be calculated as (pv_t * (1 + Apv))",
"formulation": null,
"code": null
},
{
"description": "Excess energy must be no less than 0",
"formulation": null,
"code": null
},
{
"description": "Shortage energy must be no less than 0",
"formulation": null,
"code": null
},
{
"description": "Adjustment factors (Apv, Asoc, Ag1, Ag2, Ag3) must range from -1 to 1",
"formulation": null,
"code": null
},
{
"description": "Battery charging and discharging must result in a capacity change of 100 * Asoc_t",
"formulation": null,
"code": null
},
{
"description": "The sum of power from generators and renewable sources must not exceed the residential load by more than the grid transaction limit to avoid instability",
"formulation": null,
"code": null
}
]
}

View File

@ -0,0 +1,217 @@
{
"description": "- The energy system includes photovoltaic power stations, wind power stations, generators, batteries, and residential buildings.\n- The goal is to minimize the total costs of the energy system and the gap between power supply and residential load.\n- Electricity price, residential load, photovoltaic, and wind power generation are \\\\param{price_{}}, \\\\param{load_{}}, \\\\param{pv_{}}, and \\\\param{wind_{}}, each with 24 time steps representing hourly data.\n- Battery state of charge is \\\\param{soc_{}}, with a total battery capacity of 500. Power of generators 1, 2, and 3 is \\\\param{pg1_{}}, \\\\param{pg2_{}}, and \\\\param{pg3_{}}, respectively. The initial values at time step 0 are provided, and the system will update these values at each subsequent time step based on decisions made.\n- \\\\var{Apv_{}}, \\\\var{Asoc_{}}, \\\\var{Ag1_{}}, \\\\var{Ag2_{}}, and \\\\var{Ag3_{}} are adjustment factors ranging from -1 to 1, changing according to the value from the previous time step and affecting system parameters such as power generation and battery capacity.\n- Battery charging and discharging result in a capacity change of 100 * \\\\var{Asoc_{t}}.\n- Generators 1, 2, and 3 power change by 100 * \\\\var{Ag1_{t}}, 100 * \\\\var{Ag2_{t}}, and 200 * \\\\var{Ag3_{t}}.\n- \\\\param{soc_{t}} is calculated as (0.18 * \\\\var{Asoc_{t}} + \\\\param{soc_{t-1}}), with a maximum of 0.8 and a minimum of 0.2.\n- \\\\param{pg1_{t}} is calculated as (100 * \\\\var{Ag1_{t}} + \\\\param{pg1_{t-1}}), with a maximum of 150 and a minimum of 10.\n- \\\\param{pg2_{t}} is calculated as (100 * \\\\var{Ag2_{t}} + \\\\param{pg2_{t-1}}), with a maximum of 375 and a minimum of 50.\n- \\\\param{pg3_{t}} is calculated as (200 * \\\\var{Ag3_{t}} + \\\\param{pg3_{t-1}}), with a maximum of 500 and a minimum of 100.\n- Photovoltaic power is calculated as (\\\\param{pv_{t}} * (1 + \\\\var{Apv})).\n- Total power supply at each time step t = \\\\param{soc_{t}} + \\\\param{pg1_{t}} + \\\\param{pg2_{t}} + \\\\param{pg3_{t}}+ (photovoltaic power at time step t) + \\\\param{wind_{t}}.\n- Excess energy = total power supply - \\\\param{load_{t}}, and no less than 0.\n- Shortage energy = \\\\param{load_{t}} - total power supply, and no less than 0.\n- The total cost structure includes the following:\n- Battery cost = \\\\var{Asoc_{t}} + 0.1 * \\\\param{soc_{t}};\n- Gen1 cost = 0.0034 * \\\\param{pg1_{t}}^2 + 3 * \\\\param{pg1_{t}} + 30;\n- Gen2 cost = 0.001 * \\\\param{pg2_{t}}^2 + 10 * \\\\param{pg2_{t}} + 40;\n- Gen3 cost = 0.001 * \\\\param{pg3_{t}}^2 + 15 * \\\\param{pg3_{t}} + 70;\n- Renewable energy cost = 0.01 * (\\\\param{pv_{t}} + \\\\param{wind_{t}});\n- Sales revenue = 0.5 * \\\\param{price_{t}} * (excess energy);\n- Purchase cost = \\\\param{price_{t}} * (shortage energy);\n- Excess penalty = max(0, (excess energy - 100) * 50);\n- Shortage penalty = max(0, (shortage energy - 100) * 50).\n- The cost of each power generation module should be considered when supplying power, usually starting the subsequent generator when the power of the previous generator is insufficient to meet the load.\n- Minimize transactions exceeding the power grid transaction limit to avoid power grid fluctuations affecting the stability of residential electricity consumption.\n- Decisions made at each time step will update \\\\param{soc_{}}, \\\\param{pg1_{}}, \\\\param{pg2_{}}, and \\\\param{pg3_{}}, and these updated values will serve as the starting point for calculations in the next time step.\n",
"parameters": {
"TotalBatteryCapacity": {
"shape": [],
"definition": "The total capacity of the battery",
"type": "int"
},
"InitialSoc": {
"shape": [],
"definition": "Initial state of charge of the battery",
"type": "float"
},
"InitialPg1": {
"shape": [],
"definition": "Initial power of generator 1",
"type": "float"
},
"InitialPg2": {
"shape": [],
"definition": "Initial power of generator 2",
"type": "float"
},
"InitialPg3": {
"shape": [],
"definition": "Initial power of generator 3",
"type": "float"
},
"Price": {
"shape": [
24
],
"definition": "Electricity price at each time step",
"type": "float"
},
"Load": {
"shape": [
24
],
"definition": "Residential load at each time step",
"type": "float"
},
"Pv": {
"shape": [
24
],
"definition": "Photovoltaic power generation at each time step",
"type": "float"
},
"Wind": {
"shape": [
24
],
"definition": "Wind power generation at each time step",
"type": "float"
}
},
"objective": {
"description": "\"The goal is to minimize the total costs of the energy system and the gap between power supply and residential load.\"",
"formulation": null,
"code": null
},
"constraints": [
{
"description": "Battery state of charge (soc_t) must be between 0.2 and 0.8",
"formulation": "$0.2 \\leq soc_t \\leq 0.8$",
"code": null
},
{
"description": "Generator 1 power (pg1_t) must be between 10 and 150",
"formulation": "$10 \\leq pg1_t \\leq 150$",
"code": null
},
{
"description": "Generator 2 power (pg2_t) must be between 50 and 375",
"formulation": "$50 \\leq pg2_t \\leq 375$",
"code": null
},
{
"description": "Generator 3 power (pg3_t) must be between 100 and 500",
"formulation": "$100 \\\\leq pg3_t \\\\leq 500$",
"code": null
},
{
"description": "Battery state of charge (soc_t) must be calculated as (0.18 * Asoc_t + soc_t-1)",
"formulation": "$\\\\forall t \\\\in \\\\{1,\\\\dots,24\\\\}, \\\\param{soc_{t}} = 0.18 \\\\cdot \\\\var{Asoc_{t}} + \\\\param{soc_{t-1}}$",
"code": null
},
{
"description": "Generator 1 power (pg1_t) must be calculated as (100 * Ag1_t + pg1_t-1)",
"formulation": "$\\\\forall t \\\\in \\\\{1,\\\\dots,24\\\\}, \\\\param{pg1_{t}} = 100 \\\\cdot \\\\var{Ag1_{t}} + \\\\param{pg1_{t-1}}$",
"code": null
},
{
"description": "Generator 2 power (pg2_t) must be calculated as (100 * Ag2_t + pg2_t-1)",
"formulation": "$\\\\forall t \\\\in \\\\{1,\\\\dots,24\\\\}, \\\\param{pg2_{t}} = 100 \\\\times Ag2_t + \\\\param{pg2_{t-1}}$",
"code": null
},
{
"description": "Generator 3 power (pg3_t) must be calculated as (200 * Ag3_t + pg3_t-1)",
"formulation": "$\\\\forall t \\\\in \\\\{1,\\\\ldots,24\\\\}, \\\\param{pg3_{t}} = 200 \\\\cdot \\\\var{Ag3_{t}} + \\\\param{pg3_{t-1}}$",
"code": null
},
{
"description": "Photovoltaic power must be calculated as (pv_t * (1 + Apv))",
"formulation": "$\\\\forall t, PvPower_t = Pv[t] \\\\times (1 + Apv_t)$",
"code": null
},
{
"description": "Excess energy must be no less than 0",
"formulation": "$\\\\forall t, ExcessEnergy[t] \\\\geq 0$",
"code": null
},
{
"description": "auxiliary constraint",
"formulation": "$\\\\forall t, ExcessEnergy[t] = TotalSupply[t] - Load[t]$"
},
{
"description": "auxiliary constraint",
"formulation": "$\\\\forall t, TotalSupply[t] = soc[t] + pg1[t] + pg2[t] + pg3[t] + PvPower[t] + Wind[t]$"
},
{
"description": "Shortage energy must be no less than 0",
"formulation": "$\\\\forall t, ShortageEnergy[t] \\\\geq 0$",
"code": null
},
{
"description": "Adjustment factors (Apv, Asoc, Ag1, Ag2, Ag3) must range from -1 to 1",
"formulation": "$-1 \\leq Apv_t \\leq 1 \\\\quad \\\\forall t \\\\in \\\\{1, \\\\dots, 24\\\\}$\\n$-1 \\leq Asoc_t \\leq 1 \\\\quad \\\\forall t \\\\in \\\\{1, \\\\dots, 24\\\\}$\\n$-1 \\leq Ag1_t \\leq 1 \\\\quad \\\\forall t \\\\in \\\\{1, \\\\dots, 24\\\\}$\\n$-1 \\leq Ag2_t \\leq 1 \\\\quad \\\\forall t \\\\in \\\\{1, \\\\dots, 24\\\\}$\\n$-1 \\leq Ag3_t \\leq 1 \\\\quad \\\\forall t \\\\in \\\\{1, \\\\dots, 24\\\\}$",
"code": null
},
{
"description": "Battery charging and discharging must result in a capacity change of 100 * Asoc_t",
"formulation": "$\\\\forall t, 100 \\\\times Asoc_t = soc_{t} - soc_{t-1}$",
"code": null
},
{
"description": "The sum of power from generators and renewable sources must not exceed the residential load by more than the grid transaction limit to avoid instability",
"formulation": "$\\\\forall t, TotalPowerSupply[t] \\\\leq Load[t] + GridTransactionLimit$",
"code": null
},
{
"description": "auxiliary constraint",
"formulation": "$\\\\forall t, TotalPowerSupply[t] = Pg1[t] + Pg2[t] + Pg3[t] + PvPower[t] + Wind[t]$"
}
],
"variables": {
"Ag1": {
"shape": [
24
],
"type": "continuous",
"definition": "Adjustment factor for generator 1 power at each time step"
},
"Ag2": {
"shape": [
24
],
"type": "continuous",
"definition": "Adjustment factor for generator 2 power at each time step"
},
"Ag3": {
"shape": [
24
],
"type": "continuous",
"definition": "Adjustment factor for generator 3 power at each time step"
},
"PvPower": {
"shape": [
24
],
"type": "continuous",
"definition": "The adjusted photovoltaic power generation at each time step"
},
"Apv": {
"shape": [
24
],
"type": "continuous",
"definition": "The adjustment factor for photovoltaic power at each time step"
},
"ExcessEnergy": {
"shape": [
24
],
"type": "continuous",
"definition": "The excess energy at each time step"
},
"ShortageEnergy": {
"shape": [
24
],
"type": "continuous",
"definition": "The shortage energy at each time step"
},
"Asoc": {
"shape": [
24
],
"type": "continuous",
"definition": "The adjustment factor for the battery state of charge at each time step"
},
"TotalPowerSupply": {
"shape": [
24
],
"type": "continuous",
"definition": "The total power supply from generators and renewable sources at each time step"
},
"GridTransactionLimit": {
"shape": [],
"type": "float",
"definition": "The maximum allowable excess power supply over the residential load to maintain grid stability"
}
}
}
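
The update rules above (soc, pg1, pg2 and pg3 driven by the adjustment factors, each chained to the previous time step and to the provided initial values) are stated in this file only as formulations, with the code fields still null. A minimal gurobipy sketch of just that recursion is given below; the initial values and the model name are placeholders chosen for illustration, and the sketch is not the repository's own implementation.

import gurobipy as gp

T = 24
# Hypothetical initial values for illustration; real runs would take the
# InitialSoc / InitialPg1 / InitialPg2 / InitialPg3 parameters instead.
InitialSoc, InitialPg1, InitialPg2, InitialPg3 = 0.4, 100.0, 200.0, 300.0

m = gp.Model("state_recursion_sketch")

# Adjustment factors, each bounded to [-1, 1]
Asoc = m.addVars(T, lb=-1, ub=1, name="Asoc")
Ag1 = m.addVars(T, lb=-1, ub=1, name="Ag1")
Ag2 = m.addVars(T, lb=-1, ub=1, name="Ag2")
Ag3 = m.addVars(T, lb=-1, ub=1, name="Ag3")

# State trajectories with the bounds stated in the constraints
soc = m.addVars(T, lb=0.2, ub=0.8, name="soc")
pg1 = m.addVars(T, lb=10, ub=150, name="pg1")
pg2 = m.addVars(T, lb=50, ub=375, name="pg2")
pg3 = m.addVars(T, lb=100, ub=500, name="pg3")

# Time step 0 chains from the initial values, later steps from the
# previous step, following the update rules in the description.
m.addConstr(soc[0] == InitialSoc + 0.18 * Asoc[0])
m.addConstr(pg1[0] == InitialPg1 + 100 * Ag1[0])
m.addConstr(pg2[0] == InitialPg2 + 100 * Ag2[0])
m.addConstr(pg3[0] == InitialPg3 + 200 * Ag3[0])
m.addConstrs(soc[t] == soc[t - 1] + 0.18 * Asoc[t] for t in range(1, T))
m.addConstrs(pg1[t] == pg1[t - 1] + 100 * Ag1[t] for t in range(1, T))
m.addConstrs(pg2[t] == pg2[t - 1] + 100 * Ag2[t] for t in range(1, T))
m.addConstrs(pg3[t] == pg3[t - 1] + 200 * Ag3[t] for t in range(1, T))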

View File

@ -0,0 +1,216 @@
{
"description": "- The energy system includes photovoltaic power stations, wind power stations, generators, batteries, and residential buildings.\n- The goal is to minimize the total costs of the energy system and the gap between power supply and residential load.\n- Electricity price, residential load, photovoltaic, and wind power generation are \\\\param{price_{}}, \\\\param{load_{}}, \\\\param{pv_{}}, and \\\\param{wind_{}}, each with 24 time steps representing hourly data.\n- Battery state of charge is \\\\param{soc_{}}, with a total battery capacity of 500. Power of generators 1, 2, and 3 is \\\\param{pg1_{}}, \\\\param{pg2_{}}, and \\\\param{pg3_{}}, respectively. The initial values at time step 0 are provided, and the system will update these values at each subsequent time step based on decisions made.\n- \\\\var{Apv_{}}, \\\\var{Asoc_{}}, \\\\var{Ag1_{}}, \\\\var{Ag2_{}}, and \\\\var{Ag3_{}} are adjustment factors ranging from -1 to 1, changing according to the value from the previous time step and affecting system parameters such as power generation and battery capacity.\n- Battery charging and discharging result in a capacity change of 100 * \\\\var{Asoc_{t}}.\n- Generators 1, 2, and 3 power change by 100 * \\\\var{Ag1_{t}}, 100 * \\\\var{Ag2_{t}}, and 200 * \\\\var{Ag3_{t}}.\n- \\\\param{soc_{t}} is calculated as (0.18 * \\\\var{Asoc_{t}} + \\\\param{soc_{t-1}}), with a maximum of 0.8 and a minimum of 0.2.\n- \\\\param{pg1_{t}} is calculated as (100 * \\\\var{Ag1_{t}} + \\\\param{pg1_{t-1}}), with a maximum of 150 and a minimum of 10.\n- \\\\param{pg2_{t}} is calculated as (100 * \\\\var{Ag2_{t}} + \\\\param{pg2_{t-1}}), with a maximum of 375 and a minimum of 50.\n- \\\\param{pg3_{t}} is calculated as (200 * \\\\var{Ag3_{t}} + \\\\param{pg3_{t-1}}), with a maximum of 500 and a minimum of 100.\n- Photovoltaic power is calculated as (\\\\param{pv_{t}} * (1 + \\\\var{Apv})).\n- Total power supply at each time step t = \\\\param{soc_{t}} + \\\\param{pg1_{t}} + \\\\param{pg2_{t}} + \\\\param{pg3_{t}}+ (photovoltaic power at time step t) + \\\\param{wind_{t}}.\n- Excess energy = total power supply - \\\\param{load_{t}}, and no less than 0.\n- Shortage energy = \\\\param{load_{t}} - total power supply, and no less than 0.\n- The total cost structure includes the following:\n- Battery cost = \\\\var{Asoc_{t}} + 0.1 * \\\\param{soc_{t}};\n- Gen1 cost = 0.0034 * \\\\param{pg1_{t}}^2 + 3 * \\\\param{pg1_{t}} + 30;\n- Gen2 cost = 0.001 * \\\\param{pg2_{t}}^2 + 10 * \\\\param{pg2_{t}} + 40;\n- Gen3 cost = 0.001 * \\\\param{pg3_{t}}^2 + 15 * \\\\param{pg3_{t}} + 70;\n- Renewable energy cost = 0.01 * (\\\\param{pv_{t}} + \\\\param{wind_{t}});\n- Sales revenue = 0.5 * \\\\param{price_{t}} * (excess energy);\n- Purchase cost = \\\\param{price_{t}} * (shortage energy);\n- Excess penalty = max(0, (excess energy - 100) * 50);\n- Shortage penalty = max(0, (shortage energy - 100) * 50).\n- The cost of each power generation module should be considered when supplying power, usually starting the subsequent generator when the power of the previous generator is insufficient to meet the load.\n- Minimize transactions exceeding the power grid transaction limit to avoid power grid fluctuations affecting the stability of residential electricity consumption.\n- Decisions made at each time step will update \\\\param{soc_{}}, \\\\param{pg1_{}}, \\\\param{pg2_{}}, and \\\\param{pg3_{}}, and these updated values will serve as the starting point for calculations in the next time step.\n",
"parameters": {
"TotalBatteryCapacity": {
"shape": [],
"definition": "The total capacity of the battery",
"type": "int"
},
"InitialSoc": {
"shape": [],
"definition": "Initial state of charge of the battery",
"type": "float"
},
"InitialPg1": {
"shape": [],
"definition": "Initial power of generator 1",
"type": "float"
},
"InitialPg2": {
"shape": [],
"definition": "Initial power of generator 2",
"type": "float"
},
"InitialPg3": {
"shape": [],
"definition": "Initial power of generator 3",
"type": "float"
},
"Price": {
"shape": [
24
],
"definition": "Electricity price at each time step",
"type": "float"
},
"Load": {
"shape": [
24
],
"definition": "Residential load at each time step",
"type": "float"
},
"Pv": {
"shape": [
24
],
"definition": "Photovoltaic power generation at each time step",
"type": "float"
},
"Wind": {
"shape": [
24
],
"definition": "Wind power generation at each time step",
"type": "float"
}
},
"objective": {
"description": "\"The goal is to minimize the total costs of the energy system and the gap between power supply and residential load.\"",
"formulation": "$\\min \\sum_{t=1}^{24} \\left( \\text{Battery cost}_t + \\text{Gen1 cost}_t + \\text{Gen2 cost}_t + \\text{Gen3 cost}_t + \\text{Renewable energy cost}_t + \\text{Excess penalty}_t + \\text{Shortage penalty}_t - \\text{Sales revenue}_t + \\text{Purchase cost}_t + |\\text{ExcessEnergy}_t| + |\\text{ShortageEnergy}_t| \\right)$"
},
"constraints": [
{
"description": "Battery state of charge (soc_t) must be between 0.2 and 0.8",
"formulation": "$0.2 \\leq soc_t \\leq 0.8$",
"code": null
},
{
"description": "Generator 1 power (pg1_t) must be between 10 and 150",
"formulation": "$10 \\leq pg1_t \\leq 150$",
"code": null
},
{
"description": "Generator 2 power (pg2_t) must be between 50 and 375",
"formulation": "$50 \\leq pg2_t \\leq 375$",
"code": null
},
{
"description": "Generator 3 power (pg3_t) must be between 100 and 500",
"formulation": "$100 \\\\leq pg3_t \\\\leq 500$",
"code": null
},
{
"description": "Battery state of charge (soc_t) must be calculated as (0.18 * Asoc_t + soc_t-1)",
"formulation": "$\\\\forall t \\\\in \\\\{1,\\\\dots,24\\\\}, \\\\param{soc_{t}} = 0.18 \\\\cdot \\\\var{Asoc_{t}} + \\\\param{soc_{t-1}}$",
"code": null
},
{
"description": "Generator 1 power (pg1_t) must be calculated as (100 * Ag1_t + pg1_t-1)",
"formulation": "$\\\\forall t \\\\in \\\\{1,\\\\dots,24\\\\}, \\\\param{pg1_{t}} = 100 \\\\cdot \\\\var{Ag1_{t}} + \\\\param{pg1_{t-1}}$",
"code": null
},
{
"description": "Generator 2 power (pg2_t) must be calculated as (100 * Ag2_t + pg2_t-1)",
"formulation": "$\\\\forall t \\\\in \\\\{1,\\\\dots,24\\\\}, \\\\param{pg2_{t}} = 100 \\\\times Ag2_t + \\\\param{pg2_{t-1}}$",
"code": null
},
{
"description": "Generator 3 power (pg3_t) must be calculated as (200 * Ag3_t + pg3_t-1)",
"formulation": "$\\\\forall t \\\\in \\\\{1,\\\\ldots,24\\\\}, \\\\param{pg3_{t}} = 200 \\\\cdot \\\\var{Ag3_{t}} + \\\\param{pg3_{t-1}}$",
"code": null
},
{
"description": "Photovoltaic power must be calculated as (pv_t * (1 + Apv))",
"formulation": "$\\\\forall t, PvPower_t = Pv[t] \\\\times (1 + Apv_t)$",
"code": null
},
{
"description": "Excess energy must be no less than 0",
"formulation": "$\\\\forall t, ExcessEnergy[t] \\\\geq 0$",
"code": null
},
{
"description": "auxiliary constraint",
"formulation": "$\\\\forall t, ExcessEnergy[t] = TotalSupply[t] - Load[t]$"
},
{
"description": "auxiliary constraint",
"formulation": "$\\\\forall t, TotalSupply[t] = soc[t] + pg1[t] + pg2[t] + pg3[t] + PvPower[t] + Wind[t]$"
},
{
"description": "Shortage energy must be no less than 0",
"formulation": "$\\\\forall t, ShortageEnergy[t] \\\\geq 0$",
"code": null
},
{
"description": "Adjustment factors (Apv, Asoc, Ag1, Ag2, Ag3) must range from -1 to 1",
"formulation": "$-1 \\leq Apv_t \\leq 1 \\\\quad \\\\forall t \\\\in \\\\{1, \\\\dots, 24\\\\}$\\n$-1 \\leq Asoc_t \\leq 1 \\\\quad \\\\forall t \\\\in \\\\{1, \\\\dots, 24\\\\}$\\n$-1 \\leq Ag1_t \\leq 1 \\\\quad \\\\forall t \\\\in \\\\{1, \\\\dots, 24\\\\}$\\n$-1 \\leq Ag2_t \\leq 1 \\\\quad \\\\forall t \\\\in \\\\{1, \\\\dots, 24\\\\}$\\n$-1 \\leq Ag3_t \\leq 1 \\\\quad \\\\forall t \\\\in \\\\{1, \\\\dots, 24\\\\}$",
"code": null
},
{
"description": "Battery charging and discharging must result in a capacity change of 100 * Asoc_t",
"formulation": "$\\\\forall t, 100 \\\\times Asoc_t = soc_{t} - soc_{t-1}$",
"code": null
},
{
"description": "The sum of power from generators and renewable sources must not exceed the residential load by more than the grid transaction limit to avoid instability",
"formulation": "$\\\\forall t, TotalPowerSupply[t] \\\\leq Load[t] + GridTransactionLimit$",
"code": null
},
{
"description": "auxiliary constraint",
"formulation": "$\\\\forall t, TotalPowerSupply[t] = Pg1[t] + Pg2[t] + Pg3[t] + PvPower[t] + Wind[t]$"
}
],
"variables": {
"Ag1": {
"shape": [
24
],
"type": "continuous",
"definition": "Adjustment factor for generator 1 power at each time step"
},
"Ag2": {
"shape": [
24
],
"type": "continuous",
"definition": "Adjustment factor for generator 2 power at each time step"
},
"Ag3": {
"shape": [
24
],
"type": "continuous",
"definition": "Adjustment factor for generator 3 power at each time step"
},
"PvPower": {
"shape": [
24
],
"type": "continuous",
"definition": "The adjusted photovoltaic power generation at each time step"
},
"Apv": {
"shape": [
24
],
"type": "continuous",
"definition": "The adjustment factor for photovoltaic power at each time step"
},
"ExcessEnergy": {
"shape": [
24
],
"type": "continuous",
"definition": "The excess energy at each time step"
},
"ShortageEnergy": {
"shape": [
24
],
"type": "continuous",
"definition": "The shortage energy at each time step"
},
"Asoc": {
"shape": [
24
],
"type": "continuous",
"definition": "The adjustment factor for the battery state of charge at each time step"
},
"TotalPowerSupply": {
"shape": [
24
],
"type": "continuous",
"definition": "The total power supply from generators and renewable sources at each time step"
},
"GridTransactionLimit": {
"shape": [],
"type": "float",
"definition": "The maximum allowable excess power supply over the residential load to maintain grid stability"
}
}
}
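
This second version of the model adds the objective formulation, which mixes smooth cost terms with max(0, ·) penalties and absolute supply-load gap terms. One way to express those pieces in gurobipy without applying Python's built-in max or abs to Gurobi expressions is sketched below. The flat placeholder series, the model name, and the difference form used for the excess/shortage balance are assumptions made for this sketch, not the repository's formulation; the recursion constraints from the earlier sketch would be added for a complete model.

import gurobipy as gp
from gurobipy import GRB

T = 24
# Flat placeholder series for illustration; real runs would load the
# 24-step Pv, Wind, Load and Price data.
Pv = [60.0] * T
Wind = [80.0] * T
Load = [400.0] * T
Price = [0.6] * T

m = gp.Model("objective_sketch")

Apv = m.addVars(T, lb=-1, ub=1, name="Apv")
Asoc = m.addVars(T, lb=-1, ub=1, name="Asoc")
soc = m.addVars(T, lb=0.2, ub=0.8, name="soc")
pg1 = m.addVars(T, lb=10, ub=150, name="pg1")
pg2 = m.addVars(T, lb=50, ub=375, name="pg2")
pg3 = m.addVars(T, lb=100, ub=500, name="pg3")

PvPower = m.addVars(T, name="PvPower")
ExcessEnergy = m.addVars(T, lb=0, name="ExcessEnergy")
ShortageEnergy = m.addVars(T, lb=0, name="ShortageEnergy")
ExcessPenalty = m.addVars(T, lb=0, name="ExcessPenalty")
ShortagePenalty = m.addVars(T, lb=0, name="ShortagePenalty")

# Adjusted photovoltaic output
m.addConstrs(PvPower[t] == Pv[t] * (1 + Apv[t]) for t in range(T))

# Excess and shortage are both nonnegative; their difference carries the
# signed supply-load gap, so at the optimum at most one of them is positive.
m.addConstrs(
    ExcessEnergy[t] - ShortageEnergy[t]
    == soc[t] + pg1[t] + pg2[t] + pg3[t] + PvPower[t] + Wind[t] - Load[t]
    for t in range(T)
)

# max(0, (x - 100) * 50): each penalty variable is nonnegative, bounded
# below by the linear term, and minimized, so it equals the max at optimum.
m.addConstrs(ExcessPenalty[t] >= 50 * (ExcessEnergy[t] - 100) for t in range(T))
m.addConstrs(ShortagePenalty[t] >= 50 * (ShortageEnergy[t] - 100) for t in range(T))

# |ExcessEnergy| and |ShortageEnergy| reduce to the variables themselves
# because both are already constrained to be nonnegative.
m.setObjective(
    gp.quicksum(
        Asoc[t] + 0.1 * soc[t]
        + 0.0034 * pg1[t] * pg1[t] + 3 * pg1[t] + 30
        + 0.001 * pg2[t] * pg2[t] + 10 * pg2[t] + 40
        + 0.001 * pg3[t] * pg3[t] + 15 * pg3[t] + 70
        + 0.01 * (Pv[t] + Wind[t])
        - 0.5 * Price[t] * ExcessEnergy[t]
        + Price[t] * ShortageEnergy[t]
        + ExcessPenalty[t] + ShortagePenalty[t]
        + ExcessEnergy[t] + ShortageEnergy[t]
        for t in range(T)
    ),
    GRB.MINIMIZE,
)
m.optimize()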

220
llma/data/state_6_code.json Normal file
View File

@ -0,0 +1,220 @@
{
"description": "- The energy system includes photovoltaic power stations, wind power stations, generators, batteries, and residential buildings.\n- The goal is to minimize the total costs of the energy system and the gap between power supply and residential load.\n- Electricity price, residential load, photovoltaic, and wind power generation are \\\\param{price_{}}, \\\\param{load_{}}, \\\\param{pv_{}}, and \\\\param{wind_{}}, each with 24 time steps representing hourly data.\n- Battery state of charge is \\\\param{soc_{}}, with a total battery capacity of 500. Power of generators 1, 2, and 3 is \\\\param{pg1_{}}, \\\\param{pg2_{}}, and \\\\param{pg3_{}}, respectively. The initial values at time step 0 are provided, and the system will update these values at each subsequent time step based on decisions made.\n- \\\\var{Apv_{}}, \\\\var{Asoc_{}}, \\\\var{Ag1_{}}, \\\\var{Ag2_{}}, and \\\\var{Ag3_{}} are adjustment factors ranging from -1 to 1, changing according to the value from the previous time step and affecting system parameters such as power generation and battery capacity.\n- Battery charging and discharging result in a capacity change of 100 * \\\\var{Asoc_{t}}.\n- Generators 1, 2, and 3 power change by 100 * \\\\var{Ag1_{t}}, 100 * \\\\var{Ag2_{t}}, and 200 * \\\\var{Ag3_{t}}.\n- \\\\param{soc_{t}} is calculated as (0.18 * \\\\var{Asoc_{t}} + \\\\param{soc_{t-1}}), with a maximum of 0.8 and a minimum of 0.2.\n- \\\\param{pg1_{t}} is calculated as (100 * \\\\var{Ag1_{t}} + \\\\param{pg1_{t-1}}), with a maximum of 150 and a minimum of 10.\n- \\\\param{pg2_{t}} is calculated as (100 * \\\\var{Ag2_{t}} + \\\\param{pg2_{t-1}}), with a maximum of 375 and a minimum of 50.\n- \\\\param{pg3_{t}} is calculated as (200 * \\\\var{Ag3_{t}} + \\\\param{pg3_{t-1}}), with a maximum of 500 and a minimum of 100.\n- Photovoltaic power is calculated as (\\\\param{pv_{t}} * (1 + \\\\var{Apv})).\n- Total power supply at each time step t = \\\\param{soc_{t}} + \\\\param{pg1_{t}} + \\\\param{pg2_{t}} + \\\\param{pg3_{t}}+ (photovoltaic power at time step t) + \\\\param{wind_{t}}.\n- Excess energy = total power supply - \\\\param{load_{t}}, and no less than 0.\n- Shortage energy = \\\\param{load_{t}} - total power supply, and no less than 0.\n- The total cost structure includes the following:\n- Battery cost = \\\\var{Asoc_{t}} + 0.1 * \\\\param{soc_{t}};\n- Gen1 cost = 0.0034 * \\\\param{pg1_{t}}^2 + 3 * \\\\param{pg1_{t}} + 30;\n- Gen2 cost = 0.001 * \\\\param{pg2_{t}}^2 + 10 * \\\\param{pg2_{t}} + 40;\n- Gen3 cost = 0.001 * \\\\param{pg3_{t}}^2 + 15 * \\\\param{pg3_{t}} + 70;\n- Renewable energy cost = 0.01 * (\\\\param{pv_{t}} + \\\\param{wind_{t}});\n- Sales revenue = 0.5 * \\\\param{price_{t}} * (excess energy);\n- Purchase cost = \\\\param{price_{t}} * (shortage energy);\n- Excess penalty = max(0, (excess energy - 100) * 50);\n- Shortage penalty = max(0, (shortage energy - 100) * 50).\n- The cost of each power generation module should be considered when supplying power, usually starting the subsequent generator when the power of the previous generator is insufficient to meet the load.\n- Minimize transactions exceeding the power grid transaction limit to avoid power grid fluctuations affecting the stability of residential electricity consumption.\n- Decisions made at each time step will update \\\\param{soc_{}}, \\\\param{pg1_{}}, \\\\param{pg2_{}}, and \\\\param{pg3_{}}, and these updated values will serve as the starting point for calculations in the next time step.\n",
"parameters": {
"TotalBatteryCapacity": {
"shape": [],
"definition": "The total capacity of the battery",
"type": "int"
},
"InitialSoc": {
"shape": [],
"definition": "Initial state of charge of the battery",
"type": "float"
},
"InitialPg1": {
"shape": [],
"definition": "Initial power of generator 1",
"type": "float"
},
"InitialPg2": {
"shape": [],
"definition": "Initial power of generator 2",
"type": "float"
},
"InitialPg3": {
"shape": [],
"definition": "Initial power of generator 3",
"type": "float"
},
"Price": {
"shape": [
24
],
"definition": "Electricity price at each time step",
"type": "float"
},
"Load": {
"shape": [
24
],
"definition": "Residential load at each time step",
"type": "float"
},
"Pv": {
"shape": [
24
],
"definition": "Photovoltaic power generation at each time step",
"type": "float"
},
"Wind": {
"shape": [
24
],
"definition": "Wind power generation at each time step",
"type": "float"
}
},
"objective": {
"description": "\"The goal is to minimize the total costs of the energy system and the gap between power supply and residential load.\"",
"formulation": "$\\min \\sum_{t=1}^{24} \\left( \\text{Battery cost}_t + \\text{Gen1 cost}_t + \\text{Gen2 cost}_t + \\text{Gen3 cost}_t + \\text{Renewable energy cost}_t + \\text{Excess penalty}_t + \\text{Shortage penalty}_t - \\text{Sales revenue}_t + \\text{Purchase cost}_t + |\\text{ExcessEnergy}_t| + |\\text{ShortageEnergy}_t| \\right)$",
"code": "model.setObjective(\n quicksum(\n (Asoc[t] + 0.1 * soc[t-1] +\n 0.0034 * pg1[t-1] ** 2 + 3 * pg1[t-1] + 30 +\n 0.001 * pg2[t-1] ** 2 + 10 * pg2[t-1] + 40 +\n 0.001 * pg3[t-1] ** 2 + 15 * pg3[t-1] + 70 +\n 0.01 * (pv[t-1] + wind[t-1]) +\n max(0, (ExcessEnergy[t] - 100) * 50) +\n max(0, (ShortageEnergy[t] - 100) * 50) -\n 0.5 * Price[t-1] * ExcessEnergy[t] +\n Price[t-1] * ShortageEnergy[t] +\n abs(ExcessEnergy[t]) + abs(ShortageEnergy[t])\n ) for t in range(1, 25)\n ),\n GRB.MINIMIZE\n)"
},
"constraints": [
{
"description": "Battery state of charge (soc_t) must be between 0.2 and 0.8",
"formulation": "$0.2 \\leq soc_t \\leq 0.8$",
"code": "# Preceding state of charge variable initialization\nsoc = InitialSoc\n\n# Loop over each time step to add the soc constraints\nfor t in range(24):\n # Calculate soc_t based on the previous soc and the adjustment factor\n soc_t = 0.18 * Asoc[t] + soc\n \n # Add the lower bound constraint for soc_t\n model.addConstr(soc_t >= 0.2)\n \n # Add the upper bound constraint for soc_t\n model.addConstr(soc_t <= 0.8)\n \n # Update soc to the newly calculated soc_t for the next iteration\n soc = soc_t"
},
{
"description": "Generator 1 power (pg1_t) must be between 10 and 150",
"formulation": "$10 \\leq pg1_t \\leq 150$",
"code": "# Assuming decision variables Pg1[t] have been defined for each time step t\nfor t in range(24):\n if t == 0:\n # For the first time step, use the initial value of Pg1\n model.addConstr(Pg1[t] <= 150)\n model.addConstr(Pg1[t] >= 10)\n else:\n # For subsequent time steps, calculate Pg1 based on the previous time step and Ag1\n model.addConstr(Pg1[t] <= 150)\n model.addConstr(Pg1[t] >= 10)\n model.addConstr(Pg1[t] == 100 * Ag1[t] + Pg1[t-1])"
},
{
"description": "Generator 2 power (pg2_t) must be between 50 and 375",
"formulation": "$50 \\leq pg2_t \\leq 375$",
"code": "for t in range(1, 24):\n model.addConstr(InitialPg2 + 100 * Ag2[t] >= 50)\n model.addConstr(InitialPg2 + 100 * Ag2[t] <= 375)"
},
{
"description": "Generator 3 power (pg3_t) must be between 100 and 500",
"formulation": "$100 \\\\leq pg3_t \\\\leq 500$",
"code": "for t in range(24):\n model.addConstr(Pg3[t] >= 100) # Lower bound constraint for each time step\n model.addConstr(Pg3[t] <= 500) # Upper bound constraint for each time step"
},
{
"description": "Battery state of charge (soc_t) must be calculated as (0.18 * Asoc_t + soc_t-1)",
"formulation": "$\\\\forall t \\\\in \\\\{1,\\\\dots,24\\\\}, \\\\param{soc_{t}} = 0.18 \\\\cdot \\\\var{Asoc_{t}} + \\\\param{soc_{t-1}}$",
"code": "# Assuming soc is defined as a list of decision variables for each time step\nsoc = model.addVars(24, lb=0.2, ub=0.8, vtype=GRB.CONTINUOUS, name=\"soc\")\n\n# Loop over each time step starting from 1 up to 24\nfor t in range(1, 24):\n # Add the constraint for the state of charge calculation\n model.addConstr(soc[t] == 0.18 * Asoc[t] + soc[t-1])"
},
{
"description": "Generator 1 power (pg1_t) must be calculated as (100 * Ag1_t + pg1_t-1)",
"formulation": "$\\\\forall t \\\\in \\\\{1,\\\\dots,24\\\\}, \\\\param{pg1_{t}} = 100 \\\\cdot \\\\var{Ag1_{t}} + \\\\param{pg1_{t-1}}$",
"code": "for t in range(1, 24):\n model.addConstr(pg1[t] == 100 * Ag1[t] + pg1[t-1])"
},
{
"description": "Generator 2 power (pg2_t) must be calculated as (100 * Ag2_t + pg2_t-1)",
"formulation": "$\\\\forall t \\\\in \\\\{1,\\\\dots,24\\\\}, \\\\param{pg2_{t}} = 100 \\\\times Ag2_t + \\\\param{pg2_{t-1}}$",
"code": "# Assuming that `pg2` is a list of Gurobi variables representing the power of Generator 2 at each time step\n# and that `InitialPg2` is a parameter representing the initial power of Generator 2 at time step 0\n\n# Define the Generator 2 power for the first time step, which is the initial value and does not have a constraint\npg2 = [model.addVar(vtype=GRB.CONTINUOUS, lb=50, ub=375, name=\"pg2_0\")]\n\n# Now add the constraints for time steps 1 to 23\nfor t in range(1, 24):\n pg2.append(model.addVar(vtype=GRB.CONTINUOUS, lb=50, ub=375, name=f\"pg2_{t}\"))\n model.addConstr(pg2[t] == 100 * Ag2[t-1] + pg2[t-1])"
},
{
"description": "Generator 3 power (pg3_t) must be calculated as (200 * Ag3_t + pg3_t-1)",
"formulation": "$\\\\forall t \\\\in \\\\{1,\\\\ldots,24\\\\}, \\\\param{pg3_{t}} = 200 \\\\cdot \\\\var{Ag3_{t}} + \\\\param{pg3_{t-1}}$",
"code": "# Assuming Pg3 is defined as a list of decision variables representing power of generator 3 at each time step\nfor t in range(1, 24):\n model.addConstr(Pg3[t] == 200 * Ag3[t] + Pg3[t-1])"
},
{
"description": "Photovoltaic power must be calculated as (pv_t * (1 + Apv))",
"formulation": "$\\\\forall t, PvPower_t = Pv[t] \\\\times (1 + Apv_t)$",
"code": "for t in range(24):\n model.addConstr(PvPower[t] == Pv[t] * (1 + Apv[t]))"
},
{
"description": "Excess energy must be no less than 0",
"formulation": "$\\\\forall t, ExcessEnergy[t] \\\\geq 0$",
"code": "for t in range(24):\n model.addConstr(ExcessEnergy[t] >= 0)"
},
{
"description": "auxiliary constraint",
"formulation": "$\\\\forall t, ExcessEnergy[t] = TotalSupply[t] - Load[t]$",
"code": "for t in range(24):\n # Define the total power supply at time step t\n TotalSupply_t = (InitialPg1 + Ag1[t] * 100 +\n InitialPg2 + Ag2[t] * 100 +\n InitialPg3 + Ag3[t] * 200 +\n Pv[t] * (1 + Apv[t]) +\n Wind[t] +\n InitialSoc + Asoc[t] * 0.18)\n # Add the constraint for excess energy at time step t\n model.addConstr(ExcessEnergy[t] == TotalSupply_t - Load[t])"
},
{
"description": "auxiliary constraint",
"formulation": "$\\\\forall t, TotalSupply[t] = soc[t] + pg1[t] + pg2[t] + pg3[t] + PvPower[t] + Wind[t]$",
"code": "for t in range(24):\n # Calculate updated generator powers and battery state of charge\n soc_t = InitialSoc + 0.18 * Asoc[t] if t == 0 else soc[t-1] + 0.18 * Asoc[t]\n pg1_t = InitialPg1 + 100 * Ag1[t] if t == 0 else pg1[t-1] + 100 * Ag1[t]\n pg2_t = InitialPg2 + 100 * Ag2[t] if t == 0 else pg2[t-1] + 100 * Ag2[t]\n pg3_t = InitialPg3 + 200 * Ag3[t] if t == 0 else pg3[t-1] + 200 * Ag3[t]\n\n # Calculate photovoltaic power\n PvPower_t = Pv[t] * (1 + Apv[t])\n\n # Add the constraint for total power supply at time t\n model.addConstr(TotalPowerSupply[t] == soc_t + pg1_t + pg2_t + pg3_t + PvPower_t + Wind[t])"
},
{
"description": "Shortage energy must be no less than 0",
"formulation": "$\\\\forall t, ShortageEnergy[t] \\\\geq 0$",
"code": "for t in range(24):\n model.addConstr(ShortageEnergy[t] >= 0)"
},
{
"description": "Adjustment factors (Apv, Asoc, Ag1, Ag2, Ag3) must range from -1 to 1",
"formulation": "$-1 \\leq Apv_t \\leq 1 \\\\quad \\\\forall t \\\\in \\\\{1, \\\\dots, 24\\\\}$\\n$-1 \\leq Asoc_t \\leq 1 \\\\quad \\\\forall t \\\\in \\\\{1, \\\\dots, 24\\\\}$\\n$-1 \\leq Ag1_t \\leq 1 \\\\quad \\\\forall t \\\\in \\\\{1, \\\\dots, 24\\\\}$\\n$-1 \\leq Ag2_t \\leq 1 \\\\quad \\\\forall t \\\\in \\\\{1, \\\\dots, 24\\\\}$\\n$-1 \\leq Ag3_t \\leq 1 \\\\quad \\\\forall t \\\\in \\\\{1, \\\\dots, 24\\\\}$",
"code": "for t in range(24):\n model.addConstr(-1 <= Apv[t])\n model.addConstr(Apv[t] <= 1)\n model.addConstr(-1 <= Asoc[t])\n model.addConstr(Asoc[t] <= 1)\n model.addConstr(-1 <= Ag1[t])\n model.addConstr(Ag1[t] <= 1)\n model.addConstr(-1 <= Ag2[t])\n model.addConstr(Ag2[t] <= 1)\n model.addConstr(-1 <= Ag3[t])\n model.addConstr(Ag3[t] <= 1)"
},
{
"description": "Battery charging and discharging must result in a capacity change of 100 * Asoc_t",
"formulation": "$\\\\forall t, 100 \\\\times Asoc_t = soc_{t} - soc_{t-1}$",
"code": "for t in range(1, 24):\n model.addConstr(100 * Asoc[t] == soc[t] - soc[t-1])"
},
{
"description": "The sum of power from generators and renewable sources must not exceed the residential load by more than the grid transaction limit to avoid instability",
"formulation": "$\\\\forall t, TotalPowerSupply[t] \\\\leq Load[t] + GridTransactionLimit$",
"code": "for t in range(24):\n # Calculate the power from each generator based on previous state and adjustment\n pg1_power = InitialPg1 + 100 * Ag1[t]\n pg2_power = InitialPg2 + 100 * Ag2[t]\n pg3_power = InitialPg3 + 200 * Ag3[t]\n\n # Calculate the adjusted photovoltaic power\n pv_power = Pv[t] * (1 + Apv[t])\n\n # Express the total power supply\n TotalPowerSupply[t] = pg1_power + pg2_power + pg3_power + pv_power + Wind[t]\n\n # Add the constraint that total power supply cannot exceed the load by more than the grid transaction limit\n model.addConstr(TotalPowerSupply[t] <= Load[t] + GridTransactionLimit)"
},
{
"description": "auxiliary constraint",
"formulation": "$\\\\forall t, TotalPowerSupply[t] = Pg1[t] + Pg2[t] + Pg3[t] + PvPower[t] + Wind[t]$",
"code": "for t in range(24):\n # Assuming that Pg1, Pg2, Pg3 are defined as variables in the model with the correct initial values and adjustments\n Pg1[t] = model.addVar(vtype=GRB.CONTINUOUS, name=f\"Pg1_{t}\")\n Pg2[t] = model.addVar(vtype=GRB.CONTINUOUS, name=f\"Pg2_{t}\")\n Pg3[t] = model.addVar(vtype=GRB.CONTINUOUS, name=f\"Pg3_{t}\")\n\n # Define the auxiliary constraint for each time step\n model.addConstr(TotalPowerSupply[t] == Pg1[t] + Pg2[t] + Pg3[t] + PvPower[t] + Wind[t])"
}
],
"variables": {
"Ag1": {
"shape": [
24
],
"type": "continuous",
"definition": "Adjustment factor for generator 1 power at each time step"
},
"Ag2": {
"shape": [
24
],
"type": "continuous",
"definition": "Adjustment factor for generator 2 power at each time step"
},
"Ag3": {
"shape": [
24
],
"type": "continuous",
"definition": "Adjustment factor for generator 3 power at each time step"
},
"PvPower": {
"shape": [
24
],
"type": "continuous",
"definition": "The adjusted photovoltaic power generation at each time step"
},
"Apv": {
"shape": [
24
],
"type": "continuous",
"definition": "The adjustment factor for photovoltaic power at each time step"
},
"ExcessEnergy": {
"shape": [
24
],
"type": "continuous",
"definition": "The excess energy at each time step"
},
"ShortageEnergy": {
"shape": [
24
],
"type": "continuous",
"definition": "The shortage energy at each time step"
},
"Asoc": {
"shape": [
24
],
"type": "continuous",
"definition": "The adjustment factor for the battery state of charge at each time step"
},
"TotalPowerSupply": {
"shape": [
24
],
"type": "continuous",
"definition": "The total power supply from generators and renewable sources at each time step"
},
"GridTransactionLimit": {
"shape": [],
"type": "float",
"definition": "The maximum allowable excess power supply over the residential load to maintain grid stability"
}
}
}
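
The constraint-level code strings in llma/data/state_6_code.json reference variables such as Asoc, Ag1, Ag2, Ag3, Apv, soc, Pg1/pg1, PvPower, ExcessEnergy, ShortageEnergy, TotalPowerSupply and GridTransactionLimit without declaring them, and they mix the pg1/Pg1 spellings. A scaffold those snippets could be pasted into might look like the sketch below; the model name, the placeholder parameter values, and the aliasing of the two spellings are assumptions made here, while the bounds are taken from the constraint descriptions above.

import gurobipy as gp
from gurobipy import GRB  # referenced by the pasted snippets (e.g. GRB.CONTINUOUS)

T = 24
model = gp.Model("state_6_scaffold")

# Hypothetical placeholder parameters; real runs would load the data files
Pv = [60.0] * T
Wind = [80.0] * T
Load = [400.0] * T
Price = [0.6] * T
InitialSoc, InitialPg1, InitialPg2, InitialPg3 = 0.4, 100.0, 200.0, 300.0

# Adjustment factors in [-1, 1]
Apv = model.addVars(T, lb=-1, ub=1, name="Apv")
Asoc = model.addVars(T, lb=-1, ub=1, name="Asoc")
Ag1 = model.addVars(T, lb=-1, ub=1, name="Ag1")
Ag2 = model.addVars(T, lb=-1, ub=1, name="Ag2")
Ag3 = model.addVars(T, lb=-1, ub=1, name="Ag3")

# State variables with the bounds given in the constraints
soc = model.addVars(T, lb=0.2, ub=0.8, name="soc")
pg1 = model.addVars(T, lb=10, ub=150, name="pg1")
pg2 = model.addVars(T, lb=50, ub=375, name="pg2")
pg3 = model.addVars(T, lb=100, ub=500, name="pg3")
Pg1, Pg2, Pg3 = pg1, pg2, pg3  # the snippets use both spellings

# Auxiliary quantities referenced by the snippets
PvPower = model.addVars(T, name="PvPower")
ExcessEnergy = model.addVars(T, lb=0, name="ExcessEnergy")
ShortageEnergy = model.addVars(T, lb=0, name="ShortageEnergy")
TotalPowerSupply = model.addVars(T, name="TotalPowerSupply")
GridTransactionLimit = model.addVar(lb=0, name="GridTransactionLimit")

Even with these declarations in place, the snippets that apply Python's built-in max and abs to Gurobi expressions (the objective code and the penalty terms) would still need auxiliary variables or gurobipy's max_ / abs_ general-constraint helpers, along the lines of the objective sketch after the second file above.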

Some files were not shown because too many files have changed in this diff.