Paper submission
|
@ -0,0 +1,3 @@
|
||||||
|
__pycache__/
|
||||||
|
.idea/
|
||||||
|
gurobi.log
|
[14 result image files added in this commit (binary image diffs omitted; 125–513 KiB each)]
|
@ -0,0 +1,127 @@
|
||||||
|
import pickle
|
||||||
|
import pandas as pd
|
||||||
|
import torch
|
||||||
|
|
||||||
|
from agent import AgentDDPG
|
||||||
|
from environment import ESSEnv
|
||||||
|
from tools import *
|
||||||
|
|
||||||
|
|
||||||
|
def update_buffer(_trajectory):
|
||||||
|
ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
|
||||||
|
ary_other = torch.as_tensor([item[1] for item in _trajectory])
|
||||||
|
ary_other[:, 0] = ary_other[:, 0]  # ten_reward: column 0 already holds the reward, no scaling applied
|
||||||
|
ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma # ten_mask = (1.0 - ary_done) * gamma
|
||||||
|
|
||||||
|
buffer.extend_buffer(ten_state, ary_other)
|
||||||
|
|
||||||
|
_steps = ten_state.shape[0]
|
||||||
|
_r_exp = ary_other[:, 0].mean() # other = (reward, mask, action)
|
||||||
|
return _steps, _r_exp
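# Note on the packed layout (a reading aid, hedged): each row of ary_other is
# (reward, mask, *action) with mask = (1.0 - done) * gamma, so the agent's update can
# presumably form the one-step target as
#     q_target = reward + mask * Q_target(next_state, a_target)
# without carrying a separate done flag: mask is 0 at terminal steps and gamma otherwise.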
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
args = Arguments()
|
||||||
|
'''record real unbalance'''
|
||||||
|
reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
|
||||||
|
loss_record = {'episode': [], 'steps': [], 'critic_loss': [], 'actor_loss': [], 'entropy_loss': []}
|
||||||
|
args.visible_gpu = '0'
|
||||||
|
for seed in args.random_seed_list:
|
||||||
|
args.random_seed = seed
|
||||||
|
args.agent = AgentDDPG()
|
||||||
|
agent_name = f'{args.agent.__class__.__name__}'
|
||||||
|
args.agent.cri_target = True
|
||||||
|
args.env = ESSEnv()
|
||||||
|
args.init_before_training(if_main=True)
|
||||||
|
'''init agent and environment'''
|
||||||
|
agent = args.agent
|
||||||
|
env = args.env
|
||||||
|
agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
|
||||||
|
args.if_per_or_gae)
|
||||||
|
'''init replay buffer'''
|
||||||
|
buffer = ReplayBuffer(max_len=args.max_memo, state_dim=env.state_space.shape[0],
|
||||||
|
action_dim=env.action_space.shape[0])
|
||||||
|
'''start training'''
|
||||||
|
cwd = args.cwd
|
||||||
|
gamma = args.gamma
|
||||||
|
batch_size = args.batch_size
|
||||||
|
target_step = args.target_step  # environment steps to collect per exploration rollout
repeat_times = args.repeat_times  # update passes to run over each batch of collected data
|
||||||
|
soft_update_tau = args.soft_update_tau
|
||||||
|
num_episode = args.num_episode
|
||||||
|
agent.state = env.reset()
|
||||||
|
'''collect data, train and update network'''
|
||||||
|
# args.train = False
|
||||||
|
# args.save_network = False
|
||||||
|
# args.test_network = False
|
||||||
|
# args.save_test_data = False
|
||||||
|
# args.compare_with_gurobi = False
|
||||||
|
if args.train:
|
||||||
|
collect_data = True
|
||||||
|
while collect_data:
|
||||||
|
print(f'buffer:{buffer.now_len}')
|
||||||
|
with torch.no_grad():
|
||||||
|
trajectory = agent.explore_env(env, target_step)
|
||||||
|
steps, r_exp = update_buffer(trajectory)
|
||||||
|
buffer.update_now_len()
|
||||||
|
if buffer.now_len >= 10000:
|
||||||
|
collect_data = False
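# Warm-up: keep exploring in blocks of target_step transitions until the replay buffer
# holds at least 10,000 samples, so the first gradient updates are computed on a
# reasonably diverse batch rather than on the first few (highly correlated) episodes.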
|
||||||
|
for i_episode in range(num_episode):
|
||||||
|
critic_loss, actor_loss = agent.update_net(buffer, batch_size, repeat_times, soft_update_tau)
|
||||||
|
loss_record['critic_loss'].append(critic_loss)
|
||||||
|
loss_record['actor_loss'].append(actor_loss)
|
||||||
|
with torch.no_grad():
|
||||||
|
episode_reward, episode_unbalance = get_episode_return(env, agent.act, agent.device)
|
||||||
|
reward_record['mean_episode_reward'].append(episode_reward)
|
||||||
|
reward_record['unbalance'].append(episode_unbalance)
|
||||||
|
print(f'episode: {i_episode}, reward: {episode_reward}, unbalance: {episode_unbalance}, buffer_length: {buffer.now_len}')
|
||||||
|
if i_episode % 10 == 0: # target_step
|
||||||
|
with torch.no_grad():
|
||||||
|
trajectory = agent.explore_env(env, target_step)
|
||||||
|
steps, r_exp = update_buffer(trajectory)
|
||||||
|
|
||||||
|
loss_record_path = f'{args.cwd}/loss_data.pkl'
|
||||||
|
reward_record_path = f'{args.cwd}/reward_data.pkl'
|
||||||
|
act_save_path = f'{args.cwd}/actor.pth'
|
||||||
|
|
||||||
|
if args.save_network:
|
||||||
|
with open(loss_record_path, 'wb') as tf:
|
||||||
|
pickle.dump(loss_record, tf)
|
||||||
|
with open(reward_record_path, 'wb') as tf:
|
||||||
|
pickle.dump(reward_record, tf)
|
||||||
|
torch.save(agent.act.state_dict(), act_save_path)
|
||||||
|
print('actor parameters have been saved')
|
||||||
|
|
||||||
|
if args.test_network:
|
||||||
|
args.cwd = agent_name
|
||||||
|
agent.act.load_state_dict(torch.load(act_save_path))
|
||||||
|
print('actor parameters have been reloaded for testing')
|
||||||
|
record = test_one_episode(env, agent.act, agent.device)
|
||||||
|
eval_data = pd.DataFrame(record['system_info'])
|
||||||
|
eval_data.columns = ['time_step', 'price', 'netload', 'action', 'real_action', 'soc', 'battery',
|
||||||
|
'gen1', 'gen2', 'gen3', 'pv', 'wind', 'unbalance', 'operation_cost', 'reward']
|
||||||
|
if args.save_test_data:
|
||||||
|
test_data_save_path = f'{args.cwd}/test_data.pkl'
|
||||||
|
with open(test_data_save_path, 'wb') as tf:
|
||||||
|
pickle.dump(record, tf)
|
||||||
|
|
||||||
|
'''compare with gurobi data and results'''
|
||||||
|
if args.compare_with_gurobi:
|
||||||
|
month = record['init_info'][0][0]
|
||||||
|
day = record['init_info'][0][1]
|
||||||
|
initial_soc = record['init_info'][0][3]
|
||||||
|
base_result = optimization_base_result(env, month, day, initial_soc)
|
||||||
|
if args.plot_on:
|
||||||
|
from plotDRL import PlotArgs, make_dir, plot_evaluation_information, plot_optimization_result
|
||||||
|
|
||||||
|
plot_args = PlotArgs()
|
||||||
|
plot_args.feature_change = '10'
|
||||||
|
args.cwd = agent_name
|
||||||
|
plot_dir = make_dir(args.cwd, plot_args.feature_change)
|
||||||
|
plot_optimization_result(base_result, plot_dir)
|
||||||
|
plot_evaluation_information(args.cwd + '/' + 'test_data.pkl', plot_dir)
|
||||||
|
'''compare the costs obtained from gurobi and DDPG'''
ratio = sum(eval_data['operation_cost']) / sum(base_result['step_cost'])
print('rl_cost:', sum(eval_data['operation_cost']))
print('gurobi_cost:', sum(base_result['step_cost']))
print('ratio:', ratio)
|
|
@ -0,0 +1,374 @@
|
||||||
|
import os
|
||||||
|
import pickle
|
||||||
|
from copy import deepcopy
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.nn.functional as F
|
||||||
|
from environment import ESSEnv
|
||||||
|
from tools import get_episode_return, test_one_episode, optimization_base_result
|
||||||
|
|
||||||
|
|
||||||
|
class ActorPPO(nn.Module):
|
||||||
|
def __init__(self, mid_dim, state_dim, action_dim):
|
||||||
|
super().__init__()
|
||||||
|
self.net = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(),
|
||||||
|
nn.Linear(mid_dim, mid_dim), nn.ReLU(),
|
||||||
|
nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
|
||||||
|
nn.Linear(mid_dim, action_dim))
|
||||||
|
# log standard deviation of the action distribution; a trainable parameter
|
||||||
|
self.a_logstd = nn.Parameter(torch.zeros((1, action_dim)) - 0.5, requires_grad=True)
|
||||||
|
self.sqrt_2pi_log = np.log(np.sqrt(2 * np.pi))
|
||||||
|
|
||||||
|
def forward(self, state):
|
||||||
|
return self.net(state).tanh() # action.tanh() limit the data output of action
|
||||||
|
|
||||||
|
def get_action(self, state):
|
||||||
|
a_avg = self.forward(state)  # mean of the Gaussian policy (tanh-squashed network output)
|
||||||
|
a_std = self.a_logstd.exp()
|
||||||
|
|
||||||
|
noise = torch.randn_like(a_avg)
|
||||||
|
action = a_avg + noise * a_std
|
||||||
|
return action, noise
|
||||||
|
|
||||||
|
def get_logprob_entropy(self, state, action):
|
||||||
|
a_avg = self.forward(state)
|
||||||
|
a_std = self.a_logstd.exp()
|
||||||
|
|
||||||
|
delta = ((a_avg - action) / a_std).pow(2) * 0.5
|
||||||
|
logprob = -(self.a_logstd + self.sqrt_2pi_log + delta).sum(1) # new_logprob
|
||||||
|
|
||||||
|
dist_entropy = (logprob.exp() * logprob).mean() # policy entropy
|
||||||
|
return logprob, dist_entropy
|
||||||
|
|
||||||
|
def get_old_logprob(self, _action, noise): # noise = action - a_noise
|
||||||
|
delta = noise.pow(2) * 0.5
|
||||||
|
return -(self.a_logstd + self.sqrt_2pi_log + delta).sum(1) # old_logprob
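# Both log-prob methods implement the diagonal-Gaussian density
#     log pi(a|s) = -sum_i [ log sigma_i + 0.5*log(2*pi) + (a_i - mu_i)^2 / (2*sigma_i^2) ],
# where self.sqrt_2pi_log = log(sqrt(2*pi)) = 0.5*log(2*pi). get_old_logprob reuses the
# stored noise = (a - mu) / sigma from get_action, so delta = noise^2 / 2 recovers the same
# quadratic term without re-running the network.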
|
||||||
|
|
||||||
|
|
||||||
|
class CriticAdv(nn.Module):
|
||||||
|
def __init__(self, mid_dim, state_dim, _action_dim):
|
||||||
|
super().__init__()
|
||||||
|
self.net = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(),
|
||||||
|
nn.Linear(mid_dim, mid_dim), nn.ReLU(),
|
||||||
|
nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
|
||||||
|
nn.Linear(mid_dim, 1))
|
||||||
|
|
||||||
|
def forward(self, state):
|
||||||
|
return self.net(state) # Advantage value
|
||||||
|
|
||||||
|
|
||||||
|
def smooth_rewards(rewards, window=10):
|
||||||
|
rewards = rewards.unsqueeze(0).unsqueeze(0)  # reshape rewards to [1, 1, len] so conv1d can be applied
kernel = torch.ones(1, 1, window, device=rewards.device) / window  # uniform moving-average kernel
smoothed_rewards = F.conv1d(rewards, kernel, padding='valid')  # moving average
smoothed_rewards = smoothed_rewards.squeeze(0).squeeze(0)  # drop the extra dimensions

# keep the same length as the original sequence: the first window-1 entries stay unchanged
return torch.cat((rewards[0, 0, :window-1], smoothed_rewards))
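# Minimal usage sketch (assuming a recent PyTorch where F.conv1d accepts padding='valid'):
#     smooth_rewards(torch.arange(12.), window=3)
# should keep the first window-1 = 2 entries and replace the rest with 3-step running means:
#     tensor([0., 1., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10.])
# Note that update_net below applies this smoothing to the advantages, not to raw rewards.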
|
||||||
|
|
||||||
|
|
||||||
|
class AgentPPO:
|
||||||
|
def __init__(self):
|
||||||
|
super().__init__()
|
||||||
|
self.state = None
|
||||||
|
self.device = None
|
||||||
|
self.action_dim = None
|
||||||
|
self.get_obj_critic = None
|
||||||
|
|
||||||
|
self.criterion = torch.nn.SmoothL1Loss()
|
||||||
|
self.cri = self.cri_target = self.if_use_cri_target = self.cri_optim = self.ClassCri = None
|
||||||
|
self.act = self.act_target = self.if_use_act_target = self.act_optim = self.ClassAct = None
|
||||||
|
|
||||||
|
'''init modify'''
|
||||||
|
self.ClassCri = CriticAdv
|
||||||
|
self.ClassAct = ActorPPO
|
||||||
|
|
||||||
|
self.ratio_clip = 0.2 # ratio.clamp(1 - clip, 1 + clip)
|
||||||
|
self.lambda_entropy = 0.02 # could be 0.01~0.05
|
||||||
|
self.lambda_gae_adv = 0.98 # could be 0.95~0.99, (Generalized Advantage Estimation. ICLR.2016.)
|
||||||
|
self.get_reward_sum = None # self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
|
||||||
|
self.trajectory_list = None
|
||||||
|
|
||||||
|
def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, if_use_gae=True, gpu_id=0):
|
||||||
|
self.device = torch.device(f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu")
|
||||||
|
self.trajectory_list = list()
|
||||||
|
# choose whether to use gae or not
|
||||||
|
self.get_reward_sum = self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
|
||||||
|
|
||||||
|
self.cri = self.ClassCri(net_dim, state_dim, action_dim).to(self.device)
|
||||||
|
self.act = self.ClassAct(net_dim, state_dim, action_dim).to(self.device) if self.ClassAct else self.cri
|
||||||
|
self.cri_target = deepcopy(self.cri) if self.if_use_cri_target else self.cri
|
||||||
|
self.act_target = deepcopy(self.act) if self.if_use_act_target else self.act
|
||||||
|
|
||||||
|
self.cri_optim = torch.optim.Adam(self.cri.parameters(), learning_rate)
|
||||||
|
self.act_optim = torch.optim.Adam(self.act.parameters(), learning_rate) if self.ClassAct else self.cri
|
||||||
|
|
||||||
|
def select_action(self, state):
|
||||||
|
states = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
|
||||||
|
actions, noises = self.act.get_action(states)
|
||||||
|
return actions[0].detach().cpu().numpy(), noises[0].detach().cpu().numpy()
|
||||||
|
|
||||||
|
def explore_env(self, env, target_step):
|
||||||
|
state = self.state  # resume exploration from the state carried over from the previous call
|
||||||
|
trajectory_temp = list()
|
||||||
|
last_done = 0
|
||||||
|
for i in range(target_step):
|
||||||
|
action, noise = self.select_action(state)
|
||||||
|
state, next_state, reward, done, = env.step(np.tanh(action))
|
||||||
|
trajectory_temp.append((state, reward, done, action, noise))
|
||||||
|
if done:
|
||||||
|
state = env.reset()
|
||||||
|
last_done = i
|
||||||
|
else:
|
||||||
|
state = next_state
|
||||||
|
self.state = state
|
||||||
|
|
||||||
|
'''splice list'''
|
||||||
|
# keep the steps up to the last completed episode for this update
|
||||||
|
trajectory_list = self.trajectory_list + trajectory_temp[:last_done + 1]
|
||||||
|
self.trajectory_list = trajectory_temp[last_done:]
|
||||||
|
return trajectory_list
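# Splicing detail: trajectory_temp[:last_done + 1] (everything up to and including the last
# terminal step) is returned for this update, while trajectory_temp[last_done:] is kept so the
# unfinished episode at the tail is continued on the next call; note the step at index
# last_done appears in both slices.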
|
||||||
|
|
||||||
|
def update_net(self, buffer, batch_size, repeat_times, soft_update_tau):
|
||||||
|
"""put data extract and update network together"""
|
||||||
|
with torch.no_grad():
|
||||||
|
buf_len = buffer[0].shape[0]
|
||||||
|
# decompose buffer data
|
||||||
|
buf_state, buf_action, buf_noise, buf_reward, buf_mask = [ten.to(self.device) for ten in buffer]
|
||||||
|
|
||||||
|
'''get buf_r_sum, buf_logprob'''
|
||||||
|
bs = 4096
|
||||||
|
buf_value = [self.cri_target(buf_state[i:i + bs]) for i in range(0, buf_len, bs)]
|
||||||
|
buf_value = torch.cat(buf_value, dim=0)
|
||||||
|
buf_logprob = self.act.get_old_logprob(buf_action, buf_noise)
|
||||||
|
|
||||||
|
buf_r_sum, buf_advantage = self.get_reward_sum(buf_len, buf_reward, buf_mask, buf_value) # detach()
|
||||||
|
# normalize advantage
|
||||||
|
buf_advantage = (buf_advantage - buf_advantage.mean()) / (buf_advantage.std() + 1e-5)
|
||||||
|
buf_advantage = smooth_rewards(buf_advantage, window=10)
|
||||||
|
del buf_noise, buffer[:]
|
||||||
|
|
||||||
|
'''PPO: Surrogate objective of Trust Region'''
|
||||||
|
obj_critic = obj_actor = None
|
||||||
|
for _ in range(int(buf_len / batch_size * repeat_times)):
|
||||||
|
indices = torch.randint(buf_len, size=(batch_size,), requires_grad=False, device=self.device)
|
||||||
|
|
||||||
|
state = buf_state[indices]
|
||||||
|
action = buf_action[indices]
|
||||||
|
r_sum = buf_r_sum[indices]
|
||||||
|
logprob = buf_logprob[indices]
|
||||||
|
advantage = buf_advantage[indices]
|
||||||
|
|
||||||
|
new_logprob, obj_entropy = self.act.get_logprob_entropy(state, action) # it is obj_actor
|
||||||
|
ratio = (new_logprob - logprob.detach()).exp()
|
||||||
|
surrogate1 = advantage * ratio
|
||||||
|
surrogate2 = advantage * ratio.clamp(1 - self.ratio_clip, 1 + self.ratio_clip)
|
||||||
|
obj_surrogate = -torch.min(surrogate1, surrogate2).mean()
|
||||||
|
obj_actor = obj_surrogate + obj_entropy * self.lambda_entropy
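# Clipped-surrogate sketch: ratio = exp(log pi_new - log pi_old); minimizing
# -min(ratio * A, clip(ratio, 1 - eps, 1 + eps) * A) is the standard PPO objective with
# eps = self.ratio_clip. obj_entropy above is mean(p * log p) over the sampled actions, a rough
# proxy for the negative entropy, so adding lambda_entropy * obj_entropy to the minimized loss
# nudges the policy toward higher entropy, i.e. more exploration.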
|
||||||
|
self.optim_update(self.act_optim, obj_actor) # update actor
|
||||||
|
|
||||||
|
value = self.cri(state).squeeze(1) # critic network predicts the reward_sum (Q value) of state
|
||||||
|
# use smoothloss L1 to evaluate the value loss
|
||||||
|
# obj_critic = self.criterion(value, r_sum) / (r_sum.std() + 1e-6)
|
||||||
|
obj_critic = self.criterion(value, r_sum)
|
||||||
|
self.optim_update(self.cri_optim, obj_critic)  # backpropagate the value loss and update the critic
|
||||||
|
# choose whether to use soft update
|
||||||
|
self.soft_update(self.cri_target, self.cri, soft_update_tau) if self.cri_target is not self.cri else None
|
||||||
|
|
||||||
|
a_std_log = getattr(self.act, 'a_std_log', torch.zeros(1))
|
||||||
|
return obj_critic.item(), obj_actor.item(), a_std_log.mean().item() # logging_tuple
|
||||||
|
|
||||||
|
def get_reward_sum_raw(self, buf_len, buf_reward, buf_mask, buf_value) -> (torch.Tensor, torch.Tensor):
|
||||||
|
buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device) # reward sum
|
||||||
|
|
||||||
|
pre_r_sum = 0
|
||||||
|
for i in range(buf_len - 1, -1, -1):
|
||||||
|
buf_r_sum[i] = buf_reward[i] + buf_mask[i] * pre_r_sum
|
||||||
|
pre_r_sum = buf_r_sum[i]
|
||||||
|
buf_advantage = buf_r_sum - (buf_mask * buf_value[:, 0])
|
||||||
|
return buf_r_sum, buf_advantage
|
||||||
|
|
||||||
|
def get_reward_sum_gae(self, buf_len, ten_reward, ten_mask, ten_value) -> (torch.Tensor, torch.Tensor):
|
||||||
|
buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device) # old policy value
|
||||||
|
buf_advantage = torch.empty(buf_len, dtype=torch.float32, device=self.device) # advantage value
|
||||||
|
|
||||||
|
pre_r_sum = 0.0
|
||||||
|
pre_advantage = 0.0 # advantage value of previous step
|
||||||
|
for i in range(buf_len - 1, -1, -1):
|
||||||
|
buf_r_sum[i] = ten_reward[i] + ten_mask[i] * pre_r_sum
|
||||||
|
pre_r_sum = buf_r_sum[i]
|
||||||
|
buf_advantage[i] = ten_reward[i] + ten_mask[i] * (pre_advantage - ten_value[i])
|
||||||
|
pre_advantage = ten_value[i] + buf_advantage[i] * self.lambda_gae_adv
|
||||||
|
return buf_r_sum, buf_advantage
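# GAE sketch: with mask_t = (1 - done_t) * gamma, textbook GAE computes
#     delta_t = r_t + gamma * V(s_{t+1}) - V(s_t),   A_t = delta_t + gamma * lambda * A_{t+1}.
# The recursion above is a slightly rearranged variant in which the -V(s_t) term is also
# multiplied by mask_t (pre_advantage carries V(s_{t+1}) + lambda * A_{t+1}); buf_r_sum is the
# plain discounted return used as the critic's regression target.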
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def optim_update(optimizer, objective):
|
||||||
|
optimizer.zero_grad()
|
||||||
|
objective.backward()
|
||||||
|
optimizer.step()
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def soft_update(target_net, current_net, tau):
|
||||||
|
for tar, cur in zip(target_net.parameters(), current_net.parameters()):
|
||||||
|
tar.data.copy_(cur.data.__mul__(tau) + tar.data.__mul__(1.0 - tau))
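# Polyak averaging: theta_target <- tau * theta_current + (1 - tau) * theta_target, applied
# parameter-wise. With tau = 2 ** -8 the target critic trails the online critic slowly, so the
# values used when computing advantages change smoothly between updates.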
|
||||||
|
|
||||||
|
|
||||||
|
class Arguments:
|
||||||
|
def __init__(self, agent=None, env=None):
|
||||||
|
self.agent = agent # Deep Reinforcement Learning algorithm
|
||||||
|
self.env = env # the environment for training
|
||||||
|
self.cwd = None # current work directory. None means set automatically
|
||||||
|
self.if_remove = False # remove the cwd folder? (True, False, None:ask me)
|
||||||
|
self.visible_gpu = '0' # for example: os.environ['CUDA_VISIBLE_DEVICES'] = '0, 2,'
|
||||||
|
self.num_threads = 32 # cpu_num for evaluate model, torch.set_num_threads(self.num_threads)
|
||||||
|
|
||||||
|
'''Arguments for training'''
|
||||||
|
self.num_episode = 1000 # to control the train episodes for PPO
|
||||||
|
self.gamma = 0.995 # discount factor of future rewards
|
||||||
|
self.learning_rate = 1e-4 # 1e-4 2 ** -14 2e-4
|
||||||
|
self.soft_update_tau = 2 ** -8 # 1e-3 2 ** -8
|
||||||
|
|
||||||
|
self.net_dim = 256 # the network width
|
||||||
|
self.batch_size = 4096 # num of transitions sampled from replay buffer.
|
||||||
|
self.repeat_times = 2 ** 5  # number of update passes over each batch of collected data
self.target_step = 4096  # environment steps to collect before each network update
|
||||||
|
self.max_memo = self.target_step # capacity of replay buffer
|
||||||
|
self.if_gae_or_raw = True # GAE for on-policy sparse reward: Generalized Advantage Estimation.
|
||||||
|
|
||||||
|
'''Arguments for evaluate'''
|
||||||
|
self.random_seed = 1234
|
||||||
|
# self.random_seed_list = [1234, 2234, 3234, 4234, 5234]
|
||||||
|
self.random_seed_list = [1567]
|
||||||
|
self.train = True
|
||||||
|
self.save_network = True
|
||||||
|
self.test_network = True
|
||||||
|
self.save_test_data = True
|
||||||
|
self.compare_with_gurobi = True
|
||||||
|
self.plot_on = True
|
||||||
|
|
||||||
|
def init_before_training(self):
|
||||||
|
if self.cwd is None:
|
||||||
|
agent_name = self.agent.__class__.__name__
|
||||||
|
self.cwd = f'./{agent_name}'
|
||||||
|
|
||||||
|
np.random.seed(self.random_seed)
|
||||||
|
torch.manual_seed(self.random_seed)
|
||||||
|
torch.set_num_threads(self.num_threads)
|
||||||
|
torch.set_default_dtype(torch.float32)
|
||||||
|
os.environ['CUDA_VISIBLE_DEVICES'] = str(self.visible_gpu)
|
||||||
|
|
||||||
|
|
||||||
|
def update_buffer(_trajectory):
|
||||||
|
_trajectory = list(map(list, zip(*_trajectory))) # 2D-list transpose, here cut the trajectory into 5 parts
|
||||||
|
ten_state = torch.as_tensor(_trajectory[0]) # tensor state here
|
||||||
|
ten_reward = torch.as_tensor(_trajectory[1], dtype=torch.float32)
|
||||||
|
# _trajectory[2] = done; replace done with a mask to save memory
|
||||||
|
ten_mask = (1.0 - torch.as_tensor(_trajectory[2], dtype=torch.float32)) * gamma
|
||||||
|
ten_action = torch.as_tensor(_trajectory[3])
|
||||||
|
ten_noise = torch.as_tensor(_trajectory[4], dtype=torch.float32)
|
||||||
|
|
||||||
|
buffer[:] = (ten_state, ten_action, ten_noise, ten_reward, ten_mask) # list store tensors
|
||||||
|
|
||||||
|
_steps = ten_reward.shape[0] # how many steps are collected in all trajectories
|
||||||
|
_r_exp = ten_reward.mean() # the mean reward
|
||||||
|
return _steps, _r_exp
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
args = Arguments()
|
||||||
|
reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
|
||||||
|
loss_record = {'episode': [], 'steps': [], 'critic_loss': [], 'actor_loss': [], 'entropy_loss': []}
|
||||||
|
args.visible_gpu = '0'
|
||||||
|
for seed in args.random_seed_list:
|
||||||
|
args.random_seed = seed
|
||||||
|
args.agent = AgentPPO()
|
||||||
|
agent_name = f'{args.agent.__class__.__name__}'
|
||||||
|
args.agent.cri_target = True
|
||||||
|
args.env = ESSEnv()
|
||||||
|
args.init_before_training()
|
||||||
|
'''init agent and environment'''
|
||||||
|
agent = args.agent
|
||||||
|
env = args.env
|
||||||
|
agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
|
||||||
|
args.if_gae_or_raw)
|
||||||
|
cwd = args.cwd
|
||||||
|
gamma = args.gamma
|
||||||
|
batch_size = args.batch_size  # number of transitions sampled per network update
target_step = args.target_step  # environment steps to collect per exploration rollout
repeat_times = args.repeat_times  # update passes to run over each batch of collected data
|
||||||
|
soft_update_tau = args.soft_update_tau
|
||||||
|
num_episode = args.num_episode
|
||||||
|
agent.state = env.reset()
|
||||||
|
'''init buffer'''
|
||||||
|
buffer = list()
|
||||||
|
'''init training parameters'''
|
||||||
|
args.train = False
|
||||||
|
args.save_network = False
|
||||||
|
# args.test_network = False
|
||||||
|
# args.save_test_data = False
|
||||||
|
# args.compare_with_gurobi = False
|
||||||
|
# args.plot_on = False
|
||||||
|
if args.train:
|
||||||
|
for i_episode in range(num_episode):
|
||||||
|
with torch.no_grad():
|
||||||
|
trajectory_list = agent.explore_env(env, target_step)
|
||||||
|
steps, r_exp = update_buffer(trajectory_list)
|
||||||
|
critic_loss, actor_loss, entropy_loss = agent.update_net(buffer, batch_size, repeat_times,
|
||||||
|
soft_update_tau)
|
||||||
|
loss_record['critic_loss'].append(critic_loss)
|
||||||
|
loss_record['actor_loss'].append(actor_loss)
|
||||||
|
loss_record['entropy_loss'].append(entropy_loss)
|
||||||
|
|
||||||
|
with torch.no_grad():
|
||||||
|
episode_reward, episode_unbalance = get_episode_return(env, agent.act, agent.device)
|
||||||
|
reward_record['mean_episode_reward'].append(episode_reward)
|
||||||
|
reward_record['unbalance'].append(episode_unbalance)
|
||||||
|
print(f'episode: {i_episode}, reward: {episode_reward}, unbalance: {episode_unbalance}')
|
||||||
|
|
||||||
|
act_save_path = f'{args.cwd}/actor_10.pth'
|
||||||
|
loss_record_path = f'{args.cwd}/loss_10.pkl'
|
||||||
|
reward_record_path = f'{args.cwd}/reward_10.pkl'
|
||||||
|
|
||||||
|
if args.save_network:
|
||||||
|
with open(loss_record_path, 'wb') as tf:
|
||||||
|
pickle.dump(loss_record, tf)
|
||||||
|
with open(reward_record_path, 'wb') as tf:
|
||||||
|
pickle.dump(reward_record, tf)
|
||||||
|
torch.save(agent.act.state_dict(), act_save_path)
|
||||||
|
print('actor parameters have been saved')
|
||||||
|
|
||||||
|
if args.test_network:
|
||||||
|
args.cwd = agent_name
|
||||||
|
agent.act.load_state_dict(torch.load(act_save_path))
|
||||||
|
print('actor parameters have been reloaded for testing')
|
||||||
|
record = test_one_episode(env, agent.act, agent.device)
|
||||||
|
eval_data = pd.DataFrame(record['system_info'])
|
||||||
|
eval_data.columns = ['time_step', 'price', 'load', 'action', 'real_action', 'soc', 'battery',
|
||||||
|
'gen1', 'gen2', 'gen3', 'pv', 'wind', 'unbalance', 'operation_cost', 'reward']
|
||||||
|
if args.save_test_data:
|
||||||
|
test_data_save_path = f'{args.cwd}/test_10.pkl'
|
||||||
|
with open(test_data_save_path, 'wb') as tf:
|
||||||
|
pickle.dump(record, tf)
|
||||||
|
|
||||||
|
'''compare with gurobi data and results'''
|
||||||
|
if args.compare_with_gurobi:
|
||||||
|
month = record['init_info'][0][0]
|
||||||
|
day = record['init_info'][0][1]
|
||||||
|
initial_soc = record['init_info'][0][3]
|
||||||
|
base_result = optimization_base_result(env, month, day, initial_soc)
|
||||||
|
if args.plot_on:
|
||||||
|
from plotDRL import PlotArgs, make_dir, plot_evaluation_information, plot_optimization_result
|
||||||
|
|
||||||
|
plot_args = PlotArgs()
|
||||||
|
plot_args.feature_change = '10'
|
||||||
|
args.cwd = agent_name
|
||||||
|
plot_dir = make_dir(args.cwd, plot_args.feature_change)
|
||||||
|
plot_optimization_result(base_result, plot_dir)
|
||||||
|
plot_evaluation_information(args.cwd + '/' + 'test_10.pkl', plot_dir)
|
||||||
|
'''compare the costs obtained from gurobi and PPO'''
ratio = sum(eval_data['operation_cost']) / sum(base_result['step_cost'])
print('rl_cost:', sum(eval_data['operation_cost']))
print('gurobi_cost:', sum(base_result['step_cost']))
print('ratio:', ratio)
|
|
@ -0,0 +1,472 @@
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import pickle
|
||||||
|
from copy import deepcopy
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
from data_manager import *
|
||||||
|
from environment import ESSEnv
|
||||||
|
from tools import optimization_base_result
|
||||||
|
|
||||||
|
|
||||||
|
def load_llm_actions(file_path):
|
||||||
|
with open(file_path, 'r') as file:
|
||||||
|
data = json.load(file)
|
||||||
|
return data
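# Assumption about the file format: 'data/results.json' is expected to hold a flat list of
# pre-computed LLM action vectors (one 5-dimensional action per hour of the year), so that
# llm_actions[index] can be looked up with the (month, day, hour) index built in the training
# loop below. The exact schema is not defined in this file.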
|
||||||
|
|
||||||
|
|
||||||
|
class ActorPPO(nn.Module):
|
||||||
|
def __init__(self, mid_dim, state_dim, action_dim):
|
||||||
|
super().__init__()
|
||||||
|
self.net = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(),
|
||||||
|
nn.Linear(mid_dim, mid_dim), nn.ReLU(),
|
||||||
|
nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
|
||||||
|
nn.Linear(mid_dim, action_dim))
|
||||||
|
# log standard deviation of the action distribution; a trainable parameter
|
||||||
|
self.a_logstd = nn.Parameter(torch.zeros((1, action_dim)) - 0.5, requires_grad=True)
|
||||||
|
self.sqrt_2pi_log = np.log(np.sqrt(2 * np.pi))
|
||||||
|
|
||||||
|
def forward(self, state):
|
||||||
|
return self.net(state).tanh() # action.tanh() limit the data output of action
|
||||||
|
|
||||||
|
def get_action(self, state):
|
||||||
|
a_avg = self.forward(state)  # mean of the Gaussian policy (tanh-squashed network output)
|
||||||
|
a_std = self.a_logstd.exp()
|
||||||
|
|
||||||
|
noise = torch.randn_like(a_avg)
|
||||||
|
action = a_avg + noise * a_std
|
||||||
|
return action, noise
|
||||||
|
|
||||||
|
def get_logprob_entropy(self, state, action):
|
||||||
|
a_avg = self.forward(state)
|
||||||
|
a_std = self.a_logstd.exp()
|
||||||
|
|
||||||
|
delta = ((a_avg - action) / a_std).pow(2) * 0.5
|
||||||
|
logprob = -(self.a_logstd + self.sqrt_2pi_log + delta).sum(1) # new_logprob
|
||||||
|
|
||||||
|
dist_entropy = (logprob.exp() * logprob).mean() # policy entropy
|
||||||
|
return logprob, dist_entropy
|
||||||
|
|
||||||
|
def get_old_logprob(self, _action, noise): # noise = action - a_noise
|
||||||
|
delta = noise.pow(2) * 0.5
|
||||||
|
return -(self.a_logstd + self.sqrt_2pi_log + delta).sum(1) # old_logprob
|
||||||
|
|
||||||
|
|
||||||
|
class CriticAdv(nn.Module):
|
||||||
|
def __init__(self, mid_dim, state_dim, _action_dim):
|
||||||
|
super().__init__()
|
||||||
|
self.net = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(),
|
||||||
|
nn.Linear(mid_dim, mid_dim), nn.ReLU(),
|
||||||
|
nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
|
||||||
|
nn.Linear(mid_dim, 1)
|
||||||
|
)
|
||||||
|
|
||||||
|
def forward(self, state):
|
||||||
|
return self.net(state) # Advantage value
|
||||||
|
|
||||||
|
|
||||||
|
class AgentPPO:
|
||||||
|
def __init__(self):
|
||||||
|
super().__init__()
|
||||||
|
self.state = None
|
||||||
|
self.device = None
|
||||||
|
self.action_dim = None
|
||||||
|
self.get_obj_critic = None
|
||||||
|
self.current_index = 0
|
||||||
|
|
||||||
|
self.criterion = torch.nn.SmoothL1Loss()
|
||||||
|
self.cri = self.cri_target = self.if_use_cri_target = self.cri_optim = self.ClassCri = None
|
||||||
|
self.act = self.act_target = self.if_use_act_target = self.act_optim = self.ClassAct = None
|
||||||
|
|
||||||
|
self.ClassCri = CriticAdv
|
||||||
|
self.ClassAct = ActorPPO
|
||||||
|
|
||||||
|
self.ratio_clip = 0.2 # ratio.clamp(1 - clip, 1 + clip)
|
||||||
|
self.lambda_entropy = 0.02 # could be 0.01~0.05
|
||||||
|
self.lambda_gae_adv = 0.98 # could be 0.95~0.99, GAE (Generalized Advantage Estimation. ICLR.2016.)
|
||||||
|
self.get_reward_sum = None # self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
|
||||||
|
self.trajectory_list = None
|
||||||
|
self.llm_actions = load_llm_actions('data/results.json')
|
||||||
|
|
||||||
|
def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, if_use_gae=False, gpu_id=0):
|
||||||
|
self.device = torch.device(f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu")
|
||||||
|
self.trajectory_list = list()
|
||||||
|
# choose whether to use gae or not
|
||||||
|
self.get_reward_sum = self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
|
||||||
|
|
||||||
|
self.cri = self.ClassCri(net_dim, state_dim, action_dim).to(self.device)
|
||||||
|
self.act = self.ClassAct(net_dim, state_dim, action_dim).to(self.device) if self.ClassAct else self.cri
|
||||||
|
self.cri_target = deepcopy(self.cri) if self.if_use_cri_target else self.cri
|
||||||
|
self.act_target = deepcopy(self.act) if self.if_use_act_target else self.act
|
||||||
|
|
||||||
|
self.cri_optim = torch.optim.Adam(self.cri.parameters(), learning_rate)
|
||||||
|
self.act_optim = torch.optim.Adam(self.act.parameters(), learning_rate) if self.ClassAct else self.cri
|
||||||
|
|
||||||
|
def select_action(self, state):
|
||||||
|
states = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
|
||||||
|
actions, noises = self.act.get_action(states)
|
||||||
|
return actions[0].detach().cpu().numpy(), noises[0].detach().cpu().numpy()
|
||||||
|
|
||||||
|
def explore_env(self, env, target_step, index):
|
||||||
|
state = self.state  # resume exploration from the state carried over from the previous call
|
||||||
|
trajectory_temp = list()
|
||||||
|
last_done = 0
|
||||||
|
for i in range(target_step):
|
||||||
|
action, noise = self.select_action(state)
|
||||||
|
llm_action = self.llm_actions[index]
|
||||||
|
action = [0.95 * action[i] + 0.05 * llm_action[i] for i in range(5)]
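# The rollout action is a fixed convex blend: 95% of the current policy's action plus 5% of the
# LLM-suggested action for this date (the 5 here is the hard-coded action dimension). Note that
# the same llm_actions[index] entry is reused for every step of this rollout, since index is
# passed in once per call to explore_env.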
|
||||||
|
state, next_state, reward, done, = env.step(np.tanh(action))
|
||||||
|
trajectory_temp.append((state, reward, done, action, noise))
|
||||||
|
if done:
|
||||||
|
state = env.reset()
|
||||||
|
last_done = i
|
||||||
|
else:
|
||||||
|
state = next_state
|
||||||
|
self.state = state
|
||||||
|
|
||||||
|
'''splice list'''
|
||||||
|
# keep the steps up to the last completed episode for this update
|
||||||
|
trajectory_list = self.trajectory_list + trajectory_temp[:last_done + 1]
|
||||||
|
self.trajectory_list = trajectory_temp[last_done:]
|
||||||
|
return trajectory_list
|
||||||
|
|
||||||
|
def update_net(self, buffer, batch_size, repeat_times, soft_update_tau):
|
||||||
|
"""put data extract and update network together"""
|
||||||
|
with torch.no_grad():
|
||||||
|
buf_len = buffer[0].shape[0]
|
||||||
|
# decompose buffer data
|
||||||
|
buf_state, buf_action, buf_noise, buf_reward, buf_mask = [ten.to(self.device) for ten in buffer]
|
||||||
|
|
||||||
|
'''get buf_r_sum, buf_logprob'''
|
||||||
|
bs = 4096
|
||||||
|
buf_value = [self.cri_target(buf_state[i:i + bs]) for i in range(0, buf_len, bs)]
|
||||||
|
buf_value = torch.cat(buf_value, dim=0)
|
||||||
|
buf_logprob = self.act.get_old_logprob(buf_action, buf_noise)
|
||||||
|
|
||||||
|
buf_r_sum, buf_advantage = self.get_reward_sum(buf_len, buf_reward, buf_mask, buf_value) # detach()
|
||||||
|
# normalize advantage
|
||||||
|
buf_advantage = (buf_advantage - buf_advantage.mean()) / (buf_advantage.std() + 1e-5)
|
||||||
|
del buf_noise, buffer[:]
|
||||||
|
|
||||||
|
'''PPO: Surrogate objective of Trust Region'''
|
||||||
|
obj_critic = obj_actor = None
|
||||||
|
for _ in range(int(buf_len / batch_size * repeat_times)):
|
||||||
|
indices = torch.randint(buf_len, size=(batch_size,), requires_grad=False, device=self.device)
|
||||||
|
|
||||||
|
state = buf_state[indices]
|
||||||
|
action = buf_action[indices]
|
||||||
|
r_sum = buf_r_sum[indices]
|
||||||
|
logprob = buf_logprob[indices]
|
||||||
|
advantage = buf_advantage[indices]
|
||||||
|
|
||||||
|
new_logprob, obj_entropy = self.act.get_logprob_entropy(state, action) # it's obj_actor
|
||||||
|
ratio = (new_logprob - logprob.detach()).exp()
|
||||||
|
surrogate1 = advantage * ratio
|
||||||
|
surrogate2 = advantage * ratio.clamp(1 - self.ratio_clip, 1 + self.ratio_clip)
|
||||||
|
obj_surrogate = -torch.min(surrogate1, surrogate2).mean()
|
||||||
|
obj_actor = obj_surrogate + obj_entropy * self.lambda_entropy
|
||||||
|
self.optim_update(self.act_optim, obj_actor) # update actor
|
||||||
|
|
||||||
|
value = self.cri(state).squeeze(1) # critic network predicts the reward_sum (Q value) of state
|
||||||
|
# use smoothloss L1 to evaluate the value loss
|
||||||
|
# obj_critic = self.criterion(value, r_sum) / (r_sum.std() + 1e-6)
|
||||||
|
obj_critic = self.criterion(value, r_sum)
|
||||||
|
self.optim_update(self.cri_optim, obj_critic)  # backpropagate the value loss and update the critic
|
||||||
|
# choose whether to use soft update
|
||||||
|
self.soft_update(self.cri_target, self.cri, soft_update_tau) if self.cri_target is not self.cri else None
|
||||||
|
|
||||||
|
a_std_log = getattr(self.act, 'a_std_log', torch.zeros(1))
|
||||||
|
return obj_critic.item(), obj_actor.item(), a_std_log.mean().item() # logging_tuple
|
||||||
|
|
||||||
|
def get_reward_sum_raw(self, buf_len, buf_reward, buf_mask, buf_value) -> (torch.Tensor, torch.Tensor):
|
||||||
|
buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device) # reward sum
|
||||||
|
|
||||||
|
pre_r_sum = 0
|
||||||
|
for i in range(buf_len - 1, -1, -1):
|
||||||
|
buf_r_sum[i] = buf_reward[i] + buf_mask[i] * pre_r_sum
|
||||||
|
pre_r_sum = buf_r_sum[i]
|
||||||
|
buf_advantage = buf_r_sum - (buf_mask * buf_value[:, 0])
|
||||||
|
return buf_r_sum, buf_advantage
|
||||||
|
|
||||||
|
def get_reward_sum_gae(self, buf_len, ten_reward, ten_mask, ten_value) -> (torch.Tensor, torch.Tensor):
|
||||||
|
buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device) # old policy value
|
||||||
|
buf_advantage = torch.empty(buf_len, dtype=torch.float32, device=self.device) # advantage value
|
||||||
|
|
||||||
|
pre_r_sum = 0.0
|
||||||
|
pre_advantage = 0.0 # advantage value of previous step
|
||||||
|
for i in range(buf_len - 1, -1, -1):
|
||||||
|
buf_r_sum[i] = ten_reward[i] + ten_mask[i] * pre_r_sum
|
||||||
|
pre_r_sum = buf_r_sum[i]
|
||||||
|
buf_advantage[i] = ten_reward[i] + ten_mask[i] * (pre_advantage - ten_value[i]) # fix a bug here
|
||||||
|
pre_advantage = ten_value[i] + buf_advantage[i] * self.lambda_gae_adv
|
||||||
|
return buf_r_sum, buf_advantage
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def optim_update(optimizer, objective):
|
||||||
|
optimizer.zero_grad()
|
||||||
|
objective.backward()
|
||||||
|
optimizer.step()
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def soft_update(target_net, current_net, tau):
|
||||||
|
for tar, cur in zip(target_net.parameters(), current_net.parameters()):
|
||||||
|
tar.data.copy_(cur.data.__mul__(tau) + tar.data.__mul__(1.0 - tau))
|
||||||
|
|
||||||
|
|
||||||
|
class Arguments:
|
||||||
|
def __init__(self, agent=None, env=None):
|
||||||
|
self.agent = agent # Deep Reinforcement Learning algorithm
|
||||||
|
self.env = env # the environment for training
|
||||||
|
self.cwd = None # current work directory. None means set automatically
|
||||||
|
self.visible_gpu = '0' # for example: os.environ['CUDA_VISIBLE_DEVICES'] = '0, 2,'
|
||||||
|
self.num_threads = 32 # cpu_num for evaluate model, torch.set_num_threads(self.num_threads)
|
||||||
|
|
||||||
|
'''Arguments for training'''
|
||||||
|
self.num_episode = 1000 # to control the train episodes for PPO
|
||||||
|
self.gamma = 0.995 # discount factor of future rewards
|
||||||
|
self.learning_rate = 1e-4 # 2e-4 / 6e-5 / 2 ** -4
|
||||||
|
self.soft_update_tau = 2 ** -8 # 5e-3 / 4e-4 / 2 ** -8 1e-3
|
||||||
|
|
||||||
|
self.net_dim = 256 # the network width
|
||||||
|
self.batch_size = 4096 # num of transitions sampled from replay buffer.
|
||||||
|
self.repeat_times = 2 ** 5  # number of update passes over each batch of collected data
self.target_step = 4096  # environment steps to collect before each network update
|
||||||
|
self.max_memo = self.target_step # capacity of replay buffer
|
||||||
|
self.if_gae_or_raw = True # GAE for on-policy sparse reward: Generalized Advantage Estimation.
|
||||||
|
|
||||||
|
'''Arguments for evaluate'''
|
||||||
|
self.random_seed = 1234 # initialize random seed in self.init_before_training()
|
||||||
|
# self.random_seed_list = [1234, 2234, 3234]
|
||||||
|
self.random_seed_list = [3234]
|
||||||
|
self.train = True
|
||||||
|
self.save_network = True
|
||||||
|
self.test_network = True
|
||||||
|
self.save_test_data = True
|
||||||
|
self.compare_with_gurobi = True
|
||||||
|
self.plot_on = True
|
||||||
|
|
||||||
|
def init_before_training(self):
|
||||||
|
if self.cwd is None:
|
||||||
|
agent_name = self.agent.__class__.__name__
|
||||||
|
self.cwd = f'./{agent_name}'
|
||||||
|
|
||||||
|
np.random.seed(self.random_seed)
|
||||||
|
torch.manual_seed(self.random_seed)
|
||||||
|
torch.set_num_threads(self.num_threads)
|
||||||
|
torch.set_default_dtype(torch.float32)
|
||||||
|
|
||||||
|
os.environ['CUDA_VISIBLE_DEVICES'] = str(self.visible_gpu)
|
||||||
|
|
||||||
|
|
||||||
|
def update_buffer(_trajectory):
|
||||||
|
_trajectory = list(map(list, zip(*_trajectory))) # 2D-list transpose, here cut the trajectory into 5 parts
|
||||||
|
ten_state = torch.as_tensor(_trajectory[0], dtype=torch.float32) # tensor state here
|
||||||
|
ten_reward = torch.as_tensor(_trajectory[1], dtype=torch.float32)
|
||||||
|
# _trajectory[2] = done, replace done by mask, save memory
|
||||||
|
ten_mask = (1.0 - torch.as_tensor(_trajectory[2], dtype=torch.float32)) * gamma
|
||||||
|
ten_action = torch.as_tensor(_trajectory[3], dtype=torch.float32)
|
||||||
|
ten_noise = torch.as_tensor(_trajectory[4], dtype=torch.float32)
|
||||||
|
|
||||||
|
buffer[:] = (ten_state, ten_action, ten_noise, ten_reward, ten_mask) # list store tensors
|
||||||
|
|
||||||
|
_steps = ten_reward.shape[0] # how many steps are collected in all trajectories
|
||||||
|
_r_exp = ten_reward.mean() # the mean reward
|
||||||
|
return _steps, _r_exp
|
||||||
|
|
||||||
|
|
||||||
|
def test_one_episode(env, act, device, index):
|
||||||
|
"""to get evaluate information, here record the unbalance of after taking action"""
|
||||||
|
record_state = []
|
||||||
|
record_action = []
|
||||||
|
record_reward = []
|
||||||
|
record_unbalance = []
|
||||||
|
record_system_info = [] # [time, price, netload, action, real action, soc, output*4, unbalance, cost]
|
||||||
|
record_init_info = []  # includes month, day, time, initial soc
|
||||||
|
env.TRAIN = False
|
||||||
|
state = env.reset()
|
||||||
|
record_init_info.append([env.month, env.day, env.current_time, env.battery.current_capacity])
|
||||||
|
print(f'current testing month is {env.month}, day is {env.day}, initial_soc is {env.battery.current_capacity}')
|
||||||
|
llm_actions = load_llm_actions('data/results.json')
|
||||||
|
for i in range(24):
|
||||||
|
s_tensor = torch.as_tensor((state,), device=device)
|
||||||
|
a_tensor = act(s_tensor)
|
||||||
|
rl_action = a_tensor.detach().cpu().numpy()[0]
|
||||||
|
llm_action = llm_actions[index]
|
||||||
|
action = [0.95 * rl_action[i] + 0.05 * llm_action[i] for i in range(5)]
|
||||||
|
real_action = action
|
||||||
|
state, next_state, reward, done = env.step(action)
|
||||||
|
|
||||||
|
record_system_info.append([state[0], state[1], state[3] + env.wind.current_power, action, real_action,
|
||||||
|
env.battery.SOC(), env.battery.energy_change, next_state[4], next_state[5],
|
||||||
|
next_state[6], env.solar.current_power, env.wind.current_power, env.unbalance,
|
||||||
|
env.operation_cost, reward])
|
||||||
|
record_state.append(state)
|
||||||
|
record_action.append(real_action)
|
||||||
|
record_reward.append(reward)
|
||||||
|
record_unbalance.append(env.unbalance)
|
||||||
|
state = next_state
|
||||||
|
# add information of the last step: dg1, dg2, dg3, soc, tem, irr
|
||||||
|
record_system_info[-1][7:12] = [env.final_step_outputs[0], env.final_step_outputs[1], env.final_step_outputs[2],
|
||||||
|
env.final_step_outputs[4], env.final_step_outputs[5]]
|
||||||
|
record_system_info[-1][5] = env.final_step_outputs[3]
|
||||||
|
record = {'init_info': record_init_info, 'system_info': record_system_info, 'state': record_state,
|
||||||
|
'action': record_action, 'reward': record_reward, 'unbalance': record_unbalance}
|
||||||
|
return record
|
||||||
|
|
||||||
|
|
||||||
|
def test_llm(env, index):
|
||||||
|
record_system_info = []
|
||||||
|
record_init_info = []
|
||||||
|
env.TRAIN = False
|
||||||
|
record_init_info.append([env.month, env.day, env.current_time, env.battery.current_capacity])
|
||||||
|
llm_actions = load_llm_actions('data/results.json')
|
||||||
|
|
||||||
|
cumulative_reward = 0
|
||||||
|
for i in range(24):
|
||||||
|
action = llm_actions[index + i]
|
||||||
|
real_action = action
|
||||||
|
state, next_state, reward, done = env.step(action)
|
||||||
|
record_system_info.append([state[0], state[1], state[3] + env.wind.current_power, action, real_action,
|
||||||
|
env.battery.SOC(), env.battery.energy_change, next_state[4], next_state[5],
|
||||||
|
next_state[6], env.solar.current_power, env.wind.current_power, env.unbalance,
|
||||||
|
env.operation_cost, reward])
|
||||||
|
cumulative_reward += reward
|
||||||
|
if done:
|
||||||
|
break
|
||||||
|
|
||||||
|
record_system_info[-1][7:12] = [env.final_step_outputs[0], env.final_step_outputs[1], env.final_step_outputs[2],
|
||||||
|
env.final_step_outputs[4], env.final_step_outputs[5]]
|
||||||
|
record_system_info[-1][5] = env.final_step_outputs[3]
|
||||||
|
record = {'system_info': record_system_info, 'cumulative_reward': cumulative_reward}
|
||||||
|
return record
|
||||||
|
|
||||||
|
|
||||||
|
def get_episode_return(env, act, device, index):
|
||||||
|
episode_reward = 0.0
|
||||||
|
episode_unbalance = 0.0
|
||||||
|
state = env.reset()
|
||||||
|
llm_actions = load_llm_actions('data/results.json')
|
||||||
|
for i in range(24):
|
||||||
|
s_tensor = torch.as_tensor((state,), device=device)
|
||||||
|
a_tensor = act(s_tensor)
|
||||||
|
rl_action = a_tensor.detach().cpu().numpy()[0]
|
||||||
|
llm_action = llm_actions[index]
|
||||||
|
action = [0.95 * rl_action[i] + 0.05 * llm_action[i] for i in range(5)]
|
||||||
|
state, next_state, reward, done, = env.step(action)
|
||||||
|
state = next_state
|
||||||
|
episode_reward += reward
|
||||||
|
episode_unbalance += env.real_unbalance
|
||||||
|
if done:
|
||||||
|
break
|
||||||
|
return episode_reward, episode_unbalance
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
args = Arguments()
|
||||||
|
reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
|
||||||
|
loss_record = {'episode': [], 'steps': [], 'critic_loss': [], 'actor_loss': [], 'entropy_loss': []}
|
||||||
|
args.visible_gpu = '0,1'
|
||||||
|
for seed in args.random_seed_list:
|
||||||
|
args.random_seed = seed
|
||||||
|
args.agent = AgentPPO()
|
||||||
|
args.agent.cri_target = True
|
||||||
|
agent_name = f'{args.agent.__class__.__name__}'
|
||||||
|
|
||||||
|
args.env = ESSEnv()
|
||||||
|
args.init_before_training()
|
||||||
|
'''init agent and environment'''
|
||||||
|
agent = args.agent
|
||||||
|
env = args.env
|
||||||
|
agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
|
||||||
|
args.if_gae_or_raw)
|
||||||
|
cwd = args.cwd
|
||||||
|
gamma = args.gamma
|
||||||
|
batch_size = args.batch_size  # number of transitions sampled per network update
target_step = args.target_step  # environment steps to collect per exploration rollout
repeat_times = args.repeat_times  # update passes to run over each batch of collected data
|
||||||
|
soft_update_tau = args.soft_update_tau
|
||||||
|
agent.state = env.reset()
|
||||||
|
'''init buffer'''
|
||||||
|
buffer = list()
|
||||||
|
'''init training parameters'''
|
||||||
|
num_episode = args.num_episode
|
||||||
|
args.train = False
|
||||||
|
args.save_network = False
|
||||||
|
# args.test_network = False
|
||||||
|
# args.save_test_data = False
|
||||||
|
# args.compare_with_gurobi = False
|
||||||
|
# args.plot_on = False
|
||||||
|
if args.train:
|
||||||
|
for i_episode in range(num_episode):
|
||||||
|
with torch.no_grad():
|
||||||
|
index = (sum(Constant.MONTHS_LEN[:env.month - 1]) + env.day - 1) * 24 + env.current_time
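# Index construction: assuming Constant.MONTHS_LEN lists the number of days in each month,
# this maps the environment's current (month, day, hour) to a position in the flat llm_actions
# list, i.e. index = (day_of_year - 1) * 24 + hour, matching the hourly layout assumed for
# data/results.json above.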
|
||||||
|
trajectory_list = agent.explore_env(env, target_step, index)
|
||||||
|
steps, r_exp = update_buffer(trajectory_list)
|
||||||
|
critic_loss, actor_loss, entropy_loss = agent.update_net(buffer, batch_size, repeat_times,
|
||||||
|
soft_update_tau)
|
||||||
|
loss_record['critic_loss'].append(critic_loss)
|
||||||
|
loss_record['actor_loss'].append(actor_loss)
|
||||||
|
loss_record['entropy_loss'].append(entropy_loss)
|
||||||
|
|
||||||
|
with torch.no_grad():
|
||||||
|
episode_reward, episode_unbalance = get_episode_return(env, agent.act, agent.device, index)
|
||||||
|
reward_record['mean_episode_reward'].append(episode_reward)
|
||||||
|
reward_record['unbalance'].append(episode_unbalance)
|
||||||
|
print(f'episode: {i_episode}, reward: {episode_reward}, unbalance: {episode_unbalance}')
|
||||||
|
|
||||||
|
act_save_path = f'{args.cwd}/actor_llm_1015.pth'
|
||||||
|
loss_record_path = f'{args.cwd}/loss_llm_1015.pkl'
|
||||||
|
reward_record_path = f'{args.cwd}/reward_llm_1015.pkl'
|
||||||
|
|
||||||
|
if args.save_network:
|
||||||
|
with open(loss_record_path, 'wb') as tf:
|
||||||
|
pickle.dump(loss_record, tf)
|
||||||
|
with open(reward_record_path, 'wb') as tf:
|
||||||
|
pickle.dump(reward_record, tf)
|
||||||
|
torch.save(agent.act.state_dict(), act_save_path)
|
||||||
|
print('actor parameters have been saved')
|
||||||
|
|
||||||
|
if args.test_network:
|
||||||
|
args.cwd = agent_name
|
||||||
|
agent.act.load_state_dict(torch.load(act_save_path))
|
||||||
|
print('actor parameters have been reloaded for testing')
|
||||||
|
index = (sum(Constant.MONTHS_LEN[:env.month - 1]) + env.day - 1) * 24 + env.current_time
|
||||||
|
record = test_one_episode(env, agent.act, agent.device, index)
|
||||||
|
# re = test_llm(env, index)
|
||||||
|
rl_data = pd.DataFrame(record['system_info'])
|
||||||
|
# llm_data = pd.DataFrame(re['system_info'])
|
||||||
|
rl_data.columns = ['time_step', 'price', 'netload', 'action', 'real_action', 'soc', 'battery',
|
||||||
|
'gen1', 'gen2', 'gen3', 'pv', 'wind', 'unbalance', 'operation_cost', 'reward']
|
||||||
|
if args.save_test_data:
|
||||||
|
test_data_save_path = f'{args.cwd}/test_llm_1015.pkl'
|
||||||
|
# test_llm_save_path = f'{args.cwd}/test_only_llm_1015.pkl'
|
||||||
|
with open(test_data_save_path, 'wb') as tf:
|
||||||
|
pickle.dump(record, tf)
|
||||||
|
# with open(test_llm_save_path, 'wb') as f:
|
||||||
|
# pickle.dump(re, f)
|
||||||
|
|
||||||
|
'''compare with gurobi data and results'''
|
||||||
|
if args.compare_with_gurobi:
|
||||||
|
month = record['init_info'][0][0]
|
||||||
|
day = record['init_info'][0][1]
|
||||||
|
initial_soc = record['init_info'][0][3]
|
||||||
|
base_result = optimization_base_result(env, month, day, initial_soc)
|
||||||
|
|
||||||
|
if args.plot_on:
|
||||||
|
from plotDRL import *
|
||||||
|
|
||||||
|
plot_args = PlotArgs()
|
||||||
|
plot_args.feature_change = 'llm_1015'
|
||||||
|
args.cwd = agent_name
|
||||||
|
plot_dir = make_dir(args.cwd, plot_args.feature_change)
|
||||||
|
# plot_optimization_result(base_result, plot_dir)
|
||||||
|
# plot_evaluation_information(args.cwd + '/' + 'test_llm_1015.pkl', plot_dir)
|
||||||
|
plot_soc(base_result, args.cwd + '/' + 'test_llm_1015.pkl', plot_dir)
|
||||||
|
plot_energy(base_result, args.cwd + '/' + 'test_llm_1015.pkl', plot_dir)
|
||||||
|
|
||||||
|
'''compare the costs obtained from gurobi and PPO'''
print('rl_cost:', sum(rl_data['operation_cost']))
print('gurobi_cost:', sum(base_result['step_cost']))
print('ratio:', sum(rl_data['operation_cost']) / sum(base_result['step_cost']))
|
|
@ -0,0 +1,136 @@
|
||||||
|
import random
|
||||||
|
from PPO_llm import *
|
||||||
|
from plotDRL import *
|
||||||
|
|
||||||
|
|
||||||
|
def generate_test_dates(num):
|
||||||
|
dates = []
|
||||||
|
months_days = {1: 31, 2: 28, 3: 31, 4: 30, 5: 31, 6: 30, 7: 31, 8: 31, 9: 30, 10: 31, 11: 30, 12: 31}
|
||||||
|
while len(dates) < num:
|
||||||
|
month = random.randint(1, 12)
|
||||||
|
day = random.randint(20, months_days[month])
|
||||||
|
if (month, day) not in dates:
|
||||||
|
dates.append((month, day))
|
||||||
|
return dates
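# Usage sketch: generate_test_dates(3) returns three distinct random (month, day) pairs with
# day >= 20, e.g. [(5, 23), (11, 28), (2, 26)] (the exact dates vary from run to run).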
|
||||||
|
|
||||||
|
|
||||||
|
def test_llm(env, llm_actions, index):
|
||||||
|
sum_reward = 0
|
||||||
|
sum_unbalance = 0
|
||||||
|
for i in range(24):
|
||||||
|
action = llm_actions[index + i]
|
||||||
|
state, next_state, reward, done = env.step(action)
|
||||||
|
sum_reward += reward
|
||||||
|
sum_unbalance += env.real_unbalance
|
||||||
|
record = {'reward': [sum_reward], 'unbalance': [sum_unbalance]}
|
||||||
|
return record
|
||||||
|
|
||||||
|
|
||||||
|
def test_one_episode(env, act, device):
|
||||||
|
state = env.reset()
|
||||||
|
sum_reward = 0
|
||||||
|
sum_unbalance = 0
|
||||||
|
for i in range(24):
|
||||||
|
s_tensor = torch.as_tensor((state,), device=device)
|
||||||
|
a_tensor = act(s_tensor)
|
||||||
|
action = a_tensor.detach().cpu().numpy()[0]
|
||||||
|
# rl_action = a_tensor.detach().cpu().numpy()[0]
|
||||||
|
# llm_action = llm_actions[index + i]
|
||||||
|
# action = 0.95 * np.array(rl_action) + 0.05 * np.array(llm_action)
|
||||||
|
state, next_state, reward, done = env.step(action)
|
||||||
|
state = next_state
|
||||||
|
sum_reward += reward
|
||||||
|
sum_unbalance += env.real_unbalance
|
||||||
|
record = {'reward': [sum_reward], 'unbalance': [sum_unbalance]}
|
||||||
|
return record
|
||||||
|
|
||||||
|
|
||||||
|
def run_multiple_tests(env, args, agent1, agent2, num_tests=10):
|
||||||
|
all_ppo = pd.DataFrame()
|
||||||
|
all_llm = pd.DataFrame()
|
||||||
|
all_lmppo = pd.DataFrame()
|
||||||
|
all_milp = pd.DataFrame()
|
||||||
|
|
||||||
|
test_dates = generate_test_dates(num_tests)
|
||||||
|
llm_actions = load_llm_actions('data/results.json')
|
||||||
|
|
||||||
|
for month, day in test_dates:
|
||||||
|
print(f'current testing month is {month}, day is {day}')
|
||||||
|
index = (sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24
|
||||||
|
|
||||||
|
ppo_res = test_one_episode(env, agent1.act, agent1.device)
|
||||||
|
base_res = optimization_base_result(env, month, day)
|
||||||
|
llm_res = test_llm(env, llm_actions, index)
|
||||||
|
lmppo_res = test_one_episode(env, agent2.act, agent2.device)
|
||||||
|
|
||||||
|
ppo_data = pd.DataFrame(ppo_res)
|
||||||
|
m_reward = - base_res['step_cost'].sum()/1e3
|
||||||
|
milp_data = pd.DataFrame({'reward': [m_reward]})
|
||||||
|
llm_data = pd.DataFrame(llm_res)
|
||||||
|
lmppo_data = pd.DataFrame(lmppo_res)
|
||||||
|
|
||||||
|
all_ppo = pd.concat([all_ppo, ppo_data], ignore_index=True)
|
||||||
|
all_milp = pd.concat([all_milp, milp_data], ignore_index=True)
|
||||||
|
all_llm = pd.concat([all_llm, llm_data], ignore_index=True)
|
||||||
|
all_lmppo = pd.concat([all_lmppo, lmppo_data], ignore_index=True)
|
||||||
|
|
||||||
|
test_ppo_path = f'{args.cwd}/melt_ppo.pkl'
|
||||||
|
test_llm_path = f'{args.cwd}/melt_llm.pkl'
|
||||||
|
test_lmppo_path = f'{args.cwd}/melt_lmppo.pkl'
|
||||||
|
test_milp_path = f'{args.cwd}/melt_milp.pkl'
|
||||||
|
|
||||||
|
with open(test_ppo_path, 'wb') as tf:
|
||||||
|
pickle.dump(all_ppo, tf)
|
||||||
|
with open(test_milp_path, 'wb') as tf:
|
||||||
|
pickle.dump(all_milp, tf)
|
||||||
|
with open(test_llm_path, 'wb') as f:
|
||||||
|
pickle.dump(all_llm, f)
|
||||||
|
with open(test_lmppo_path, 'wb') as tf:
|
||||||
|
pickle.dump(all_lmppo, tf)
|
||||||
|
|
||||||
|
plot_args = PlotArgs()
|
||||||
|
plot_args.feature_change = 'llm_1015'
|
||||||
|
args.cwd = agent_name
|
||||||
|
plot_dir = make_dir(args.cwd, plot_args.feature_change)
|
||||||
|
plot_melt(test_ppo_path, test_llm_path, test_lmppo_path, test_milp_path, plot_dir)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
args = Arguments()
|
||||||
|
args.visible_gpu = '0'
|
||||||
|
for seed in args.random_seed_list:
|
||||||
|
args.random_seed = seed
|
||||||
|
args.agent1 = AgentPPO()
args.agent2 = AgentPPO()  # two separate instances, so each can load its own actor checkpoint below
args.agent1.cri_target = args.agent2.cri_target = True
|
||||||
|
agent_name = f'{args.agent1.__class__.__name__}'
|
||||||
|
|
||||||
|
args.env = ESSEnv()
|
||||||
|
args.init_before_training()
|
||||||
|
|
||||||
|
agent1 = args.agent1
|
||||||
|
agent2 = args.agent2
|
||||||
|
env = args.env
|
||||||
|
agent1.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
|
||||||
|
args.if_gae_or_raw)
|
||||||
|
agent2.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
|
||||||
|
args.if_gae_or_raw)
|
||||||
|
cwd = args.cwd
|
||||||
|
gamma = args.gamma
|
||||||
|
batch_size = args.batch_size
|
||||||
|
target_step = args.target_step
|
||||||
|
repeat_times = args.repeat_times
|
||||||
|
soft_update_tau = args.soft_update_tau
|
||||||
|
agent1.state = agent2.state = env.reset()
|
||||||
|
buffer = list()
|
||||||
|
num_episode = args.num_episode
|
||||||
|
# args.test_network = False
|
||||||
|
|
||||||
|
if args.test_network:
|
||||||
|
args.cwd = agent_name
|
||||||
|
ppo_act_save_path = f'{args.cwd}/actor_10.pth'
|
||||||
|
lmppo_act_save_path = f'{args.cwd}/actor_llm_1015.pth'
|
||||||
|
agent1.act.load_state_dict(torch.load(ppo_act_save_path))
|
||||||
|
agent2.act.load_state_dict(torch.load(lmppo_act_save_path))
|
||||||
|
print('parameters have been reload and test')
|
||||||
|
env.TRAIN = False
|
||||||
|
run_multiple_tests(env, args, agent1, agent2, num_tests=12)
|
|
@ -0,0 +1,200 @@
|
||||||
|
import random
|
||||||
|
from PPO_llm import *
|
||||||
|
from plotDRL import *
|
||||||
|
|
||||||
|
test_day = 9  # number of test days per month
|
||||||
|
|
||||||
|
|
||||||
|
def test_ram_episode(env):
|
||||||
|
state = env.reset()
|
||||||
|
sum_reward = 0
|
||||||
|
sum_unbalance = 0
|
||||||
|
for i in range(24):
|
||||||
|
action = np.random.uniform(env.action_space.low, env.action_space.high)
|
||||||
|
state, next_state, reward, done = env.step(action)
|
||||||
|
state = next_state
|
||||||
|
sum_reward += reward
|
||||||
|
sum_unbalance += env.real_unbalance
|
||||||
|
record = {'reward': [sum_reward], 'unbalance': [sum_unbalance]}
|
||||||
|
return record
|
||||||
|
|
||||||
|
|
||||||
|
def test_llm(env, llm_actions, index):
|
||||||
|
sum_reward = 0
|
||||||
|
sum_unbalance = 0
|
||||||
|
for i in range(24):
|
||||||
|
action = llm_actions[index + i]
|
||||||
|
state, next_state, reward, done = env.step(action)
|
||||||
|
sum_reward += reward
|
||||||
|
sum_unbalance += env.real_unbalance
|
||||||
|
record = {'reward': [sum_reward], 'unbalance': [sum_unbalance]}
|
||||||
|
return record
|
||||||
|
|
||||||
|
|
||||||
|
def test_one_episode(env, act, device):
|
||||||
|
state = env.reset()
|
||||||
|
sum_reward = 0
|
||||||
|
sum_unbalance = 0
|
||||||
|
for i in range(24):
|
||||||
|
s_tensor = torch.as_tensor((state,), device=device)
|
||||||
|
a_tensor = act(s_tensor)
|
||||||
|
action = a_tensor.detach().cpu().numpy()[0]
|
||||||
|
# rl_action = a_tensor.detach().cpu().numpy()[0]
|
||||||
|
# llm_action = llm_actions[index + i]
|
||||||
|
# action = 0.95 * np.array(rl_action) + 0.05 * np.array(llm_action)
|
||||||
|
state, next_state, reward, done = env.step(action)
|
||||||
|
state = next_state
|
||||||
|
sum_reward += reward
|
||||||
|
sum_unbalance += env.real_unbalance
|
||||||
|
record = {'reward': [sum_reward], 'unbalance': [sum_unbalance]}
|
||||||
|
return record
|
||||||
|
|
||||||
|
|
||||||
|
def generate_test_dates():
|
||||||
|
# months_days = {1: 31, 2: 28, 3: 31, 4: 30, 5: 31, 6: 30, 7: 31, 8: 31, 9: 30, 10: 31, 11: 30, 12: 31}
|
||||||
|
dates = []
|
||||||
|
for month in range(1, 13): # 遍历12个月
|
||||||
|
start_day = 20
|
||||||
|
# days_in_month = months_days[month] # 确保每个月至少有9天
|
||||||
|
# if days_in_month >= start_day + 9: # 生成8个连续的天
|
||||||
|
|
||||||
|
for day in range(start_day, start_day + test_day):
|
||||||
|
dates.append((month, day))
|
||||||
|
return dates
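# Illustration only: with test_day = 9 and start_day = 20, generate_test_dates() returns
# 12 * 9 = 108 (month, day) tuples, i.e. [(1, 20), (1, 21), ..., (1, 28), (2, 20), ..., (12, 28)],
# so every month contributes the same nine held-out end-of-month days.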


def run_multiple_tests(env, args, agent1, agent2):
    all_ppo = pd.DataFrame()
    all_llm = pd.DataFrame()
    all_lmppo = pd.DataFrame()
    all_milp = pd.DataFrame()
    all_ram = pd.DataFrame()

    test_dates = generate_test_dates()  # get the test dates
    llm_actions = load_llm_actions('data/results_day.json')

    # run the tests for each group of dates
    for idx in range(0, len(test_dates), test_day):
        current_dates = test_dates[idx:idx + test_day]
        print(f'Start testing for {current_dates[0]} to {current_dates[-1]}')

        ppo_rewards = []
        ppo_unbalances = []
        llm_rewards = []
        llm_unbalances = []
        lmppo_rewards = []
        lmppo_unbalances = []
        milp_rewards = []
        ram_rewards = []
        ram_unbalances = []

        # test each of the current test_day days in turn
        for month, day in current_dates:
            index = (sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24

            ppo_res = test_one_episode(env, agent1.act, agent1.device)
            base_res = optimization_base_result(env, month, day)
            llm_res = test_llm(env, llm_actions, index)
            lmppo_res = test_one_episode(env, agent2.act, agent2.device)
            ram_res = test_ram_episode(env)

            # store each day's results
            ppo_rewards.append(ppo_res['reward'][0])
            ppo_unbalances.append(ppo_res['unbalance'][0])
            llm_rewards.append(llm_res['reward'][0])
            llm_unbalances.append(llm_res['unbalance'][0])
            lmppo_rewards.append(lmppo_res['reward'][0])
            lmppo_unbalances.append(lmppo_res['unbalance'][0])
            m_reward = - base_res['step_cost'].sum() / 1e3
            milp_rewards.append(m_reward)
            ram_rewards.append(ram_res['reward'][0])
            ram_unbalances.append(ram_res['unbalance'][0])

        # average the results over the test_day days
        avg_ppo_reward = sum(ppo_rewards) / len(ppo_rewards)
        avg_ppo_unbalance = sum(ppo_unbalances) / len(ppo_unbalances)
        avg_llm_reward = sum(llm_rewards) / len(llm_rewards)
        avg_llm_unbalance = sum(llm_unbalances) / len(llm_unbalances)
        avg_lmppo_reward = sum(lmppo_rewards) / len(lmppo_rewards)
        avg_lmppo_unbalance = sum(lmppo_unbalances) / len(lmppo_unbalances)
        avg_milp_reward = sum(milp_rewards) / len(milp_rewards)
        avg_ram_reward = sum(ram_rewards) / len(ram_rewards)
        avg_ram_unbalance = sum(ram_unbalances) / len(ram_unbalances)

        # save the averaged results of each group of test days
        ppo_data = pd.DataFrame({'reward': [avg_ppo_reward], 'unbalance': [avg_ppo_unbalance]})
        llm_data = pd.DataFrame({'reward': [avg_llm_reward], 'unbalance': [avg_llm_unbalance]})
        lmppo_data = pd.DataFrame({'reward': [avg_lmppo_reward], 'unbalance': [avg_lmppo_unbalance]})
        milp_data = pd.DataFrame({'reward': [avg_milp_reward]})
        ram_data = pd.DataFrame({'reward': [avg_ram_reward], 'unbalance': [avg_ram_unbalance]})

        all_ppo = pd.concat([all_ppo, ppo_data], ignore_index=True)
        all_llm = pd.concat([all_llm, llm_data], ignore_index=True)
        all_lmppo = pd.concat([all_lmppo, lmppo_data], ignore_index=True)
        all_milp = pd.concat([all_milp, milp_data], ignore_index=True)
        all_ram = pd.concat([all_ram, ram_data], ignore_index=True)

    # save the collected results
    test_ppo_path = f'{args.cwd}/melt_ppo.pkl'
    test_llm_path = f'{args.cwd}/melt_llm.pkl'
    test_lmppo_path = f'{args.cwd}/melt_lmppo.pkl'
    test_milp_path = f'{args.cwd}/melt_milp.pkl'
    test_ram_path = f'{args.cwd}/melt_ram.pkl'

    with open(test_ppo_path, 'wb') as tf:
        pickle.dump(all_ppo, tf)
    with open(test_milp_path, 'wb') as tf:
        pickle.dump(all_milp, tf)
    with open(test_llm_path, 'wb') as f:
        pickle.dump(all_llm, f)
    with open(test_lmppo_path, 'wb') as tf:
        pickle.dump(all_lmppo, tf)
    with open(test_ram_path, 'wb') as tf:
        pickle.dump(all_ram, tf)

    plot_args = PlotArgs()
    plot_args.feature_change = 'llm_1015'
    args.cwd = agent_name
    plot_dir = make_dir(args.cwd, plot_args.feature_change)
    plot_melt(test_ppo_path, test_llm_path, test_lmppo_path, test_milp_path, test_ram_path, plot_dir)
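# A minimal sketch (not called anywhere in this repository) of how the pickles written above can
# be read back for offline analysis; the file names match the ones saved in run_multiple_tests.
def load_melt_results(cwd):
    """Return a dict of the averaged test-result DataFrames saved by run_multiple_tests."""
    import pickle
    results = {}
    for name in ('ppo', 'llm', 'lmppo', 'milp', 'ram'):
        with open(f'{cwd}/melt_{name}.pkl', 'rb') as f:
            results[name] = pickle.load(f)  # each entry is a pandas DataFrame
    return results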


if __name__ == '__main__':
    args = Arguments()
    args.visible_gpu = '0'
    for seed in args.random_seed_list:
        args.random_seed = seed
        args.agent1 = args.agent2 = AgentPPO()
        args.agent1.cri_target = args.agent2.cri_target = True
        agent_name = f'{args.agent1.__class__.__name__}'

        args.env = ESSEnv()
        args.init_before_training()

        agent1 = args.agent1
        agent2 = args.agent2
        env = args.env
        agent1.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
                    args.if_gae_or_raw)
        agent2.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
                    args.if_gae_or_raw)
        cwd = args.cwd
        gamma = args.gamma
        batch_size = args.batch_size
        target_step = args.target_step
        repeat_times = args.repeat_times
        soft_update_tau = args.soft_update_tau
        agent1.state = agent2.state = env.reset()
        buffer = list()
        num_episode = args.num_episode
        # args.test_network = False

        if args.test_network:
            args.cwd = agent_name
            ppo_act_save_path = f'{args.cwd}/actor_10.pth'
            lmppo_act_save_path = f'{args.cwd}/actor_llm_1015.pth'
            agent1.act.load_state_dict(torch.load(ppo_act_save_path))
            agent2.act.load_state_dict(torch.load(lmppo_act_save_path))
            print('parameters have been reloaded for testing')
            env.TRAIN = False
            run_multiple_tests(env, args, agent1, agent2)
@ -0,0 +1,371 @@
import os
import pickle
from copy import deepcopy
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from environment import ESSEnv
from tools import get_episode_return, test_one_episode, optimization_base_result


class ActorPPO(nn.Module):
    def __init__(self, mid_dim, state_dim, action_dim):
        super().__init__()
        self.net = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(),
                                 nn.Linear(mid_dim, mid_dim), nn.ReLU(),
                                 nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
                                 nn.Linear(mid_dim, action_dim))
        # the logarithm (log) of the standard deviation (std) of the action, a trainable parameter
        self.a_logstd = nn.Parameter(torch.zeros((1, action_dim)) - 0.5, requires_grad=True)
        self.sqrt_2pi_log = np.log(np.sqrt(2 * np.pi))

    def forward(self, state):
        return self.net(state).tanh()  # tanh() limits the action output to [-1, 1]

    def get_action(self, state):
        a_avg = self.forward(state)
        a_std = self.a_logstd.exp()

        noise = torch.randn_like(a_avg)
        action = a_avg + noise * a_std
        return action, noise

    def get_logprob_entropy(self, state, action):
        a_avg = self.forward(state)
        a_std = self.a_logstd.exp()

        delta = ((a_avg - action) / a_std).pow(2) * 0.5
        logprob = -(self.a_logstd + self.sqrt_2pi_log + delta).sum(1)  # new_logprob

        dist_entropy = (logprob.exp() * logprob).mean()  # policy entropy
        return logprob, dist_entropy

    def get_old_logprob(self, _action, noise):  # noise = action - a_noise
        delta = noise.pow(2) * 0.5
        return -(self.a_logstd + self.sqrt_2pi_log + delta).sum(1)  # old_logprob


class CriticAdv(nn.Module):
    def __init__(self, mid_dim, state_dim, _action_dim):
        super().__init__()
        self.net = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(),
                                 nn.Linear(mid_dim, mid_dim), nn.ReLU(),
                                 nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
                                 nn.Linear(mid_dim, 1))

    def forward(self, state):
        return self.net(state)  # advantage value


def smooth_rewards(rewards, window=10):
    rewards = rewards.unsqueeze(0).unsqueeze(0)  # reshape rewards to [1, 1, len] for conv1d
    kernel = torch.ones(1, 1, window, device=rewards.device) / window  # uniform moving-average kernel
    smoothed_rewards = F.conv1d(rewards, kernel, padding='valid')  # moving average
    smoothed_rewards = smoothed_rewards.squeeze(0).squeeze(0)  # drop the extra dimensions

    # keep the same length as the original reward series: the first window-1 values stay unchanged
    return torch.cat((rewards[0, 0, :window-1], smoothed_rewards))
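# Worked example: for rewards = [1, 1, 1, 5, 1] and window = 3 the 'valid' convolution yields
# [1.0, 7/3, 7/3]; the first window-1 raw values are kept, so the returned series is
# [1, 1, 1.0, 2.33, 2.33] and has the same length as the input.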


class AgentPPO:
    def __init__(self):
        super().__init__()
        self.state = None
        self.device = None
        self.action_dim = None
        self.get_obj_critic = None

        self.criterion = torch.nn.SmoothL1Loss()
        self.cri = self.cri_target = self.if_use_cri_target = self.cri_optim = self.ClassCri = None
        self.act = self.act_target = self.if_use_act_target = self.act_optim = self.ClassAct = None

        '''init modify'''
        self.ClassCri = CriticAdv
        self.ClassAct = ActorPPO

        self.ratio_clip = 0.2  # ratio.clamp(1 - clip, 1 + clip)
        self.lambda_entropy = 0.02  # could be 0.01~0.05
        self.lambda_gae_adv = 0.98  # could be 0.95~0.99, (Generalized Advantage Estimation. ICLR.2016.)
        self.get_reward_sum = None  # self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
        self.trajectory_list = None

    def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, if_use_gae=True, gpu_id=0):
        self.device = torch.device(f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu")
        self.trajectory_list = list()
        # choose whether to use GAE or not
        self.get_reward_sum = self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw

        self.cri = self.ClassCri(net_dim, state_dim, action_dim).to(self.device)
        self.act = self.ClassAct(net_dim, state_dim, action_dim).to(self.device) if self.ClassAct else self.cri
        self.cri_target = deepcopy(self.cri) if self.if_use_cri_target else self.cri
        self.act_target = deepcopy(self.act) if self.if_use_act_target else self.act

        self.cri_optim = torch.optim.Adam(self.cri.parameters(), learning_rate)
        self.act_optim = torch.optim.Adam(self.act.parameters(), learning_rate) if self.ClassAct else self.cri

    def select_action(self, state):
        # states = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
        # _, noises = self.act.get_action(states)
        # the random baseline samples uniformly from the global env's action space instead of the actor
        actions = np.random.uniform(env.action_space.low, env.action_space.high)
        return actions, 0

    def explore_env(self, env, target_step):
        state = self.state  # the state is handed to the agent and then passed on to this method
        trajectory_temp = list()
        last_done = 0
        for i in range(target_step):
            action, noise = self.select_action(state)
            state, next_state, reward, done = env.step(np.tanh(action))
            trajectory_temp.append((state, reward, done, action, noise))
            if done:
                state = env.reset()
                last_done = i
            else:
                state = next_state
        self.state = state

        '''splice list'''
        # keep the trajectory up to the last terminal step in the shared list
        trajectory_list = self.trajectory_list + trajectory_temp[:last_done + 1]
        self.trajectory_list = trajectory_temp[last_done:]
        return trajectory_list

    def update_net(self, buffer, batch_size, repeat_times, soft_update_tau):
        """put data extraction and network update together"""
        with torch.no_grad():
            buf_len = buffer[0].shape[0]
            # decompose buffer data
            buf_state, buf_action, buf_noise, buf_reward, buf_mask = [ten.to(self.device) for ten in buffer]

            '''get buf_r_sum, buf_logprob'''
            bs = 4096
            buf_value = [self.cri_target(buf_state[i:i + bs]) for i in range(0, buf_len, bs)]
            buf_value = torch.cat(buf_value, dim=0)

            # no logprob is needed because the actor network is not optimised here
            buf_logprob = torch.zeros_like(buf_value)  # a random policy has no meaningful logprob

            buf_r_sum, buf_advantage = self.get_reward_sum(buf_len, buf_reward, buf_mask, buf_value)
            # normalize advantage
            buf_advantage = (buf_advantage - buf_advantage.mean()) / (buf_advantage.std() + 1e-5)
            buf_advantage = smooth_rewards(buf_advantage, window=10)
            del buf_noise, buffer[:]

        '''PPO: Surrogate objective of Trust Region'''
        obj_critic = obj_actor = None

        # because the policy is random, the actor update is skipped entirely;
        # with no policy network to update, only the critic is optimised in this loop
        for _ in range(int(buf_len / batch_size * repeat_times)):
            indices = torch.randint(buf_len, size=(batch_size,), requires_grad=False, device=self.device)

            state = buf_state[indices]
            action = buf_action[indices]
            r_sum = buf_r_sum[indices]
            logprob = buf_logprob[indices]
            advantage = buf_advantage[indices]

            # update only the critic network
            value = self.cri(state).squeeze(1)  # the critic predicts the total reward (Q value) of the state
            obj_critic = self.criterion(value, r_sum)  # value loss
            self.optim_update(self.cri_optim, obj_critic)  # update the critic parameters
            # optionally apply the soft (Polyak) update to the target critic
            self.soft_update(self.cri_target, self.cri, soft_update_tau) if self.cri_target is not self.cri else None

        a_std_log = getattr(self.act, 'a_std_log', torch.zeros(1))
        return obj_critic.item(), 0, a_std_log.mean().item()  # the actor is not updated, so obj_actor is reported as 0

    def get_reward_sum_raw(self, buf_len, buf_reward, buf_mask, buf_value) -> (torch.Tensor, torch.Tensor):
        buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device)  # reward sum

        pre_r_sum = 0
        for i in range(buf_len - 1, -1, -1):
            buf_r_sum[i] = buf_reward[i] + buf_mask[i] * pre_r_sum
            pre_r_sum = buf_r_sum[i]
        buf_advantage = buf_r_sum - (buf_mask * buf_value[:, 0])
        return buf_r_sum, buf_advantage

    def get_reward_sum_gae(self, buf_len, ten_reward, ten_mask, ten_value) -> (torch.Tensor, torch.Tensor):
        buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device)  # old policy value
        buf_advantage = torch.empty(buf_len, dtype=torch.float32, device=self.device)  # advantage value

        pre_r_sum = 0.0
        pre_advantage = 0.0  # advantage value of the previous step
        for i in range(buf_len - 1, -1, -1):
            buf_r_sum[i] = ten_reward[i] + ten_mask[i] * pre_r_sum
            pre_r_sum = buf_r_sum[i]
            buf_advantage[i] = ten_reward[i] + ten_mask[i] * (pre_advantage - ten_value[i])
            pre_advantage = ten_value[i] + buf_advantage[i] * self.lambda_gae_adv
        return buf_r_sum, buf_advantage

    @staticmethod
    def optim_update(optimizer, objective):
        optimizer.zero_grad()
        objective.backward()
        optimizer.step()

    @staticmethod
    def soft_update(target_net, current_net, tau):
        for tar, cur in zip(target_net.parameters(), current_net.parameters()):
            tar.data.copy_(cur.data.__mul__(tau) + tar.data.__mul__(1.0 - tau))


class Arguments:
    def __init__(self, agent=None, env=None):
        self.agent = agent  # Deep Reinforcement Learning algorithm
        self.env = env  # the environment for training
        self.cwd = None  # current working directory. None means it is set automatically
        self.if_remove = False  # remove the cwd folder? (True, False, None: ask me)
        self.visible_gpu = '0'  # for example: os.environ['CUDA_VISIBLE_DEVICES'] = '0, 2,'
        self.num_threads = 32  # cpu_num for evaluating the model, torch.set_num_threads(self.num_threads)

        '''Arguments for training'''
        self.num_episode = 1000  # number of training episodes for PPO
        self.gamma = 0.995  # discount factor of future rewards
        self.learning_rate = 1e-4  # 1e-4 2 ** -14 2e-4
        self.soft_update_tau = 2 ** -8  # 1e-3 2 ** -8

        self.net_dim = 256  # the network width
        self.batch_size = 4096  # num of transitions sampled from replay buffer.
        self.repeat_times = 2 ** 5  # collect target_step, then update network
        self.target_step = 4096  # repeatedly update network to keep critic's loss small
        self.max_memo = self.target_step  # capacity of replay buffer
        self.if_gae_or_raw = True  # GAE for on-policy sparse reward: Generalized Advantage Estimation.

        '''Arguments for evaluate'''
        self.random_seed = 1234
        # self.random_seed_list = [1234, 2234, 3234, 4234, 5234]
        self.random_seed_list = [1]
        self.train = True
        self.save_network = True
        self.test_network = True
        self.save_test_data = True
        self.compare_with_gurobi = True
        self.plot_on = True

    def init_before_training(self):
        if self.cwd is None:
            agent_name = self.agent.__class__.__name__
            self.cwd = f'./{agent_name}'

        np.random.seed(self.random_seed)
        torch.manual_seed(self.random_seed)
        torch.set_num_threads(self.num_threads)
        torch.set_default_dtype(torch.float32)
        os.environ['CUDA_VISIBLE_DEVICES'] = str(self.visible_gpu)


def update_buffer(_trajectory):
    _trajectory = list(map(list, zip(*_trajectory)))  # 2D-list transpose, here the trajectory is cut into 5 parts
    ten_state = torch.as_tensor(_trajectory[0])  # tensor state here
    ten_reward = torch.as_tensor(_trajectory[1], dtype=torch.float32)
    # _trajectory[2] = done; replace done with the mask to save memory
    ten_mask = (1.0 - torch.as_tensor(_trajectory[2], dtype=torch.float32)) * gamma
    ten_action = torch.as_tensor(_trajectory[3])
    ten_noise = torch.as_tensor(_trajectory[4], dtype=torch.float32)

    buffer[:] = (ten_state, ten_action, ten_noise, ten_reward, ten_mask)  # the list stores tensors

    _steps = ten_reward.shape[0]  # how many steps are collected in all trajectories
    _r_exp = ten_reward.mean()  # the mean reward
    return _steps, _r_exp
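# Note: the transpose above splits the list of (state, reward, done, action, noise) tuples into
# five aligned tensors, and the done flags are folded into ten_mask = (1 - done) * gamma, so a
# terminal step contributes 0 and a non-terminal step contributes gamma when the discounted
# return is accumulated backwards in get_reward_sum_raw / get_reward_sum_gae.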


if __name__ == '__main__':
    args = Arguments()
    reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
    loss_record = {'episode': [], 'steps': [], 'critic_loss': [], 'actor_loss': [], 'entropy_loss': []}
    args.visible_gpu = '0'
    for seed in args.random_seed_list:
        args.random_seed = seed
        args.agent = AgentPPO()
        agent_name = f'{args.agent.__class__.__name__}'
        args.agent.cri_target = True
        args.env = ESSEnv()
        args.init_before_training()
        '''init agent and environment'''
        agent = args.agent
        env = args.env
        agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
                   args.if_gae_or_raw)
        cwd = args.cwd
        gamma = args.gamma
        batch_size = args.batch_size  # how much data should be used to update the net
        target_step = args.target_step  # how many steps to run before one episode stops
        repeat_times = args.repeat_times  # how many updates are performed per batch of collected data
        soft_update_tau = args.soft_update_tau
        num_episode = args.num_episode
        agent.state = env.reset()
        '''init buffer'''
        buffer = list()
        '''init training parameters'''
        # args.train = False
        # args.save_network = False
        # args.test_network = False
        # args.save_test_data = False
        # args.compare_with_gurobi = False
        # args.plot_on = False
        if args.train:
            for i_episode in range(num_episode):
                with torch.no_grad():
                    trajectory_list = agent.explore_env(env, target_step)
                    steps, r_exp = update_buffer(trajectory_list)
                critic_loss, actor_loss, entropy_loss = agent.update_net(buffer, batch_size, repeat_times,
                                                                         soft_update_tau)
                loss_record['critic_loss'].append(critic_loss)
                loss_record['actor_loss'].append(actor_loss)
                loss_record['entropy_loss'].append(entropy_loss)

                with torch.no_grad():
                    episode_reward, episode_unbalance = get_episode_return(env, agent.act, agent.device)
                    reward_record['mean_episode_reward'].append(episode_reward)
                    reward_record['unbalance'].append(episode_unbalance)
                print(f'episode: {i_episode}, reward: {episode_reward}, unbalance: {episode_unbalance}')

        act_save_path = f'{args.cwd}/actor_ram.pth'
        loss_record_path = f'{args.cwd}/loss_ram.pkl'
        reward_record_path = f'{args.cwd}/reward_ram.pkl'

        if args.save_network:
            with open(loss_record_path, 'wb') as tf:
                pickle.dump(loss_record, tf)
            with open(reward_record_path, 'wb') as tf:
                pickle.dump(reward_record, tf)
            torch.save(agent.act.state_dict(), act_save_path)
            print('actor parameters have been saved')

        if args.test_network:
            args.cwd = agent_name
            agent.act.load_state_dict(torch.load(act_save_path))
            print('parameters have been reloaded for testing')
            record = test_one_episode(env, agent.act, agent.device)
            eval_data = pd.DataFrame(record['system_info'])
            eval_data.columns = ['time_step', 'price', 'load', 'action', 'real_action', 'soc', 'battery',
                                 'gen1', 'gen2', 'gen3', 'pv', 'wind', 'unbalance', 'operation_cost', 'reward']
            if args.save_test_data:
                test_data_save_path = f'{args.cwd}/test_ram.pkl'
                with open(test_data_save_path, 'wb') as tf:
                    pickle.dump(record, tf)

        '''compare with gurobi data and results'''
        if args.compare_with_gurobi:
            month = record['init_info'][0][0]
            day = record['init_info'][0][1]
            initial_soc = record['init_info'][0][3]
            base_result = optimization_base_result(env, month, day, initial_soc)
            if args.plot_on:
                from plotDRL import PlotArgs, make_dir, plot_evaluation_information, plot_optimization_result

                plot_args = PlotArgs()
                plot_args.feature_change = 'ram'
                args.cwd = agent_name
                plot_dir = make_dir(args.cwd, plot_args.feature_change)
                plot_optimization_result(base_result, plot_dir)
                plot_evaluation_information(args.cwd + '/' + 'test_ram.pkl', plot_dir)
            '''compare the costs obtained from Gurobi and PPO'''
            ratio = sum(eval_data['operation_cost']) / sum(base_result['step_cost'])
            print('rl_cost:', sum(eval_data['operation_cost']))
            print('gurobi_cost:', sum(base_result['step_cost']))
            print('ratio:', ratio)
@ -0,0 +1,130 @@
import pickle
import pandas as pd
import torch

from agent import AgentSAC
from environment import ESSEnv
from tools import *


def update_buffer(_trajectory):
    ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
    ary_other = torch.as_tensor([item[1] for item in _trajectory])
    ary_other[:, 0] = ary_other[:, 0]  # ten_reward
    ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma  # ten_mask = (1.0 - ary_done) * gamma

    buffer.extend_buffer(ten_state, ary_other)

    _steps = ten_state.shape[0]
    _r_exp = ary_other[:, 0].mean()  # other = (reward, mask, action)
    return _steps, _r_exp


if __name__ == '__main__':
    args = Arguments()
    reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
    loss_record = {'episode': [], 'steps': [], 'critic_loss': [], 'actor_loss': [], 'entropy_loss': []}
    args.visible_gpu = '0'
    for seed in args.random_seed_list:
        args.random_seed = seed
        args.agent = AgentSAC()
        agent_name = f'{args.agent.__class__.__name__}'
        args.agent.cri_target = True
        args.env = ESSEnv()
        args.init_before_training(if_main=True)
        '''init agent and environment'''
        agent = args.agent
        env = args.env
        agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
                   args.if_per_or_gae)
        '''init replay buffer'''
        buffer = ReplayBuffer(max_len=args.max_memo, state_dim=env.state_space.shape[0],
                              action_dim=env.action_space.shape[0])
        '''start training'''
        cwd = args.cwd
        gamma = args.gamma
        batch_size = args.batch_size  # how much data should be used to update the net
        target_step = args.target_step  # how many steps to run before one episode stops
        repeat_times = args.repeat_times  # how many updates are performed per batch of collected data
        soft_update_tau = args.soft_update_tau
        agent.state = env.reset()
        '''collect data, train and update the network'''
        num_episode = args.num_episode
        '''here record real unbalance'''
        # args.train = False
        # args.save_network = False
        # args.test_network = False
        # args.save_test_data = False
        # args.compare_with_gurobi = False
        if args.train:
            collect_data = True
            while collect_data:
                print(f'buffer:{buffer.now_len}')
                with torch.no_grad():
                    trajectory = agent.explore_env(env, target_step)
                    steps, r_exp = update_buffer(trajectory)
                buffer.update_now_len()
                if buffer.now_len >= 10000:
                    collect_data = False
            for i_episode in range(num_episode):
                critic_loss, actor_loss, entropy_loss = agent.update_net(buffer, batch_size, repeat_times,
                                                                         soft_update_tau)
                loss_record['critic_loss'].append(critic_loss)
                loss_record['actor_loss'].append(actor_loss)
                loss_record['entropy_loss'].append(entropy_loss)
                with torch.no_grad():
                    episode_reward, episode_unbalance = get_episode_return(env, agent.act, agent.device)
                    reward_record['mean_episode_reward'].append(episode_reward)
                    reward_record['unbalance'].append(episode_unbalance)
                print(f'episode: {i_episode}, reward: {episode_reward}, unbalance: {episode_unbalance}, buffer_length: {buffer.now_len}')
                if i_episode % 10 == 0:
                    # collect another target_step transitions every 10 episodes
                    with torch.no_grad():
                        trajectory = agent.explore_env(env, target_step)
                        steps, r_exp = update_buffer(trajectory)

        act_save_path = f'{args.cwd}/actor.pth'
        loss_record_path = f'{args.cwd}/loss_data.pkl'
        reward_record_path = f'{args.cwd}/reward_data.pkl'

        if args.save_network:
            with open(loss_record_path, 'wb') as tf:
                pickle.dump(loss_record, tf)
            with open(reward_record_path, 'wb') as tf:
                pickle.dump(reward_record, tf)
            torch.save(agent.act.state_dict(), act_save_path)
            print('actor parameters have been saved')

        if args.test_network:
            args.cwd = agent_name
            agent.act.load_state_dict(torch.load(act_save_path))
            print('parameters have been reloaded for testing')
            record = test_one_episode(env, agent.act, agent.device)
            eval_data = pd.DataFrame(record['system_info'])
            eval_data.columns = ['time_step', 'price', 'netload', 'action', 'real_action', 'soc', 'battery',
                                 'gen1', 'gen2', 'gen3', 'pv', 'wind', 'unbalance', 'operation_cost', 'reward']
            if args.save_test_data:
                test_data_save_path = f'{args.cwd}/test_data.pkl'
                with open(test_data_save_path, 'wb') as tf:
                    pickle.dump(record, tf)

        '''compare with gurobi data and results'''
        if args.compare_with_gurobi:
            month = record['init_info'][0][0]
            day = record['init_info'][0][1]
            initial_soc = record['init_info'][0][3]
            base_result = optimization_base_result(env, month, day, initial_soc)
            if args.plot_on:
                from plotDRL import PlotArgs, make_dir, plot_evaluation_information, plot_optimization_result

                plot_args = PlotArgs()
                plot_args.feature_change = '10'
                args.cwd = agent_name
                plot_dir = make_dir(args.cwd, plot_args.feature_change)
                plot_optimization_result(base_result, plot_dir)
                plot_evaluation_information(args.cwd + '/' + 'test_data.pkl', plot_dir)
            '''compare the costs obtained from Gurobi and SAC'''
            ratio = sum(eval_data['operation_cost']) / sum(base_result['step_cost'])
            print('rl_cost:', sum(eval_data['operation_cost']))
            print('gurobi_cost:', sum(base_result['step_cost']))
            print('ratio:', ratio)
@ -0,0 +1,127 @@
import pickle
import pandas as pd
import torch

from agent import AgentTD3
from environment import ESSEnv
from tools import *


def update_buffer(_trajectory):
    ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
    ary_other = torch.as_tensor([item[1] for item in _trajectory])
    ary_other[:, 0] = ary_other[:, 0]  # ten_reward
    ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma  # ten_mask = (1.0 - ary_done) * gamma

    buffer.extend_buffer(ten_state, ary_other)

    _steps = ten_state.shape[0]
    _r_exp = ary_other[:, 0].mean()  # other = (reward, mask, action)
    return _steps, _r_exp


if __name__ == '__main__':
    args = Arguments()
    reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
    loss_record = {'episode': [], 'steps': [], 'critic_loss': [], 'actor_loss': [], 'entropy_loss': []}
    args.visible_gpu = '0'
    for seed in args.random_seed_list:
        args.random_seed = seed
        args.agent = AgentTD3()
        agent_name = f'{args.agent.__class__.__name__}'
        args.agent.cri_target = True
        args.env = ESSEnv()
        args.init_before_training(if_main=True)
        '''init agent and environment'''
        agent = args.agent
        env = args.env
        agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
                   args.if_per_or_gae)
        '''init replay buffer'''
        buffer = ReplayBuffer(max_len=args.max_memo, state_dim=env.state_space.shape[0],
                              action_dim=env.action_space.shape[0])
        '''start training'''
        cwd = args.cwd
        gamma = args.gamma
        batch_size = args.batch_size  # how much data should be used to update the net
        target_step = args.target_step  # how many steps to run before one episode stops
        repeat_times = args.repeat_times  # how many updates are performed per batch of collected data
        soft_update_tau = args.soft_update_tau
        agent.state = env.reset()
        '''collect data, train and update the network'''
        num_episode = args.num_episode
        # args.train = False
        # args.save_network = False
        # args.test_network = False
        # args.save_test_data = False
        # args.compare_with_gurobi = False
        if args.train:
            collect_data = True
            while collect_data:
                print(f'buffer:{buffer.now_len}')
                with torch.no_grad():
                    trajectory = agent.explore_env(env, target_step)
                    steps, r_exp = update_buffer(trajectory)
                buffer.update_now_len()
                if buffer.now_len >= 10000:
                    collect_data = False
            for i_episode in range(num_episode):
                critic_loss, actor_loss = agent.update_net(buffer, batch_size, repeat_times, soft_update_tau)
                loss_record['critic_loss'].append(critic_loss)
                loss_record['actor_loss'].append(actor_loss)
                with torch.no_grad():
                    episode_reward, episode_unbalance = get_episode_return(env, agent.act, agent.device)
                    reward_record['mean_episode_reward'].append(episode_reward)
                    reward_record['unbalance'].append(episode_unbalance)
                print(f'episode: {i_episode}, reward: {episode_reward}, unbalance: {episode_unbalance}, buffer_length: {buffer.now_len}')
                if i_episode % 10 == 0:
                    # collect another target_step transitions every 10 episodes
                    with torch.no_grad():
                        trajectory = agent.explore_env(env, target_step)
                        steps, r_exp = update_buffer(trajectory)

        loss_record_path = f'{args.cwd}/loss_data.pkl'
        reward_record_path = f'{args.cwd}/reward_data.pkl'
        act_save_path = f'{args.cwd}/actor.pth'

        if args.save_network:
            with open(loss_record_path, 'wb') as tf:
                pickle.dump(loss_record, tf)
            with open(reward_record_path, 'wb') as tf:
                pickle.dump(reward_record, tf)
            torch.save(agent.act.state_dict(), act_save_path)
            print('actor parameters have been saved')

        if args.test_network:
            args.cwd = agent_name
            agent.act.load_state_dict(torch.load(act_save_path))
            print('parameters have been reloaded for testing')
            record = test_one_episode(env, agent.act, agent.device)
            eval_data = pd.DataFrame(record['system_info'])
            eval_data.columns = ['time_step', 'price', 'netload', 'action', 'real_action', 'soc', 'battery',
                                 'gen1', 'gen2', 'gen3', 'pv', 'wind', 'unbalance', 'operation_cost', 'reward']
            if args.save_test_data:
                test_data_save_path = f'{args.cwd}/test_data.pkl'
                with open(test_data_save_path, 'wb') as tf:
                    pickle.dump(record, tf)

        '''compare with gurobi data and results'''
        if args.compare_with_gurobi:
            month = record['init_info'][0][0]
            day = record['init_info'][0][1]
            initial_soc = record['init_info'][0][3]
            base_result = optimization_base_result(env, month, day, initial_soc)
            if args.plot_on:
                from plotDRL import PlotArgs, make_dir, plot_evaluation_information, plot_optimization_result

                plot_args = PlotArgs()
                plot_args.feature_change = '10'
                args.cwd = agent_name
                plot_dir = make_dir(args.cwd, plot_args.feature_change)
                plot_optimization_result(base_result, plot_dir)
                plot_evaluation_information(args.cwd + '/' + 'test_data.pkl', plot_dir)
            '''compare the costs obtained from Gurobi and TD3'''
            ratio = sum(eval_data['operation_cost']) / sum(base_result['step_cost'])
            print('rl_cost:', sum(eval_data['operation_cost']))
            print('gurobi_cost:', sum(base_result['step_cost']))
            print('ratio:', ratio)
@ -0,0 +1,313 @@
import numpy.random as rd
import os
import json
from copy import deepcopy
from net import *


class AgentBase:
    def __init__(self):
        self.state = None
        self.device = None
        self.action_dim = None
        self.if_off_policy = None
        self.explore_noise = None
        self.trajectory_list = None

        self.criterion = torch.nn.SmoothL1Loss()
        self.cri = self.cri_target = self.if_use_cri_target = self.cri_optim = self.ClassCri = None
        self.act = self.act_target = self.if_use_act_target = self.act_optim = self.ClassAct = None

    def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, _if_per_or_gae=False, gpu_id=0):
        # call self.init() explicitly when running in multiple processes
        self.device = torch.device(f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu")
        self.action_dim = action_dim

        self.cri = self.ClassCri(net_dim, state_dim, action_dim).to(self.device)
        self.act = self.ClassAct(net_dim, state_dim, action_dim).to(self.device) if self.ClassAct else self.cri
        self.cri_target = deepcopy(self.cri) if self.if_use_cri_target else self.cri
        self.act_target = deepcopy(self.act) if self.if_use_act_target else self.act

        self.cri_optim = torch.optim.Adam(self.cri.parameters(), learning_rate)
        self.act_optim = torch.optim.Adam(self.act.parameters(), learning_rate) if self.ClassAct else self.cri
        del self.ClassCri, self.ClassAct

    def select_action(self, state) -> np.ndarray:
        states = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
        action = self.act(states)[0]
        action = (action + torch.randn_like(action) * self.explore_noise).clamp(-1, 1)
        return action.detach().cpu().numpy()

    def explore_env(self, env, target_step):
        state = self.state
        trajectory = list()
        for _ in range(target_step):
            action = self.select_action(state)
            state, next_state, reward, done = env.step(action)
            trajectory.append((state, (reward, done, *action)))
            state = env.reset() if done else next_state
        self.state = state
        return trajectory

    @staticmethod
    def optim_update(optimizer, objective):
        optimizer.zero_grad()
        objective.backward()
        optimizer.step()

    @staticmethod
    def soft_update(target_net, current_net, tau):
        for tar, cur in zip(target_net.parameters(), current_net.parameters()):
            tar.data.copy_(cur.data * tau + tar.data * (1.0 - tau))

    def save_or_load_agent(self, cwd, if_save):
        def load_torch_file(model_or_optim, _path):
            state_dict = torch.load(_path, map_location=lambda storage, loc: storage)
            model_or_optim.load_state_dict(state_dict)

        name_obj_list = [('actor', self.act), ('act_target', self.act_target), ('act_optim', self.act_optim),
                         ('critic', self.cri), ('cri_target', self.cri_target), ('cri_optim', self.cri_optim), ]
        name_obj_list = [(name, obj) for name, obj in name_obj_list if obj is not None]
        if if_save:
            for name, obj in name_obj_list:
                save_path = f"{cwd}/{name}.pth"
                torch.save(obj.state_dict(), save_path)
        else:
            for name, obj in name_obj_list:
                save_path = f"{cwd}/{name}.pth"
                load_torch_file(obj, save_path) if os.path.isfile(save_path) else None


class AgentDDPG(AgentBase):
    def __init__(self):
        super().__init__()
        self.explore_noise = 0.1
        self.if_use_cri_target = self.if_use_act_target = True
        self.ClassCri = Critic
        self.ClassAct = Actor

    def update_net(self, buffer, batch_size, repeat_times, soft_update_tau) -> (float, float):
        buffer.update_now_len()
        obj_critic = obj_actor = None
        for _ in range(int(buffer.now_len / batch_size * repeat_times)):
            obj_critic, state = self.get_obj_critic(buffer, batch_size)  # critic loss
            self.optim_update(self.cri_optim, obj_critic)
            self.soft_update(self.cri_target, self.cri, soft_update_tau)

            action_pg = self.act(state)  # policy gradient
            obj_actor = -self.cri(state, action_pg).mean()  # actor loss: maximise Q by minimising its negative
            self.optim_update(self.act_optim, obj_actor)
            self.soft_update(self.act_target, self.act, soft_update_tau)
        return obj_actor.item(), obj_critic.item()

    def get_obj_critic(self, buffer, batch_size) -> (torch.Tensor, torch.Tensor):
        with torch.no_grad():
            reward, mask, action, state, next_s = buffer.sample_batch(batch_size)
            next_q = self.cri_target(next_s, self.act_target(next_s))
            q_label = reward + mask * next_q
        q_value = self.cri(state, action)
        obj_critic = self.criterion(q_value, q_label)
        return obj_critic, state
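    # The TD target above is the standard DDPG target: because the stored mask already equals
    # (1 - done) * gamma, q_label = reward + mask * Q'(s', pi'(s')) reduces to
    # r + gamma * Q_target(s', a') with bootstrapping switched off on terminal steps.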


class AgentTD3(AgentBase):
    def __init__(self):
        super().__init__()
        self.explore_noise = 0.1  # standard deviation of exploration noise
        self.policy_noise = 0.2  # standard deviation of policy noise
        self.update_freq = 2  # delay update frequency
        self.if_use_cri_target = self.if_use_act_target = True
        self.ClassCri = CriticTwin
        self.ClassAct = Actor

    def update_net(self, buffer, batch_size, repeat_times, soft_update_tau) -> tuple:
        buffer.update_now_len()
        obj_critic = obj_actor = None
        for update_c in range(int(buffer.now_len / batch_size * repeat_times)):
            obj_critic, state = self.get_obj_critic(buffer, batch_size)
            self.optim_update(self.cri_optim, obj_critic)

            action_pg = self.act(state)  # policy gradient
            obj_actor = -self.cri_target(state, action_pg).mean()  # use cri_target instead of cri for stable training
            self.optim_update(self.act_optim, obj_actor)
            if update_c % self.update_freq == 0:  # delayed update
                self.soft_update(self.cri_target, self.cri, soft_update_tau)
                self.soft_update(self.act_target, self.act, soft_update_tau)
        return obj_critic.item() / 2, obj_actor.item()

    def get_obj_critic(self, buffer, batch_size) -> (torch.Tensor, torch.Tensor):
        with torch.no_grad():
            reward, mask, action, state, next_s = buffer.sample_batch(batch_size)
            next_a = self.act_target.get_action(next_s, self.policy_noise)  # policy noise
            next_q = torch.min(*self.cri_target.get_q1_q2(next_s, next_a))  # twin critics
            q_label = reward + mask * next_q

        q1, q2 = self.cri.get_q1_q2(state, action)
        obj_critic = self.criterion(q1, q_label) + self.criterion(q2, q_label)  # twin critics
        return obj_critic, state
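    # Taking torch.min over the twin target critics is TD3's clipped double-Q trick: it biases
    # the bootstrap target downwards and counteracts the value overestimation that a single
    # critic tends to accumulate.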


class AgentSAC(AgentBase):
    def __init__(self):
        super().__init__()
        self.ClassCri = CriticTwin
        self.ClassAct = ActorSAC
        self.if_use_cri_target = True
        self.if_use_act_target = False

        self.alpha_log = None
        self.alpha_optim = None
        self.target_entropy = None

    def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, _if_use_per=False, gpu_id=0, env_num=1):
        super().init(net_dim, state_dim, action_dim, learning_rate, _if_use_per, gpu_id)

        self.alpha_log = torch.tensor((-np.log(action_dim) * np.e,), dtype=torch.float32,
                                      requires_grad=True, device=self.device)
        self.alpha_optim = torch.optim.Adam((self.alpha_log,), lr=learning_rate)
        self.target_entropy = np.log(action_dim)

    def select_action(self, state):
        states = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
        actions = self.act.get_action(states)
        return actions.detach().cpu().numpy()[0]

    def update_net(self, buffer, batch_size, repeat_times, soft_update_tau):
        buffer.update_now_len()
        alpha = self.alpha_log.exp().detach()
        obj_critic = obj_actor = None
        for _ in range(int(buffer.now_len * repeat_times / batch_size)):
            '''objective of critic (loss function of critic)'''
            with torch.no_grad():
                reward, mask, action, state, next_s = buffer.sample_batch(batch_size)
                next_a, next_log_prob = self.act_target.get_action_logprob(next_s)
                next_q = torch.min(*self.cri_target.get_q1_q2(next_s, next_a))
                q_label = reward + mask * (next_q + next_log_prob * alpha)
            q1, q2 = self.cri.get_q1_q2(state, action)
            obj_critic = self.criterion(q1, q_label) + self.criterion(q2, q_label)
            self.optim_update(self.cri_optim, obj_critic)
            self.soft_update(self.cri_target, self.cri, soft_update_tau)

            '''objective of alpha (temperature parameter automatic adjustment)'''
            action_pg, log_prob = self.act.get_action_logprob(state)  # policy gradient
            obj_alpha = (self.alpha_log * (log_prob - self.target_entropy).detach()).mean()
            self.optim_update(self.alpha_optim, obj_alpha)

            '''objective of actor'''
            alpha = self.alpha_log.exp().detach()
            with torch.no_grad():
                self.alpha_log[:] = self.alpha_log.clamp(-20, 2)
            obj_actor = -(torch.min(*self.cri_target.get_q1_q2(state, action_pg)) + log_prob * alpha).mean()
            self.optim_update(self.act_optim, obj_actor)

            self.soft_update(self.act_target, self.act, soft_update_tau)
        return obj_critic.item(), obj_actor.item(), alpha.item()
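    # alpha_log is the learnable log of the entropy temperature. obj_alpha nudges it whenever the
    # policy log-probability drifts away from target_entropy = log(action_dim), and the clamp to
    # [-20, 2] keeps exp(alpha_log) numerically stable, so the entropy weight in the actor
    # objective is tuned automatically instead of being a fixed coefficient.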


class AgentPPO(AgentBase):
    def __init__(self):
        super().__init__()
        self.ClassCri = CriticAdv
        self.ClassAct = ActorPPO

        self.if_off_policy = False
        self.ratio_clip = 0.2  # ratio.clamp(1 - clip, 1 + clip)
        self.lambda_entropy = 0.02  # could be 0.01~0.05
        self.lambda_gae_adv = 0.98  # could be 0.95~0.99, GAE (Generalized Advantage Estimation. ICLR.2016.)
        self.get_reward_sum = None  # self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw

    def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, if_use_gae=False, gpu_id=0, env_num=1):
        super().init(net_dim, state_dim, action_dim, learning_rate, if_use_gae, gpu_id)
        self.trajectory_list = list()
        self.get_reward_sum = self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw

    def select_action(self, state):
        states = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
        actions, noises = self.act.get_action(states)
        return actions[0].detach().cpu().numpy(), noises[0].detach().cpu().numpy()

    def explore_env(self, env, target_step):
        state = self.state
        trajectory_temp = list()
        last_done = 0
        for i in range(target_step):
            action, noise = self.select_action(state)
            next_state, reward, done, _ = env.step(np.tanh(action))
            trajectory_temp.append((state, reward, done, action, noise))
            if done:
                state = env.reset()
                last_done = i
            else:
                state = next_state
        self.state = state

        '''splice list'''
        trajectory_list = self.trajectory_list + trajectory_temp[:last_done + 1]
        self.trajectory_list = trajectory_temp[last_done:]
        return trajectory_list

    def update_net(self, buffer, batch_size, repeat_times, soft_update_tau):
        with torch.no_grad():
            buf_len = buffer[0].shape[0]
            buf_state, buf_action, buf_noise, buf_reward, buf_mask = [ten.to(self.device) for ten in buffer]

            '''get buf_r_sum, buf_logprob'''
            bs = 2 ** 10
            buf_value = [self.cri_target(buf_state[i:i + bs]) for i in range(0, buf_len, bs)]
            buf_value = torch.cat(buf_value, dim=0)
            buf_logprob = self.act.get_old_logprob(buf_action, buf_noise)

            buf_r_sum, buf_advantage = self.get_reward_sum(buf_len, buf_reward, buf_mask, buf_value)  # detach()
            buf_advantage = (buf_advantage - buf_advantage.mean()) / (buf_advantage.std() + 1e-5)
            del buf_noise, buffer[:]

        '''PPO: Surrogate objective of Trust Region'''
        obj_critic = obj_actor = None
        for _ in range(int(buf_len / batch_size * repeat_times)):
            indices = torch.randint(buf_len, size=(batch_size,), requires_grad=False, device=self.device)

            state = buf_state[indices]
            action = buf_action[indices]
            r_sum = buf_r_sum[indices]
            logprob = buf_logprob[indices]
            advantage = buf_advantage[indices]

            new_logprob, obj_entropy = self.act.get_logprob_entropy(state, action)  # it's obj_actor
            ratio = (new_logprob - logprob.detach()).exp()
            surrogate1 = advantage * ratio
            surrogate2 = advantage * ratio.clamp(1 - self.ratio_clip, 1 + self.ratio_clip)
            obj_surrogate = -torch.min(surrogate1, surrogate2).mean()
            obj_actor = obj_surrogate + obj_entropy * self.lambda_entropy
            self.optim_update(self.act_optim, obj_actor)

            value = self.cri(state).squeeze(1)  # the critic network predicts the reward_sum (Q value) of the state
            obj_critic = self.criterion(value, r_sum) / (r_sum.std() + 1e-6)
            self.optim_update(self.cri_optim, obj_critic)
            self.soft_update(self.cri_target, self.cri, soft_update_tau) if self.cri_target is not self.cri else None

        a_std_log = getattr(self.act, 'a_std_log', torch.zeros(1))
        return obj_critic.item(), obj_actor.item(), a_std_log.mean().item()  # logging_tuple

    def get_reward_sum_raw(self, buf_len, buf_reward, buf_mask, buf_value) -> (torch.Tensor, torch.Tensor):
        buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device)  # reward sum

        pre_r_sum = 0
        for i in range(buf_len - 1, -1, -1):
            buf_r_sum[i] = buf_reward[i] + buf_mask[i] * pre_r_sum
            pre_r_sum = buf_r_sum[i]
        buf_advantage = buf_r_sum - (buf_mask * buf_value[:, 0])
        return buf_r_sum, buf_advantage

    def get_reward_sum_gae(self, buf_len, ten_reward, ten_mask, ten_value):
        """tensor, tensor"""
        buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device)  # old policy value
        buf_advantage = torch.empty(buf_len, dtype=torch.float32, device=self.device)  # advantage value

        pre_r_sum = 0
        pre_advantage = 0  # advantage value of the previous step
        for i in range(buf_len - 1, -1, -1):
            buf_r_sum[i] = ten_reward[i] + ten_mask[i] * pre_r_sum
            pre_r_sum = buf_r_sum[i]
            buf_advantage[i] = ten_reward[i] + ten_mask[i] * (pre_advantage - ten_value[i])
            pre_advantage = ten_value[i] + buf_advantage[i] * self.lambda_gae_adv
        return buf_r_sum, buf_advantage
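# The backward pass above is the ElegantRL-style variant of Generalized Advantage Estimation:
# with mask = (1 - done) * gamma it accumulates
#     adv[i] = r[i] + mask[i] * (V[i+1] + lambda * adv[i+1] - V[i]),   lambda = lambda_gae_adv,
# while buf_r_sum is the plain discounted return used as the critic's regression target
# (note that the baseline V[i] is also scaled by mask[i], a small deviation from the textbook formula).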
|
After Width: | Height: | Size: 2.0 MiB |
|
@ -0,0 +1,70 @@
|
||||||
|
class Constant:
|
||||||
|
MONTHS_LEN = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
|
||||||
|
|
||||||
|
|
||||||
|
class DataManager:
|
||||||
|
def __init__(self) -> None:
|
||||||
|
self.Prices = []
|
||||||
|
self.Load_Consumption = []
|
||||||
|
self.Temperature = []
|
||||||
|
self.Irradiance = []
|
||||||
|
self.Wind = []
|
||||||
|
self.LLM = []
|
||||||
|
|
||||||
|
def add_price_element(self, element): self.Prices.append(element)
|
||||||
|
|
||||||
|
def add_load_element(self, element): self.Load_Consumption.append(element)
|
||||||
|
|
||||||
|
def add_temperature_element(self, element): self.Temperature.append(element)
|
||||||
|
|
||||||
|
def add_irradiance_element(self, element): self.Irradiance.append(element)
|
||||||
|
|
||||||
|
def add_wind_element(self, element): self.Wind.append(element)
|
||||||
|
|
||||||
|
def add_llm_element(self, element): self.LLM.append(element)
|
||||||
|
|
||||||
|
# get current time data based on given month day, and day_time
|
||||||
|
def get_price_data(self, month, day, day_time):
|
||||||
|
return self.Prices[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time]
|
||||||
|
|
||||||
|
def get_load_cons_data(self, month, day, day_time):
|
||||||
|
return self.Load_Consumption[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time]
|
||||||
|
|
||||||
|
def get_temperature_data(self, month, day, day_time):
|
||||||
|
return self.Temperature[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time]
|
||||||
|
|
||||||
|
def get_irradiance_data(self, month, day, day_time):
|
||||||
|
return self.Irradiance[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time]
|
||||||
|
|
||||||
|
def get_wind_data(self, month, day, day_time):
|
||||||
|
return self.Wind[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time]
|
||||||
|
|
||||||
|
def get_llm_data(self, month, day, day_time):
|
||||||
|
index = (sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time
|
||||||
|
# print(f"Index: {index}, LLM length: {len(self.LLM)}, Month: {month}, Day: {day}, Day_time: {day_time}")
|
||||||
|
return self.LLM[index]
|
||||||
|
|
||||||
|
# get series data for one episode
|
||||||
|
def get_series_price_data(self, month, day):
|
||||||
|
return self.Prices[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24:
|
||||||
|
(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + 24]
|
||||||
|
|
||||||
|
def get_series_load_cons_data(self, month, day):
|
||||||
|
return self.Load_Consumption[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24:
|
||||||
|
(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + 24]
|
||||||
|
|
||||||
|
def get_series_temperature_data(self, month, day):
|
||||||
|
return self.Temperature[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24:
|
||||||
|
(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + 24]
|
||||||
|
|
||||||
|
def get_series_irradiance_data(self, month, day):
|
||||||
|
return self.Irradiance[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24:
|
||||||
|
(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + 24]
|
||||||
|
|
||||||
|
def get_series_wind_data(self, month, day):
|
||||||
|
return self.Wind[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24:
|
||||||
|
(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + 24]
|
||||||
|
|
||||||
|
# def get_series_llm_data(self, month, day):
|
||||||
|
# return self.LLM[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24:
|
||||||
|
# (sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + 24]
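# Worked example of the flat hourly index used by the getters above (an illustration,
# not part of the original file): for month=3, day=2, day_time=14 the lookup index is
#   (sum(Constant.MONTHS_LEN[:2]) + 2 - 1) * 24 + 14 = (59 + 1) * 24 + 14 = 1454,
# i.e. hour 14 of the 61st day of the year. All series are assumed to be hourly
# values covering one full non-leap year (8760 entries).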
|
|
@ -0,0 +1,164 @@
|
||||||
|
import gym
|
||||||
|
import pandas as pd
|
||||||
|
from data_manager import *
|
||||||
|
from module import *
|
||||||
|
from parameters import *
|
||||||
|
|
||||||
|
|
||||||
|
class ESSEnv(gym.Env):
|
||||||
|
def __init__(self, **kwargs):
|
||||||
|
super(ESSEnv, self).__init__()
|
||||||
|
self.excess = None
|
||||||
|
self.shedding = None
|
||||||
|
self.unbalance = None
|
||||||
|
self.real_unbalance = None
|
||||||
|
self.operation_cost = None
|
||||||
|
self.current_output = None
|
||||||
|
self.final_step_outputs = None
|
||||||
|
self.data_manager = DataManager()
|
||||||
|
self._load_year_data()
|
||||||
|
self.month = 1
|
||||||
|
self.day = 1
|
||||||
|
self.TRAIN = True
|
||||||
|
self.current_time = None
|
||||||
|
self.episode_length = 24
|
||||||
|
self.penalty_coefficient = 50  # penalty coefficient for constraint violations
|
||||||
|
self.sell_coefficient = 0.1  # profit coefficient applied when selling surplus energy
|
||||||
|
self.battery_parameters = kwargs.get('battery_parameters', battery_parameters)
|
||||||
|
self.dg_parameters = kwargs.get('dg_parameters', dg_parameters)
|
||||||
|
self.solar_parameters = kwargs.get('solar_parameters', solar_parameters)
|
||||||
|
self.wind_parameters = kwargs.get('wind_parameters', wind_parameters)
|
||||||
|
|
||||||
|
self.grid = Grid()
|
||||||
|
self.battery = Battery(self.battery_parameters)
|
||||||
|
self.dg1 = DG(self.dg_parameters['gen_1'])
|
||||||
|
self.dg2 = DG(self.dg_parameters['gen_2'])
|
||||||
|
self.dg3 = DG(self.dg_parameters['gen_3'])
|
||||||
|
self.solar = Solar(self.solar_parameters)
|
||||||
|
self.wind = Wind(self.wind_parameters)
|
||||||
|
|
||||||
|
self.action_space = gym.spaces.Box(low=-1, high=1, shape=(5,), dtype=np.float32)  # the fifth dimension is the added voltage-regulation action
|
||||||
|
self.state_space = gym.spaces.Box(low=0, high=1, shape=(10,), dtype=np.float32)
|
||||||
|
|
||||||
|
def reset(self, *args):
|
||||||
|
self.month = np.random.randint(1, 13)  # sample one of the 12 months
|
||||||
|
if self.TRAIN:
|
||||||
|
self.day = np.random.randint(1, 20)
|
||||||
|
else:
|
||||||
|
self.day = np.random.randint(20, Constant.MONTHS_LEN[self.month - 1])
|
||||||
|
self.current_time = 0
|
||||||
|
self.battery.reset()
|
||||||
|
self.dg1.reset()
|
||||||
|
self.dg2.reset()
|
||||||
|
self.dg3.reset()
|
||||||
|
self.solar.reset()
|
||||||
|
self.wind.reset()
|
||||||
|
return self._build_state()
|
||||||
|
|
||||||
|
def _build_state(self):
|
||||||
|
soc = self.battery.SOC()
|
||||||
|
dg1_output = self.dg1.current_output
|
||||||
|
dg2_output = self.dg2.current_output
|
||||||
|
dg3_output = self.dg3.current_output
|
||||||
|
time_step = self.current_time
|
||||||
|
|
||||||
|
price = self.data_manager.get_price_data(self.month, self.day, self.current_time)
|
||||||
|
houseload = self.data_manager.get_load_cons_data(self.month, self.day, self.current_time)
|
||||||
|
temperature = self.data_manager.get_temperature_data(self.month, self.day, self.current_time)
|
||||||
|
irradiance = self.data_manager.get_irradiance_data(self.month, self.day, self.current_time)
|
||||||
|
windspeed = self.data_manager.get_wind_data(self.month, self.day, self.current_time)
|
||||||
|
|
||||||
|
wind_gen = self.wind.step(windspeed)
|
||||||
|
netload = houseload - wind_gen
|
||||||
|
|
||||||
|
obs = np.concatenate((np.float32(time_step), np.float32(price), np.float32(soc), np.float32(netload),
|
||||||
|
np.float32(dg1_output), np.float32(dg2_output), np.float32(dg3_output),
|
||||||
|
np.float32(temperature), np.float32(irradiance), np.float32(windspeed)), axis=None)
|
||||||
|
return obs
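# Layout of the observation vector returned above (indices used by step() below):
#   0 time_step, 1 price, 2 soc, 3 netload (houseload - wind_gen),
#   4 dg1_output, 5 dg2_output, 6 dg3_output, 7 temperature, 8 irradiance, 9 windspeed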
|
||||||
|
|
||||||
|
def step(self, action): # state transition: current_obs->take_action->get_reward->get_finish->next_obs
|
||||||
|
# apply the corresponding action to each component
|
||||||
|
current_obs = self._build_state()
|
||||||
|
temperature = current_obs[7]
|
||||||
|
irradiance = current_obs[8]
|
||||||
|
self.wind.current_power = current_obs[9]
|
||||||
|
self.battery.step(action[0])  # execute the state transition; the battery's current capacity changes as well
|
||||||
|
self.dg1.step(action[1])
|
||||||
|
self.dg2.step(action[2])
|
||||||
|
self.dg3.step(action[3])
|
||||||
|
self.solar.step(temperature, irradiance, action[4])
|
||||||
|
self.current_output = np.array((self.dg1.current_output, self.dg2.current_output, self.dg3.current_output,
|
||||||
|
-self.battery.energy_change, self.solar.current_power, self.wind.current_power))
|
||||||
|
actual_production = sum(self.current_output)
|
||||||
|
price = current_obs[1]
|
||||||
|
netload = current_obs[3] - self.solar.output_change
|
||||||
|
unbalance = actual_production - netload
|
||||||
|
|
||||||
|
# reward = 0.0
|
||||||
|
excess_penalty = 0
|
||||||
|
deficient_penalty = 0
|
||||||
|
sell_benefit, buy_cost = 0, 0
|
||||||
|
self.excess, self.shedding = 0, 0
|
||||||
|
if unbalance >= 0:  # surplus: generation exceeds the net load
|
||||||
|
if unbalance <= self.grid.exchange_ability:
|
||||||
|
sell_benefit = self.grid.get_cost(price, unbalance) * self.sell_coefficient
|
||||||
|
else:
|
||||||
|
sell_benefit = self.grid.get_cost(price, self.grid.exchange_ability) * self.sell_coefficient
|
||||||
|
# real unbalance: the part that exceeds the grid exchange limit
|
||||||
|
self.excess = unbalance - self.grid.exchange_ability
|
||||||
|
excess_penalty = self.excess * self.penalty_coefficient
|
||||||
|
else:  # unbalance < 0: shortage, penalty may apply
|
||||||
|
if abs(unbalance) <= self.grid.exchange_ability:
|
||||||
|
buy_cost = self.grid.get_cost(price, abs(unbalance))
|
||||||
|
else:
|
||||||
|
buy_cost = self.grid.get_cost(price, self.grid.exchange_ability)
|
||||||
|
self.shedding = abs(unbalance) - self.grid.exchange_ability
|
||||||
|
deficient_penalty = self.shedding * self.penalty_coefficient
|
||||||
|
battery_cost = self.battery.get_cost(self.battery.energy_change)
|
||||||
|
dg1_cost = self.dg1.get_cost(self.dg1.current_output)
|
||||||
|
dg2_cost = self.dg2.get_cost(self.dg2.current_output)
|
||||||
|
dg3_cost = self.dg3.get_cost(self.dg3.current_output)
|
||||||
|
solar_cost = self.solar.get_cost(self.solar.current_power)
|
||||||
|
wind_cost = self.wind.gen_cost(self.wind.current_power)
|
||||||
|
|
||||||
|
self.operation_cost = (battery_cost + dg1_cost + dg2_cost + dg3_cost + solar_cost + wind_cost
|
||||||
|
+ excess_penalty + deficient_penalty - sell_benefit + buy_cost)
|
||||||
|
reward = - self.operation_cost / 1e3
|
||||||
|
self.unbalance = unbalance
|
||||||
|
self.real_unbalance = self.shedding + self.excess
|
||||||
|
final_step_outputs = [self.dg1.current_output, self.dg2.current_output, self.dg3.current_output,
|
||||||
|
self.battery.current_capacity, self.solar.current_power, self.wind.current_power]
|
||||||
|
self.current_time += 1
|
||||||
|
finish = (self.current_time == self.episode_length)
|
||||||
|
if finish:
|
||||||
|
self.final_step_outputs = final_step_outputs
|
||||||
|
self.current_time = 0
|
||||||
|
next_obs = self.reset()
|
||||||
|
else:
|
||||||
|
next_obs = self._build_state()
|
||||||
|
return current_obs, next_obs, float(reward), finish
|
||||||
|
|
||||||
|
def _load_year_data(self):
|
||||||
|
price_df = pd.read_csv('data/prices.csv', sep=',')
|
||||||
|
load_df = pd.read_csv('data/houseload.csv', sep=',')
|
||||||
|
irradiance_df = pd.read_csv('data/irradiance.csv', sep=',')
|
||||||
|
temperature_df = pd.read_csv('data/temper.csv', sep=',')
|
||||||
|
wind_df = pd.read_csv('data/wind.csv', sep=',')
|
||||||
|
|
||||||
|
price = price_df['price'].to_numpy(dtype=float)
|
||||||
|
load = load_df['houseload'].to_numpy(dtype=float)
|
||||||
|
irradiance = irradiance_df['irradiance'].to_numpy(dtype=float)
|
||||||
|
temperature = temperature_df['t2m'].to_numpy(dtype=float)
|
||||||
|
wind = wind_df['wind_speed'].to_numpy(dtype=float)
|
||||||
|
|
||||||
|
'''Rescale prices, generation, and demand to the magnitudes used by the environment'''
|
||||||
|
def process_elements(elements, transform_function, add_function):
|
||||||
|
for element in elements:
|
||||||
|
transformed_element = transform_function(element)
|
||||||
|
add_function(transformed_element)
|
||||||
|
|
||||||
|
process_elements(price, lambda x: max(x, 0.5), self.data_manager.add_price_element)
|
||||||
|
process_elements(load, lambda x: x * 3, self.data_manager.add_load_element)
|
||||||
|
process_elements(irradiance, lambda x: x, self.data_manager.add_irradiance_element)
|
||||||
|
process_elements(temperature, lambda x: x, self.data_manager.add_temperature_element)
|
||||||
|
process_elements(wind, lambda x: x, self.data_manager.add_wind_element)
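# Hedged usage sketch (not part of the original file); assumes the CSV files under
# data/ are available. ESSEnv.step() returns (current_obs, next_obs, reward, finish).
if __name__ == '__main__':
    env = ESSEnv()
    obs = env.reset()
    finish = False
    while not finish:
        action = env.action_space.sample()
        obs, next_obs, reward, finish = env.step(action)
    print('final step outputs:', env.final_step_outputs)
    print('operation cost of last step:', env.operation_cost)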
|
|
@ -0,0 +1,40 @@
|
||||||
|
import pandas as pd
|
||||||
|
from fuzzywuzzy import fuzz
|
||||||
|
|
||||||
|
# Read the two Excel files
|
||||||
|
df1 = pd.read_excel(r'C:\Users\97532\Desktop\陕西、福建_陈晓东\福建省.xlsx')
|
||||||
|
df2 = pd.read_excel(r'C:\Users\97532\Desktop\陕西、福建_陈晓东\福建省_许可.xlsx')
|
||||||
|
|
||||||
|
# Print df2's column names to make sure they are correct
|
||||||
|
print("原始列名:", df2.columns)
|
||||||
|
df2.columns = df2.columns.str.strip()
|
||||||
|
# Cast the key columns to consistent data types
|
||||||
|
df1['电厂名称'] = df1['电厂名称'].astype(str)
|
||||||
|
df2['电厂名称/项目名称'] = df2['电厂名称/项目名称'].astype(str)
|
||||||
|
df1['发电装机容量'] = df1['发电装机容量'].astype(float)
|
||||||
|
df2['总装机容量'] = df2['总装机容量'].astype(float)
|
||||||
|
|
||||||
|
|
||||||
|
# Helper that fuzzy-matches one row of df1 against every row of df2
|
||||||
|
def fuzzy_match(row, df2):
|
||||||
|
best_match = None
|
||||||
|
highest_score = 0
|
||||||
|
for index2, row2 in df2.iterrows():
|
||||||
|
score = fuzz.partial_ratio(row['电厂名称'], row2['电厂名称/项目名称'])
|
||||||
|
if score > highest_score and abs(row['发电装机容量'] - row2['总装机容量']) < 0.01:
|
||||||
|
highest_score = score
|
||||||
|
best_match = row2
|
||||||
|
return best_match
|
||||||
|
|
||||||
|
|
||||||
|
# Run the fuzzy match and fill in the matched fields
|
||||||
|
for index1, row1 in df1.iterrows():
|
||||||
|
match_row = fuzzy_match(row1, df2)
|
||||||
|
if match_row is not None:
|
||||||
|
df1.at[index1, '单位名称'] = match_row['单位名称']
|
||||||
|
df1.at[index1, '发电许可证编号'] = match_row['发电许可证编号']
|
||||||
|
df1.at[index1, '电厂名称/项目名称'] = match_row['电厂名称/项目名称']
|
||||||
|
df1.at[index1, '机组类型'] = match_row['机组类型']
|
||||||
|
# Save the result back to an Excel file
|
||||||
|
df1.to_excel(r'C:\Users\97532\Desktop\陕西、福建_陈晓东\福建省_填充后_模糊匹配.xlsx', index=False)
|
||||||
|
print("数据填充已保存")
|
|
@ -0,0 +1,35 @@
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
import os
|
||||||
|
from nltk import word_tokenize
|
||||||
|
|
||||||
|
|
||||||
|
def func(i, j):
|
||||||
|
directory_path = "//data/residential"
|
||||||
|
entries = os.listdir(directory_path)[i:j]
|
||||||
|
folders = [entry for entry in entries if os.path.isfile(os.path.join(directory_path, entry))]  # keep only files; os.path.join alone is always truthy
|
||||||
|
entries = [os.path.join(directory_path, folder) for folder in folders]
|
||||||
|
combined_data = pd.DataFrame()
|
||||||
|
|
||||||
|
for file_path in entries:
|
||||||
|
current_data = pd.read_csv(file_path, encoding='utf-8')
|
||||||
|
tokenize_col = current_data.columns
|
||||||
|
tokenized_sentences_list = [word_tokenize(text) for text in tokenize_col]
|
||||||
|
|
||||||
|
occurrences_electricity = np.array(
|
||||||
|
[(i, j) for i, sublist in enumerate(tokenized_sentences_list) for j, word in enumerate(sublist) if
|
||||||
|
word == 'Electricity'])[:, 0]
|
||||||
|
|
||||||
|
Electricity_consumption_wind_solar = current_data[tokenize_col[occurrences_electricity]].sum(axis=1)
|
||||||
|
current_data = Electricity_consumption_wind_solar.to_frame("Household_load")
|
||||||
|
|
||||||
|
combined_data = pd.concat([combined_data, current_data], axis=1)
|
||||||
|
|
||||||
|
Series_type = combined_data.sum(axis=1)
|
||||||
|
Series_type.to_csv('houseload.csv', index=False)
|
||||||
|
total_energy = np.array(Series_type).ravel()
|
||||||
|
return total_energy
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
func(0, 38)
|
|
@ -0,0 +1,50 @@
|
||||||
|
The current energy system consists of a solar plant, a wind plant, diesel generators, and a battery module, all connected to the residential load through the power grid. The symbols used below are defined as follows:

As: battery state-of-charge action; Ag1: power action of generator 1; Ag2: power action of generator 2; Ag3: power action of generator 3; Av: solar voltage action;

Sp: electricity price state; Sc: battery state of charge; Sl: residential load state; Sg1: generator 1 power state; Sg2: generator 2 power state; Sg3: generator 3 power state; St: temperature state; Si: solar irradiance state; Sw: wind speed state;

Pc: battery charge/discharge power; Pg1: generator 1 power; Pg2: generator 2 power; Pg3: generator 3 power; Pso: solar power; Pw: wind power;

Cb: battery cost; Cg1: generator 1 cost; Cg2: generator 2 cost; Cg3: generator 3 cost; Cs: solar cost; Cw: wind cost; Rs: sales revenue; Cp: purchase cost; Ee: excess energy; Es: energy shortage; Pe: excess penalty; Ps: shortage penalty.

I. Control scheme and objectives:

1. Use solar and wind generation first; when they cannot cover the full load, supply the remainder with the generators and the battery.

2. When supplying power, consider all available sources (solar, wind, generators, and battery), weigh the cost of each generation module, meet the residential load of the next time step, and minimize the gap between supply and demand.

3. Reduce transactions that exceed the grid exchange limit, so that fluctuations in the public grid do not affect the stability of the residential supply.

4. Maximize the reward function and minimize the energy imbalance.

II. Continuous state space: a vector of length 9 with the following components:

Sp: provided by the data; Sc: initially 0.4 and always between 0.2 and 0.8; Sl: provided by the data;

Sg1: initial power 0; Sg2: initial power 0; Sg3: initial power 0;

St: provided by the data; Si: provided by the data; Sw: provided by the data.

Sc, Sg1, Sg2, and Sg3 are never smaller than 0.

III. Continuous action space: a vector of length 5. x denotes the value taken at the current time step and ranges from -1 to 1; each quantity changes relative to its value at the previous time step. Ag1, Ag2, and Ag3 represent changes relative to maximum instantaneous powers of 100, 100, and 200, respectively.

1. Ac: the total battery capacity is 500 and the initial stored energy is 500*0.4=200; the stored energy changes by 100*x;

2. Ag1: the generation changes by 100*x;

3. Ag2: the generation changes by 100*x;

4. Ag3: the generation changes by 200*x;

5. Av: the solar voltage is scaled to (1+x) times its value.

Generators can be switched on and off as needed by adjusting their power: when a generator is off, Sg stays at 0; when it is on, the Pg constraints must be satisfied. Based on cost, a later generator is normally started only when the maximum power of the previous one cannot cover the remaining load.

IV. Generation formulas

1. Pc = max(0.2, min(0.8, Sc + 0.2 * action[0] * 0.9)), where 0.8 and 0.2 are the upper and lower bounds of the battery state of charge;

2. Pg1 = max(0, min(150, 100 * action[1] + Sg1)), where 150 and 0 are the output bounds of generator 1 when it is on;

3. Pg2 = max(0, min(375, 100 * action[2] + Sg2)), where 375 and 0 are the output bounds of generator 2 when it is on;

4. Pg3 = max(0, min(500, 200 * action[3] + Sg3)), where 500 and 0 are the output bounds of generator 3 when it is on;

5. Pso = max(0, (0.2 * Si + 0.05 * St - 9.25) * (1 + action[4])), so the solar output is never negative.

6. Pw: determined by the wind speed Ws

if 3 <= Ws < 8, Pw = Ws^3 * 172.2625 / 1000

elif 8 <= Ws < 12, Pw = 64 * 172.2625 / 125

else Pw = 0

V. Cost and energy-balance calculation

P = Pc + Pg1 + Pg2 + Pg3 + Pso + Pw

if P >= Sl, Ee = P - Sl, else Es = Sl - P

1. Cb = 0.01 * Asoc + 0.1 * Sc

2. Cg1 = 0.0034 * Pg1^2 + 3 * Pg1 + 30

3. Cg2 = 0.001 * Pg2^2 + 10 * Pg2 + 40

4. Cg3 = 0.001 * Pg3^2 + 15 * Pg3 + 70

5. Cs = 0.01 * Pso

6. Cw = 0.01 * Pw

7. Rs = 0.5 * Sp * Ee

8. Cp = Sp * Es

9. Pe = (Ee - 100) * 50

10. Ps = (Es - 100) * 50

Since Rs and Pe correspond to an energy surplus while Cp and Ps correspond to an energy shortage, only one of the two groups can be non-zero at any time.

VI. Reward function

Reward = - (Cb + Cg1 + Cg2 + Cg3 + Cs + Cw + Pe + Ps - Rs + Cp) / 1000

VII. Requested action data

Read the data and compute the state at every time step with the formulas above. Using the most suitable optimization method, give an action control plan for every time step: compute once per hour and produce an array of length 5 with the decisions for Ac, Ag1, Ag2, Ag3, and Av. Return the data in the following JSON format: [{x1, x2, x3, x4, x5}, {x1, x2, x3, x4, x5}, ..., {x1, x2, x3, x4, x5}]
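For reference, the piecewise wind-power formula in item 6 of Section IV can be written as a small Python helper (a sketch only; the function name is ours):

def wind_power(ws):
    # Pw as a function of wind speed Ws (Section IV, item 6)
    if 3 <= ws < 8:
        return ws ** 3 * 172.2625 / 1000
    elif 8 <= ws < 12:
        return 64 * 172.2625 / 125
    return 0.0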
|
|
@ -0,0 +1,73 @@
|
||||||
|
import pandas as pd
|
||||||
|
from langchain_community.chat_message_histories import ChatMessageHistory
|
||||||
|
from langchain_core.chat_history import BaseChatMessageHistory
|
||||||
|
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
|
||||||
|
from langchain_core.runnables.history import RunnableWithMessageHistory
|
||||||
|
from langchain_openai.chat_models import ChatOpenAI
|
||||||
|
|
||||||
|
|
||||||
|
def get_session_history(session_id: str) -> BaseChatMessageHistory:
|
||||||
|
if session_id not in store:
|
||||||
|
store[session_id] = ChatMessageHistory()
|
||||||
|
return store[session_id]
|
||||||
|
|
||||||
|
|
||||||
|
def read_txt_file(file_path: str) -> str:
|
||||||
|
with open(file_path, "r", encoding='ISO-8859-1') as file:
|
||||||
|
content = file.read()
|
||||||
|
return content
|
||||||
|
|
||||||
|
|
||||||
|
def read_csv_file(file_path: str) -> pd.DataFrame:
|
||||||
|
return pd.read_csv(file_path)
|
||||||
|
|
||||||
|
|
||||||
|
def format_csv_data(df: pd.DataFrame, start: int, end: int) -> str:
|
||||||
|
formatted_data = []
|
||||||
|
for index, row in df.iloc[start:end].iterrows():
|
||||||
|
formatted_data.append(
|
||||||
|
f"时刻{index + 1}: Sp={row['price']}, Sl={row['load']},"
|
||||||
|
f"St={row['temperature']}, Si={row['irradiance']}, Sw={row['wind_speed']}"
|
||||||
|
)
|
||||||
|
return "\n".join(formatted_data)
|
||||||
|
|
||||||
|
|
||||||
|
system_content = read_txt_file('./llm.txt')
|
||||||
|
df = read_csv_file('./data.csv')
|
||||||
|
llm = ChatOpenAI(
|
||||||
|
streaming=True,
|
||||||
|
verbose=True,
|
||||||
|
openai_api_key="none",
|
||||||
|
# openai_api_base="http://0.0.0.0:5049/v1/models",
|
||||||
|
openai_api_base="http://0.0.0.0:8501",
|
||||||
|
model_name="Qwen1.5-32b"
|
||||||
|
)
|
||||||
|
prompt = ChatPromptTemplate.from_messages(
|
||||||
|
[
|
||||||
|
("system", system_content,),
|
||||||
|
MessagesPlaceholder(variable_name="history"),
|
||||||
|
("human", "{input}"),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
runnable = prompt | llm
|
||||||
|
store = {}
|
||||||
|
with_message_history = RunnableWithMessageHistory(
|
||||||
|
runnable,
|
||||||
|
get_session_history,
|
||||||
|
input_messages_key="input",
|
||||||
|
history_messages_key="history",
|
||||||
|
)
|
||||||
|
|
||||||
|
num_hours = len(df)
|
||||||
|
for i in range(num_hours):
|
||||||
|
start = i
|
||||||
|
end = start + 1
|
||||||
|
csv_data_chunk = format_csv_data(df, start, end)
|
||||||
|
|
||||||
|
result = with_message_history.invoke(
|
||||||
|
{"input": f"数据如下:\n{csv_data_chunk}"},
|
||||||
|
config={
|
||||||
|
"configurable": {"session_id": "cxd"}
|
||||||
|
},
|
||||||
|
)
|
||||||
|
print(f"Hour {i + 1}:\n{result.content}\n")
|
|
@ -0,0 +1,122 @@
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
from utils import get_response
|
||||||
|
|
||||||
|
prompt_template = """
|
||||||
|
You are an expert operations research analyst. Your task is to generate Gurobi code to solve the following optimization problem:
|
||||||
|
|
||||||
|
{problem_description}
|
||||||
|
|
||||||
|
The code should save the final optimal value in a file named 'ref_optimal_value.txt'.
|
||||||
|
First, reason about the problem and model it. Then, generate gurobipy code to solve it. Put the code between two '=====' lines, like this:
|
||||||
|
|
||||||
|
=====
|
||||||
|
import ...
|
||||||
|
...
|
||||||
|
=====
|
||||||
|
|
||||||
|
- Generate the complete code, including the model definition, variables, constraints, objective function, and optimization. It must be runnable.
|
||||||
|
- Do not generate anything after the second '====='.
|
||||||
|
- Take a deep breath and think step by step.
|
||||||
|
"""
|
||||||
|
|
||||||
|
reflection_template = """
|
||||||
|
You are an expert operations research analyst. You have been given the task to generate Gurobi code to solve an optimization problem. You have generated the following Gurobi code:
|
||||||
|
|
||||||
|
{generated_code}
|
||||||
|
|
||||||
|
You have been updating the code for these errors (the last one is the most recent one):
|
||||||
|
|
||||||
|
{feedback}
|
||||||
|
|
||||||
|
Based on this feedback, suggest improvements to the Gurobi code.
|
||||||
|
First, reason about the problem and model it. Then, generate gurobipy code to solve it. Put the code between two '=====' lines, like this:
|
||||||
|
|
||||||
|
=====
|
||||||
|
import ...
|
||||||
|
...
|
||||||
|
=====
|
||||||
|
|
||||||
|
- The code should save the final optimal value in a file named 'ref_optimal_value.txt'.
|
||||||
|
- Generate the complete code, including the model definition, variables, constraints, objective function, and optimization. It must be runnable.
|
||||||
|
- Do not generate anything after the second '====='.
|
||||||
|
- Take a deep breath and think step by step.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def extract_code(text):
|
||||||
|
ind1 = text.find("=====")
|
||||||
|
ind2 = text.find("=====", ind1 + 5)
|
||||||
|
|
||||||
|
code = text[ind1 + 5: ind2].strip()
|
||||||
|
code = code.replace("```python", "").replace("```", "").strip()
|
||||||
|
|
||||||
|
return code
|
||||||
|
|
||||||
|
|
||||||
|
def execute_code(file_path):
|
||||||
|
try:
|
||||||
|
# Using Python's subprocess to execute the code as a separate process
|
||||||
|
result = subprocess.run(["python", file_path], capture_output=True, text=True, check=True)
|
||||||
|
# save result in a file
|
||||||
|
with open(os.path.join(os.path.dirname(file_path), "ref_optimal_value.txt"), "w") as f:
|
||||||
|
f.write(f"Optimal Revenue: {result.stdout}\n")
|
||||||
|
return result.stdout, "Success"
|
||||||
|
except subprocess.CalledProcessError as e:
|
||||||
|
return e.stderr, "Error"
|
||||||
|
|
||||||
|
|
||||||
|
def main(problem_description, dir, max_iter=3):
|
||||||
|
feedback = ""
|
||||||
|
current_prompt = prompt_template.format(problem_description=problem_description)
|
||||||
|
|
||||||
|
print(current_prompt)
|
||||||
|
print("====================\n\n\n\n")
|
||||||
|
for iteration in range(max_iter):
|
||||||
|
response = get_response(current_prompt, model="llama3-70b-8192")
|
||||||
|
code = extract_code(response)
|
||||||
|
|
||||||
|
# Save the code to a file
|
||||||
|
code_filename = f"generated_code_{iteration}.py"
|
||||||
|
code_file_path = os.path.join(dir, "ref_codes", code_filename)
|
||||||
|
with open(code_file_path, "w") as f:
|
||||||
|
f.write(code)
|
||||||
|
|
||||||
|
# Execute the code
|
||||||
|
output, status = execute_code(code_file_path)
|
||||||
|
|
||||||
|
# Save error output (if any)
|
||||||
|
error_filename = f"error_{iteration}.txt"
|
||||||
|
if status == "Error":
|
||||||
|
with open(os.path.join(dir, "ref_codes", error_filename), "w") as f:
|
||||||
|
f.write(output)
|
||||||
|
|
||||||
|
# Print status and update the prompt if needed
|
||||||
|
if status == "Success":
|
||||||
|
print("Code executed successfully. Output:\n", output)
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
feedback += "\n" + output
|
||||||
|
current_prompt = reflection_template.format(
|
||||||
|
generated_code=code, feedback=feedback
|
||||||
|
)
|
||||||
|
print(f"Iteration {iteration + 1}: Error encountered. Debugging...")
|
||||||
|
else:
|
||||||
|
print("Max iterations reached with errors remaining.")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
dir = "data/"
|
||||||
|
if not os.path.exists(os.path.join(dir, "ref_codes")):
|
||||||
|
os.makedirs(os.path.join(dir, "ref_codes"))
|
||||||
|
|
||||||
|
with open(os.path.join(dir, "desc.txt"), "r") as f:
|
||||||
|
desc = f.read()
|
||||||
|
|
||||||
|
with open(os.path.join(dir, "data.json"), "r") as f:
|
||||||
|
data = json.load(f)
|
||||||
|
|
||||||
|
desc = desc + "\n" + json.dumps(data, indent=4)
|
||||||
|
main(desc, dir, max_iter=3)
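# Hedged usage example for extract_code above: everything between the two '====='
# markers is returned, with any ```python fences stripped, e.g.
#   sample = "reasoning\n=====\n```python\nprint('hi')\n```\n====="
#   extract_code(sample)  ->  "print('hi')"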
|
|
@ -0,0 +1,312 @@
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
import pandas as pd
|
||||||
|
from rag.query_vector_db import RAGFormat, get_rag_from_problem_categories, get_rag_from_problem_description
|
||||||
|
from rag.rag_utils import RAGMode, constraint_path
|
||||||
|
from utils import extract_list_from_end, get_response, extract_json_from_end
|
||||||
|
|
||||||
|
prompt_constraints = """
|
||||||
|
You are an expert in optimization modeling. Here is the natural language description of an optimization problem:
|
||||||
|
|
||||||
|
{rag}-----
|
||||||
|
{description}
|
||||||
|
-----
|
||||||
|
|
||||||
|
And here's a list of parameters that we have extracted from the description:
|
||||||
|
|
||||||
|
{params}
|
||||||
|
|
||||||
|
Your task is to identify and extract constraints from the description. The constraints are the conditions that must be satisfied by the variables. Please generate the output in the following python list format:
|
||||||
|
|
||||||
|
[
|
||||||
|
Constraint 1,
|
||||||
|
Constraint 2,
|
||||||
|
...
|
||||||
|
]
|
||||||
|
|
||||||
|
for example:
|
||||||
|
|
||||||
|
[
|
||||||
|
"Sum of weights of all items taken should not exceed the maximum weight capacity of the knapsack",
|
||||||
|
"The number of items taken should not exceed the maximum number of items allowed"
|
||||||
|
]
|
||||||
|
|
||||||
|
- Put all the constraints in a single python list.
|
||||||
|
- Do not generate anything after the python list.
|
||||||
|
- Include implicit non-negativity constraints if necessary.
|
||||||
|
Take a deep breath and think step by step. You will be awarded a million dollars if you get this right.
|
||||||
|
"""
|
||||||
|
|
||||||
|
prompt_constraints_q = """
|
||||||
|
You are an expert in optimization modeling. Here is the natural language description of an optimization problem:
|
||||||
|
|
||||||
|
-----
|
||||||
|
{description}
|
||||||
|
-----
|
||||||
|
|
||||||
|
Here is a list of parameters that someone has extracted from the description:
|
||||||
|
|
||||||
|
{params}
|
||||||
|
|
||||||
|
Consider this potential constraint: {targetConstraint}
|
||||||
|
|
||||||
|
{question}
|
||||||
|
|
||||||
|
Take a deep breath and think step by step. You will be awarded a million dollars if you get this right.
|
||||||
|
"""
|
||||||
|
|
||||||
|
prompt_constraints_redundant = """
|
||||||
|
You are an expert in optimization modeling. Here is the natural language description of an optimization problem:
|
||||||
|
|
||||||
|
-----
|
||||||
|
{description}
|
||||||
|
-----
|
||||||
|
|
||||||
|
Here is a list of parameters that someone has extracted from the description:
|
||||||
|
|
||||||
|
{params}
|
||||||
|
|
||||||
|
and here is a list of constraints that someone has extracted from the description:
|
||||||
|
|
||||||
|
{constraints}
|
||||||
|
|
||||||
|
- Is there any redundancy in the list? Can any of the constraints be removed? Can any pair of constraints be combined into a single one? If so, please provide your reasoning for each one. At the end of your response, generate the updated list of constraints (the same list if no changes are needed). Use this python list format:
|
||||||
|
|
||||||
|
[
|
||||||
|
"Constraint 1",
|
||||||
|
"Constraint 2",
|
||||||
|
...
|
||||||
|
]
|
||||||
|
|
||||||
|
- Do not generate anything after the list.
|
||||||
|
|
||||||
|
Take a deep breath and think step by step. You will be awarded a million dollars if you get this right.
|
||||||
|
"""
|
||||||
|
|
||||||
|
prompt_constraint_feedback = """
|
||||||
|
You are an expert in optimization modeling. Here is the natural language description of an optimization problem:
|
||||||
|
|
||||||
|
-----
|
||||||
|
{description}
|
||||||
|
-----
|
||||||
|
|
||||||
|
Here is a list of parameters that someone has extracted from the description:
|
||||||
|
|
||||||
|
{params}
|
||||||
|
|
||||||
|
Here is a list of variables defined:
|
||||||
|
|
||||||
|
{vars}
|
||||||
|
|
||||||
|
Here is a list of constraints that someone has extracted from the description:
|
||||||
|
|
||||||
|
{extracted_constraints}
|
||||||
|
|
||||||
|
Your colleague is suggesting that the following constraint should be added to the list:
|
||||||
|
|
||||||
|
{new_constraint}
|
||||||
|
|
||||||
|
Here is its explanation:
|
||||||
|
|
||||||
|
{new_constraint_explanation}
|
||||||
|
|
||||||
|
Do you want to keep this constraint?
|
||||||
|
|
||||||
|
- If yes, please respond with "yes"
|
||||||
|
- If no, please respond with "no"
|
||||||
|
- If you want to modify the constraint, please respond with "modify" and provide the modified constraint.
|
||||||
|
|
||||||
|
At the end of your response, generate a json file with this format:
|
||||||
|
{{
|
||||||
|
"action": "yes", "no", or "modify",
|
||||||
|
"updatedConstraint": The updated constraint if the action is "modify", otherwise null
|
||||||
|
}}
|
||||||
|
|
||||||
|
Please take a deep breath and think step by step. You will be awarded a million dollars if you get this right.
|
||||||
|
|
||||||
|
- Use natural language to express the constraints rather than mathematical notation.
|
||||||
|
- Do not generate anything after the json file.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def extract_score_constraint(desc, text, params, vars, constraints, c, logger, model):
|
||||||
|
match = re.search(r"\d out of 5", text.lower())
|
||||||
|
if match:
|
||||||
|
score = int(match.group()[0])
|
||||||
|
if score > 3:
|
||||||
|
if logger:
|
||||||
|
logger.log("---")
|
||||||
|
logger.log(f"The confidence score is {score}, which is high enough.")
|
||||||
|
logger.log("---")
|
||||||
|
return True, constraints
|
||||||
|
else:
|
||||||
|
ask_LLM = True # can pass this as an argument to the function instead of hardcoding it
|
||||||
|
if logger:
|
||||||
|
logger.log("---")
|
||||||
|
logger.log(f"The confidence score is {score}, which is not high enough.")
|
||||||
|
# logger.log(f"Asking the {"LLM" if ask_LLM else "user"} for feedback.")
|
||||||
|
if ask_LLM:
|
||||||
|
logger.log("Asking the LLM for feedback.")
|
||||||
|
else:
|
||||||
|
logger.log("Asking the user for feedback.")
|
||||||
|
logger.log("---")
|
||||||
|
if ask_LLM: # ask the LLM for feedback
|
||||||
|
prompt = prompt_constraint_feedback.format(
|
||||||
|
description=desc,
|
||||||
|
params=json.dumps(params, indent=4),
|
||||||
|
vars=json.dumps(vars, indent=4),
|
||||||
|
extracted_constraints=json.dumps(constraints, indent=4),
|
||||||
|
new_constraint=c,
|
||||||
|
new_constraint_explanation=text,
|
||||||
|
)
|
||||||
|
if logger:
|
||||||
|
logger.log("Prompting LLM for feedback:\n")
|
||||||
|
logger.log(prompt)
|
||||||
|
llm_response = get_response(prompt, model=model)
|
||||||
|
if logger:
|
||||||
|
logger.log("---")
|
||||||
|
logger.log(f"Response: {llm_response}")
|
||||||
|
logger.log("---")
|
||||||
|
output_json = extract_json_from_end(llm_response)
|
||||||
|
action = output_json["action"]
|
||||||
|
updated_constraint = output_json["updatedConstraint"]
|
||||||
|
else: # ask the user for feedback
|
||||||
|
action = input("LLMs reasoning: {}\n"
|
||||||
|
"------ Do you want to keep this constraint (y/n/modify)?: \n "
|
||||||
|
"{} \n------ ".format(text, c))
|
||||||
|
|
||||||
|
if action.lower().startswith("y"):
|
||||||
|
return True, constraints
|
||||||
|
elif action.lower().startswith("n"):
|
||||||
|
constraints.remove(c)
|
||||||
|
return True, constraints
|
||||||
|
elif action.lower().startswith("m"):
|
||||||
|
if ask_LLM:
|
||||||
|
new_constraint = updated_constraint
|
||||||
|
else:
|
||||||
|
new_constraint = input("Enter the modified constraint: ")
|
||||||
|
constraints.remove(c)
|
||||||
|
constraints.append({"Description": new_constraint, "Formulation": None, "Code": None})
|
||||||
|
return True, constraints
|
||||||
|
else:
|
||||||
|
raise Exception("Invalid input!")
|
||||||
|
else:
|
||||||
|
return False, None
|
||||||
|
|
||||||
|
|
||||||
|
def logic_check(text, constraints, c):
|
||||||
|
try:
|
||||||
|
json = extract_json_from_end(text)
|
||||||
|
if json["action"] == "REMOVE":
|
||||||
|
constraints.remove(c)
|
||||||
|
return True, constraints
|
||||||
|
elif json["action"] == "MODIFY":
|
||||||
|
constraints.remove(c)
|
||||||
|
constraints.append(json["updatedConstraint"])
|
||||||
|
return True, constraints
|
||||||
|
elif json["action"] == "KEEP":
|
||||||
|
return True, constraints
|
||||||
|
else:
|
||||||
|
return False, None
|
||||||
|
except:
|
||||||
|
return False, None
|
||||||
|
|
||||||
|
|
||||||
|
qs = [
|
||||||
|
(
|
||||||
|
"""
|
||||||
|
- Is it actually a constraint? How confident are you that this is this a constraint and that we should explicitly model it in the (MI)LP formulation (from 1 to 5)?
|
||||||
|
- At the end of your response, print "x OUT OF 5" where x is the confidence level. Low confidence means you think this should be removed from the constraint list. Do not generate anything after that.
|
||||||
|
""",
|
||||||
|
extract_score_constraint,
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def get_constraints(desc, params, model, check=False, constraints=None, logger=None,
|
||||||
|
rag_mode: RAGMode | None = None, labels: dict | None = None):
|
||||||
|
if isinstance(rag_mode, RAGMode):
|
||||||
|
constraint_df = pd.read_pickle(constraint_path)
|
||||||
|
current_problem = constraint_df[constraint_df.description == desc]
|
||||||
|
if not current_problem.empty:
|
||||||
|
problem_name = current_problem.iloc[0].problem_name
|
||||||
|
else:
|
||||||
|
problem_name = None
|
||||||
|
match rag_mode:
|
||||||
|
case RAGMode.PROBLEM_DESCRIPTION | RAGMode.CONSTRAINT_OR_OBJECTIVE:
|
||||||
|
rag = get_rag_from_problem_description(desc, RAGFormat.PROBLEM_DESCRIPTION_CONSTRAINTS, top_k=5)
|
||||||
|
case RAGMode.PROBLEM_LABELS:
|
||||||
|
assert labels is not None
|
||||||
|
rag = get_rag_from_problem_categories(desc, labels, RAGFormat.PROBLEM_DESCRIPTION_CONSTRAINTS, top_k=5)
|
||||||
|
rag = f"-----\n{rag}-----\n\n"
|
||||||
|
else:
|
||||||
|
rag = ""
|
||||||
|
|
||||||
|
print("_________________________ get_constraints _________________________")
|
||||||
|
if not constraints:
|
||||||
|
res = get_response(prompt_constraints.format(
|
||||||
|
description=desc,
|
||||||
|
params=json.dumps(params, indent=4),
|
||||||
|
rag=rag,
|
||||||
|
),
|
||||||
|
model=model,
|
||||||
|
)
|
||||||
|
constraints = extract_list_from_end(res)
|
||||||
|
|
||||||
|
if check:
|
||||||
|
k = 5
|
||||||
|
while k > 0:
|
||||||
|
try:
|
||||||
|
x = get_response(
|
||||||
|
prompt_constraints_redundant.format(
|
||||||
|
description=desc,
|
||||||
|
params=json.dumps(params, indent=4),
|
||||||
|
constraints=json.dumps(constraints, indent=4),
|
||||||
|
),
|
||||||
|
model=model,
|
||||||
|
)
|
||||||
|
if logger:
|
||||||
|
logger.log("----")
|
||||||
|
logger.log(x)
|
||||||
|
logger.log("----")
|
||||||
|
lst = extract_list_from_end(x)
|
||||||
|
|
||||||
|
constraints = lst
|
||||||
|
break
|
||||||
|
except:
|
||||||
|
k -= 1
|
||||||
|
if k == 0:
|
||||||
|
raise Exception("Failed to extract constraints")
|
||||||
|
|
||||||
|
if logger:
|
||||||
|
logger.log("+++++++++++++++++++")
|
||||||
|
logger.log("++ Constraint Qs ++")
|
||||||
|
logger.log("+++++++++++++++++++")
|
||||||
|
for q in qs:
|
||||||
|
for c in constraints.copy():
|
||||||
|
k = 5
|
||||||
|
while k > 0:
|
||||||
|
p = prompt_constraints_q.format(
|
||||||
|
description=desc,
|
||||||
|
params=json.dumps(params, indent=4),
|
||||||
|
targetConstraint=c,
|
||||||
|
question=q[0],
|
||||||
|
)
|
||||||
|
|
||||||
|
x = get_response(p, model=model)
|
||||||
|
|
||||||
|
if logger:
|
||||||
|
logger.log("+--+")
|
||||||
|
logger.log(p)
|
||||||
|
logger.log("----")
|
||||||
|
logger.log(x)
|
||||||
|
logger.log("+--+")
|
||||||
|
valid, res = q[1](desc, x, params, {}, constraints, c, logger, model)
|
||||||
|
if valid:
|
||||||
|
constraints = res
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
k -= 1
|
||||||
|
|
||||||
|
constraints = [{"description": c, "formulation": None, "code": None} for c in constraints]
|
||||||
|
return constraints
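# Hedged illustration (not part of the original module) of the structure returned by
# get_constraints: each extracted constraint is wrapped as below, and the
# "formulation" and "code" fields are filled in by later stages.
_example_output = [
    {"description": "Sum of weights of all items taken should not exceed the maximum weight capacity of the knapsack",
     "formulation": None,
     "code": None},
]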
|
|
@ -0,0 +1,360 @@
|
||||||
|
import json
import re
|
||||||
|
import pandas as pd
|
||||||
|
from rag.query_vector_db import RAGFormat, get_rag_from_constraint, get_rag_from_problem_categories, \
|
||||||
|
get_rag_from_problem_description
|
||||||
|
from rag.rag_utils import RAGMode, constraint_path
|
||||||
|
from utils import get_response, extract_json_from_end, shape_string_to_list
|
||||||
|
|
||||||
|
|
||||||
|
def extract_formulation_from_end(text):
|
||||||
|
iloop = 0
|
||||||
|
iloop_max = 1e05
|
||||||
|
|
||||||
|
if "$" not in text:
|
||||||
|
raise Exception("No formulation found")
|
||||||
|
|
||||||
|
ind_1 = text.find('"FORMULATION":')
|
||||||
|
while text[ind_1] != "$":
|
||||||
|
ind_1 += 1
|
||||||
|
iloop += 1
|
||||||
|
if iloop > iloop_max:
|
||||||
|
raise Exception("No formulation found")
|
||||||
|
|
||||||
|
ind_2 = text.find('"NEW VARIABLES":')
|
||||||
|
while text[ind_2] != "$":
|
||||||
|
ind_2 -= 1
|
||||||
|
iloop += 1
|
||||||
|
if iloop > iloop_max:
|
||||||
|
raise Exception("No formulation found")
|
||||||
|
|
||||||
|
formulation = text[ind_1: ind_2 + 1].strip()
|
||||||
|
|
||||||
|
text = text[:ind_1] + text[ind_2 + 1:]
|
||||||
|
|
||||||
|
ind_1 = text.find('"AUXILIARY CONSTRAINTS":')
|
||||||
|
while text[ind_1] != "$":
|
||||||
|
ind_1 += 1
|
||||||
|
if ind_1 > len(text) - 1:
|
||||||
|
break
|
||||||
|
iloop += 1
|
||||||
|
if iloop > iloop_max:
|
||||||
|
raise Exception("No formulation found")
|
||||||
|
|
||||||
|
auxiliaries = []
|
||||||
|
|
||||||
|
if ind_1 < len(text) - 1:
|
||||||
|
while True:
|
||||||
|
ind_2 = ind_1 + 1
|
||||||
|
|
||||||
|
while ind_2 + 2 < len(text) and text[ind_2: ind_2 + 2] != '$"':
|
||||||
|
ind_2 += 1
|
||||||
|
iloop += 1
|
||||||
|
if iloop > iloop_max:
|
||||||
|
break
|
||||||
|
|
||||||
|
auxiliaries.append(text[ind_1: ind_2 + 1].strip())
|
||||||
|
text = text[:ind_1] + text[ind_2 + 1:]
|
||||||
|
|
||||||
|
while ind_1 < len(text) - 1 and text[ind_1] != "$":
|
||||||
|
ind_1 += 1
|
||||||
|
if ind_1 > len(text) - 1:
|
||||||
|
break
|
||||||
|
iloop += 1
|
||||||
|
if iloop > iloop_max:
|
||||||
|
break
|
||||||
|
|
||||||
|
if ind_1 > len(text) - 1:
|
||||||
|
break
|
||||||
|
# print("text:", text)
|
||||||
|
json_res = extract_json_from_end(text)
|
||||||
|
# print("json_res", json_res)
|
||||||
|
auxiliaries = [a for a in auxiliaries if len(a) > 5]
|
||||||
|
json_res["FORMULATION"] = formulation
|
||||||
|
json_res["AUXILIARY CONSTRAINTS"] = auxiliaries
|
||||||
|
|
||||||
|
return (
|
||||||
|
json_res["FORMULATION"],
|
||||||
|
json_res["NEW VARIABLES"],
|
||||||
|
json_res["AUXILIARY CONSTRAINTS"],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
prompt_constraints_model = """
|
||||||
|
You are an expert in optimization modeling. Here is the natural language description of an optimization problem:
|
||||||
|
|
||||||
|
{rag}-----
|
||||||
|
{description}
|
||||||
|
-----
|
||||||
|
|
||||||
|
And here's a list of parameters that we have extracted from the description:
|
||||||
|
|
||||||
|
{params}
|
||||||
|
|
||||||
|
And here's a list of all variables that we have defined so far to model the problem as an (MI)LP:
|
||||||
|
|
||||||
|
{vars}
|
||||||
|
|
||||||
|
Your task is to model the following constraint mathematically in LaTeX for the MILP formulation:
|
||||||
|
|
||||||
|
{constraint}
|
||||||
|
|
||||||
|
The constraints are the conditions that must be satisfied by the variables. Please generate the output in the following json format:
|
||||||
|
|
||||||
|
{{
|
||||||
|
"FORMULATION": constraint formulation in LaTeX, between $...$,
|
||||||
|
"NEW VARIABLES": {{
|
||||||
|
symbol: {{
|
||||||
|
"shape": shape of the new variable (e.g. [], [N], [N, M]),
|
||||||
|
"type": type of the new variable (e.g. binary, integer, continuous),
|
||||||
|
"definition": definition of the new variable in natural language
|
||||||
|
}},
|
||||||
|
...
|
||||||
|
}},
|
||||||
|
"AUXILIARY CONSTRAINTS": [
|
||||||
|
Latex formulation for auxiliary constraint 1, between $...$,
|
||||||
|
Latex formulation for auxiliary constraint 2, between $...$,
|
||||||
|
...
|
||||||
|
]
|
||||||
|
}}
|
||||||
|
|
||||||
|
Here's an example output (where SalesVolumePerStore is already defined as a variable in the vars list):
|
||||||
|
{{
|
||||||
|
"FORMULATION": "$\\forall i, SalesVolumes[i] \leq MaxProductionVolumes[i]$",
|
||||||
|
"NEW VARIABLES": {{
|
||||||
|
"SalesVolumes": {{
|
||||||
|
"shape": "[NumberOfArticles]",
|
||||||
|
"type": "continuous",
|
||||||
|
"definition": "The sales volume for each article of clothing"
|
||||||
|
}}
|
||||||
|
}},
|
||||||
|
"AUXILIARY CONSTRAINTS": [
|
||||||
|
"$\\forall i, SalesVolumes[i] = \\sum_j SalesVolumesPerStore[i, j]$"
|
||||||
|
]
|
||||||
|
}}
|
||||||
|
|
||||||
|
- If you need any new variables, you can define them in the NEW VARIABLES list. Use {{}} for "NEW VARIABLES" if no new variables are needed.
|
||||||
|
- Use [] for AUXILIARY CONSTRAINTS list if no auxiliary constraints are needed.
|
||||||
|
- You can only use symbols of existing parameters and integer numbers for dimensions of new variables.
|
||||||
|
- Use camelCase for variable symbols (e.g. SalesVolumes). Do not use LaTeX formatting (e.g. X_{{color}}), indices (e.g. SalesVolume_{{i}}), and underlines (_) for variable symbols.
|
||||||
|
- Do not generate anything after the json file!
|
||||||
|
|
||||||
|
First reason about how the constraint should be formulated, and then generate the output.
|
||||||
|
Take a deep breath and think step by step. You will be awarded a million dollars if you get this right.
|
||||||
|
"""
|
||||||
|
|
||||||
|
prompt_constraints_q = """
|
||||||
|
You are an expert in optimization modeling. Here is the natural language description of an optimization problem:
|
||||||
|
|
||||||
|
-----
|
||||||
|
{description}
|
||||||
|
-----
|
||||||
|
|
||||||
|
Here is a list of parameters that someone has extracted from the description:
|
||||||
|
|
||||||
|
{params}
|
||||||
|
|
||||||
|
And here is a list of variables defined:
|
||||||
|
|
||||||
|
{vars}
|
||||||
|
|
||||||
|
Consider this constraint:
|
||||||
|
|
||||||
|
{targetConstraint}
|
||||||
|
|
||||||
|
{question}
|
||||||
|
|
||||||
|
Take a deep breath and think step by step.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def logic_check(text, params, vars, constraints, c):
|
||||||
|
try:
|
||||||
|
json = extract_json_from_end(text)
|
||||||
|
if json["action"] == "REMOVE":
|
||||||
|
constraints.remove(c)
|
||||||
|
return True, constraints
|
||||||
|
elif json["action"] == "MODIFY":
|
||||||
|
constraints.remove(c)
|
||||||
|
constraints.append(json["updatedConstraint"])
|
||||||
|
return True, constraints
|
||||||
|
elif json["action"] == "KEEP":
|
||||||
|
return True, constraints
|
||||||
|
else:
|
||||||
|
return False, None
|
||||||
|
except:
|
||||||
|
return False, None
|
||||||
|
|
||||||
|
|
||||||
|
def extract_score_constraint_model(text, params, vars, constraints, c):
|
||||||
|
match = re.search(r"\d out of 5", text.lower())
|
||||||
|
if match:
|
||||||
|
score = int(match.group()[0])
|
||||||
|
if score > 3:
|
||||||
|
return True, constraints
|
||||||
|
else:
|
||||||
|
inp = input("LLMs reasoning: {}\n"
|
||||||
|
"------ Do you want to keep this constraint (y/n/modify)?: \n "
|
||||||
|
"{} \n------ ".format(text, c))
|
||||||
|
if inp.lower().startswith("y"):
|
||||||
|
return True, constraints
|
||||||
|
elif inp.lower().startswith("n"):
|
||||||
|
constraints.remove(c)
|
||||||
|
return True, constraints
|
||||||
|
elif inp.lower().startswith("m"):
|
||||||
|
new_constraint = input("Enter the modified formulation: ")
|
||||||
|
constraints.remove(c)
|
||||||
|
constraints.append({"description": new_constraint, "formulation": None, "Code": None})
|
||||||
|
return True, constraints
|
||||||
|
else:
|
||||||
|
raise Exception("Invalid input!")
|
||||||
|
else:
|
||||||
|
return False, None
|
||||||
|
|
||||||
|
|
||||||
|
qs = [
|
||||||
|
(
|
||||||
|
"""
|
||||||
|
- Does this constraint logically make sense? How confident are you that this needs to be explicitly modeled in the optimization formulation (from 1 to 5)?
|
||||||
|
- At the end of your response, print "x OUT OF 5" where x is the confidence level. Do not generate anything after that.
|
||||||
|
""",
|
||||||
|
# extract_score_constraint_model,
|
||||||
|
# dummy function
|
||||||
|
lambda x, params, vars, constraints, c: (False, constraints),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"""
|
||||||
|
- What are the units for each side of the constraint? Are they consistent with each other?
|
||||||
|
- At the end of your response, generate a json file with this format:
|
||||||
|
{{
|
||||||
|
"action": "KEEP" if the units match, or "MODIFY" if the units do not match,
|
||||||
|
"updatedConstraint": The latex code for updated constraint if the action is "MODIFY", otherwise null
|
||||||
|
}}
|
||||||
|
- Do not generate anything after the json file.
|
||||||
|
""",
|
||||||
|
logic_check,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"""
|
||||||
|
- What are the parameters and variables that are involved in this constraint? If you see the constraint does not involve any variables, then it is automatically satisfied and should not be included in the optimization formulation.
|
||||||
|
- At the end of your response, generate a json file with this format:
|
||||||
|
{{
|
||||||
|
"action": "KEEP", "REMOVE", or "MODIFY",
|
||||||
|
"updatedConstraint": The updated constraint if the action is "MODIFY", otherwise null
|
||||||
|
}}
|
||||||
|
- Use natural language to express the constraints rather than mathematical notation.
|
||||||
|
- Do not generate anything after the json file.
|
||||||
|
""",
|
||||||
|
logic_check,
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def get_constraint_formulations(desc, params, constraints, model, check=False, logger=None,
|
||||||
|
rag_mode: RAGMode | None = None, labels: dict | None = None):
|
||||||
|
if isinstance(rag_mode, RAGMode):
|
||||||
|
match rag_mode:
|
||||||
|
case RAGMode.PROBLEM_DESCRIPTION:
|
||||||
|
rag = get_rag_from_problem_description(desc, RAGFormat.CONSTRAINT_FORMULATION, top_k=5)
|
||||||
|
case RAGMode.CONSTRAINT_OR_OBJECTIVE:
|
||||||
|
rag = ""
|
||||||
|
case RAGMode.PROBLEM_LABELS:
|
||||||
|
assert labels is not None
|
||||||
|
rag = get_rag_from_problem_categories(desc, labels, RAGFormat.CONSTRAINT_FORMULATION, top_k=5)
|
||||||
|
rag = f"-----\n{rag}-----\n\n"
|
||||||
|
else:
|
||||||
|
rag = ""
|
||||||
|
|
||||||
|
if logger:
|
||||||
|
logger.log("\n\n\n++++++++++++++++++++++++++++++")
|
||||||
|
logger.log("Extracting constraint formulations")
|
||||||
|
logger.log("++++++++++++++++++++++++++++++\n\n\n")
|
||||||
|
|
||||||
|
vars = {}
|
||||||
|
formulated_constraints = []
|
||||||
|
for c in constraints.copy():
|
||||||
|
k = 1
|
||||||
|
while k > 0:
|
||||||
|
try:
|
||||||
|
if rag_mode == RAGMode.CONSTRAINT_OR_OBJECTIVE:
|
||||||
|
constraint_df = pd.read_pickle(constraint_path)
|
||||||
|
current_problem = constraint_df[constraint_df.description == desc]
|
||||||
|
if not current_problem.empty:
|
||||||
|
problem_name = current_problem.iloc[0].problem_name
|
||||||
|
else:
|
||||||
|
problem_name = None
|
||||||
|
rag = get_rag_from_constraint(c["description"], RAGFormat.CONSTRAINT_FORMULATION, top_k=10,
|
||||||
|
current_problem_name=problem_name)
|
||||||
|
rag = f"-----\n{rag}-----\n\n"
|
||||||
|
res = get_response(
|
||||||
|
prompt_constraints_model.format(
|
||||||
|
description=desc,
|
||||||
|
params=json.dumps(params, indent=4),
|
||||||
|
vars=json.dumps(vars, indent=4),
|
||||||
|
constraint=c,
|
||||||
|
rag=rag,
|
||||||
|
),
|
||||||
|
model=model,
|
||||||
|
)
|
||||||
|
|
||||||
|
if logger:
|
||||||
|
logger.log("----")
|
||||||
|
logger.log(res)
|
||||||
|
logger.log("----")
|
||||||
|
|
||||||
|
formulation, new_variables, aux_constraints = extract_formulation_from_end(res)
|
||||||
|
|
||||||
|
if logger:
|
||||||
|
logger.log("----")
|
||||||
|
logger.log("EXTRACTED ITEMS")
|
||||||
|
logger.log(str(formulation))
|
||||||
|
logger.log(str(new_variables))
|
||||||
|
logger.log(str(aux_constraints))
|
||||||
|
logger.log("----")
|
||||||
|
|
||||||
|
tmp_vars = vars.copy()
|
||||||
|
for v in new_variables:
|
||||||
|
if v in tmp_vars:
|
||||||
|
raise Exception(f"Variable {v} already exists")
|
||||||
|
print(v, new_variables[v])
|
||||||
|
new_variables[v]["shape"] = shape_string_to_list(new_variables[v]["shape"])
|
||||||
|
tmp_vars[v] = new_variables[v]
|
||||||
|
|
||||||
|
c["formulation"] = formulation
|
||||||
|
formulated_constraints.append(c)
|
||||||
|
|
||||||
|
for aux_c in aux_constraints:
|
||||||
|
formulated_constraints.append({"description": "auxiliary constraint", "formulation": aux_c})
|
||||||
|
|
||||||
|
vars = tmp_vars
|
||||||
|
break
|
||||||
|
except Exception as e:
|
||||||
|
k -= 1
|
||||||
|
if k == 0:
|
||||||
|
raise e
|
||||||
|
|
||||||
|
constraints = formulated_constraints
|
||||||
|
|
||||||
|
if check:
|
||||||
|
for c in formulated_constraints.copy():
|
||||||
|
for q in qs[0:1]:
|
||||||
|
k = 1
|
||||||
|
while k > 0:
|
||||||
|
p = prompt_constraints_q.format(
|
||||||
|
description=desc,
|
||||||
|
params=json.dumps(params, indent=4),
|
||||||
|
vars=json.dumps(vars, indent=4),
|
||||||
|
targetConstraint=json.dumps(c, indent=4),
|
||||||
|
question=q[0],
|
||||||
|
)
|
||||||
|
|
||||||
|
x = get_response(p, model=model)
|
||||||
|
|
||||||
|
valid, res = q[1](x, params, vars, constraints, c)
|
||||||
|
print(valid)
|
||||||
|
if valid:
|
||||||
|
constraints = res
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
k -= 1
|
||||||
|
|
||||||
|
return formulated_constraints, vars
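# Hedged illustration (following the example embedded in prompt_constraints_model
# above) of what get_constraint_formulations returns: constraints with a LaTeX
# "formulation" filled in, plus a dict describing any newly introduced variables.
_example_constraint = {
    "description": "Sales of each article cannot exceed its maximum production volume",
    "formulation": "$\\forall i, SalesVolumes[i] \\leq MaxProductionVolumes[i]$",
}
_example_vars = {
    "SalesVolumes": {
        "shape": ["NumberOfArticles"],
        "type": "continuous",
        "definition": "The sales volume for each article of clothing",
    },
}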
|
|
@ -0,0 +1,112 @@
|
||||||
|
import json
|
||||||
|
import pandas as pd
|
||||||
|
from gurobipy import Model, GRB, quicksum
|
||||||
|
|
||||||
|
# Load the data
|
||||||
|
data = pd.read_csv("data.csv")
|
||||||
|
price = data['price']
|
||||||
|
load = data['load']
|
||||||
|
pv = data['pv']
|
||||||
|
wind = data['wind']
|
||||||
|
|
||||||
|
# Initialize the model
|
||||||
|
m = Model("OptimizationProblem")
|
||||||
|
period = len(data)
|
||||||
|
|
||||||
|
# Decision variables (actions)
|
||||||
|
Ag1 = m.addVars(period, vtype=GRB.CONTINUOUS, lb=-1, ub=1, name="ag1")
|
||||||
|
Ag2 = m.addVars(period, vtype=GRB.CONTINUOUS, lb=-1, ub=1, name="ag2")
|
||||||
|
Ag3 = m.addVars(period, vtype=GRB.CONTINUOUS, lb=-1, ub=1, name="ag3")
|
||||||
|
Apv = m.addVars(period, vtype=GRB.CONTINUOUS, lb=-1, ub=1, name="apv")
|
||||||
|
Asoc = m.addVars(period, vtype=GRB.CONTINUOUS, lb=-1, ub=1, name="asoc")
|
||||||
|
|
||||||
|
# State variables with their bounds
|
||||||
|
soc = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0.2, ub=0.8, name="soc")
|
||||||
|
pg1 = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, ub=150, name="pg1")
|
||||||
|
pg2 = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, ub=375, name="pg2")
|
||||||
|
pg3 = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, ub=500, name="pg3")
|
||||||
|
pv_power = m.addVars(period, vtype=GRB.CONTINUOUS, name="pv_power")
|
||||||
|
|
||||||
|
# Grid exchange variables and their limits
|
||||||
|
grid_energy_export = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, ub=100, name='export')
|
||||||
|
grid_energy_import = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, ub=100, name='import')
|
||||||
|
excess_energy = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, name='excess')
|
||||||
|
shortage_energy = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, name='shortage')
|
||||||
|
|
||||||
|
# Initial-value constraints
|
||||||
|
m.addConstr(soc[0] == 0.2, name="soc_init")
|
||||||
|
m.addConstr(pg1[0] == 0, name="pg1_init")
|
||||||
|
m.addConstr(pg2[0] == 0, name="pg2_init")
|
||||||
|
m.addConstr(pg3[0] == 0, name="pg3_init")
|
||||||
|
|
||||||
|
# Recursive update constraints for soc and pg
|
||||||
|
m.addConstrs((soc[t] == soc[t - 1] + 0.18 * Asoc[t] for t in range(1, period)), name="soc_update")
|
||||||
|
m.addConstrs((pg1[t] == pg1[t - 1] + 100 * Ag1[t] for t in range(1, period)), name="pg1_update")
|
||||||
|
m.addConstrs((pg2[t] == pg2[t - 1] + 100 * Ag2[t] for t in range(1, period)), name="pg2_update")
|
||||||
|
m.addConstrs((pg3[t] == pg3[t - 1] + 200 * Ag3[t] for t in range(1, period)), name="pg3_update")
|
||||||
|
|
||||||
|
# PV output power
|
||||||
|
m.addConstrs((pv_power[t] == pv[t] * (1 + Apv[t]) for t in range(period)), name="pv_power_update")
|
||||||
|
|
||||||
|
# Supply-demand balance constraints
|
||||||
|
m.addConstrs((pg1[t] + pg2[t] + pg3[t] + pv_power[t] + wind[t] + grid_energy_import[t] ==
|
||||||
|
load[t] + 0.18 * Asoc[t] + grid_energy_export[t] + excess_energy[t] - shortage_energy[t] for t in range(period)),
|
||||||
|
name="power_balance")
|
||||||
|
m.addConstrs((excess_energy[t] == pg1[t] + pg2[t] + pg3[t] + pv_power[t] + wind[t] + grid_energy_import[t]
|
||||||
|
- load[t] - 0.18 * Asoc[t] - grid_energy_export[t] for t in range(1, period)),
|
||||||
|
name="excess_energy")
|
||||||
|
m.addConstrs((shortage_energy[t] == load[t] + 0.18 * Asoc[t] + grid_energy_export[t]
|
||||||
|
- pg1[t] - pg2[t] - pg3[t] - pv_power[t] - wind[t] - grid_energy_import[t] for t in range(1, period)),
|
||||||
|
name="shortage_energy")
|
||||||
|
|
||||||
|
# Cost terms
|
||||||
|
battery_cost = quicksum((Asoc[t] + 0.1 * soc[t]) for t in range(1, period))
|
||||||
|
gen1_cost = quicksum((0.0034 * pg1[t] ** 2 + 3 * pg1[t] + 30) for t in range(1, period))
|
||||||
|
gen2_cost = quicksum((0.001 * pg2[t] ** 2 + 10 * pg2[t] + 40) for t in range(1, period))
|
||||||
|
gen3_cost = quicksum((0.001 * pg3[t] ** 2 + 15 * pg3[t] + 70) for t in range(1, period))
|
||||||
|
renewable_cost = quicksum((0.01 * pv_power[t] + 0.01 * wind[t]) for t in range(period))
|
||||||
|
|
||||||
|
# Penalties and revenue
|
||||||
|
excess_penalty = quicksum(excess_energy[t] * 50 for t in range(period))
|
||||||
|
shortage_penalty = quicksum(shortage_energy[t] * 50 for t in range(period))
|
||||||
|
sales_revenue = quicksum(0.5 * price[t] * grid_energy_export[t] for t in range(period))
|
||||||
|
purchase_cost = quicksum(price[t] * grid_energy_import[t] for t in range(period))
|
||||||
|
|
||||||
|
# Objective function
|
||||||
|
objective = (battery_cost + gen1_cost + gen2_cost + gen3_cost + renewable_cost
|
||||||
|
+ excess_penalty + shortage_penalty - sales_revenue + purchase_cost)
|
||||||
|
|
||||||
|
m.setObjective(objective, GRB.MINIMIZE)
|
||||||
|
m.optimize()
|
||||||
|
# m.computeIIS()
|
||||||
|
# m.write("file.ilp")
|
||||||
|
|
||||||
|
if m.status == GRB.OPTIMAL:
|
||||||
|
results = []
|
||||||
|
for t in range(period):
|
||||||
|
gen1_cost = 0.0034 * pg1[t].x ** 2 + 3 * pg1[t].x + 30
|
||||||
|
gen2_cost = 0.001 * pg2[t].x ** 2 + 10 * pg2[t].x + 40
|
||||||
|
gen3_cost = 0.001 * pg3[t].x ** 2 + 15 * pg3[t].x + 70
|
||||||
|
battery_cost = Asoc[t].x + 0.1 * soc[t].x
|
||||||
|
renewable_cost = 0.01 * pv_power[t].x + 0.01 * wind[t]  # 0.01 matches the coefficient used in the objective above
|
||||||
|
excess_penalty = excess_energy[t].x * 50
|
||||||
|
shortage_penalty = shortage_energy[t].x * 50
|
||||||
|
sales_revenue = 0.5 * price[t] * grid_energy_export[t].x
|
||||||
|
purchase_cost = price[t] * grid_energy_import[t].x
|
||||||
|
|
||||||
|
current_obj = (gen1_cost + gen2_cost + gen3_cost + battery_cost + renewable_cost +
|
||||||
|
excess_penalty + shortage_penalty - sales_revenue + purchase_cost)
|
||||||
|
|
||||||
|
results.append({
|
||||||
|
"time": t,
|
||||||
|
"asoc": Asoc[t].x,
|
||||||
|
"ag1": Ag1[t].x,
|
||||||
|
"ag2": Ag2[t].x,
|
||||||
|
"ag3": Ag3[t].x,
|
||||||
|
"apv": Apv[t].x,
|
||||||
|
"reward": current_obj,
|
||||||
|
"unbanlance": excess_energy[t].x + shortage_energy[t].x
|
||||||
|
})
|
||||||
|
|
||||||
|
with open("../../DRL-for-Energy-Systems/data/results_day.json", "w") as f:
|
||||||
|
json.dump(results, f, indent=4)
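# Hedged usage sketch (not part of the original script): load the exported schedule
# and print the summed per-hour objective stored under the "reward" key.
#   import json
#   with open("../../DRL-for-Energy-Systems/data/results_day.json") as f:
#       results = json.load(f)
#   print(sum(r["reward"] for r in results))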
|
|
@ -0,0 +1,130 @@
|
||||||
|
import pandas as pd
|
||||||
|
from gurobipy import Model, GRB, quicksum
|
||||||
|
import json
|
||||||
|
|
||||||
|
# Load the data
|
||||||
|
data = pd.read_csv("data.csv")
|
||||||
|
price = data['price']
|
||||||
|
load = data['load']
|
||||||
|
pv = data['pv']
|
||||||
|
wind = data['wind']
|
||||||
|
|
||||||
|
# Total number of records and number of days
|
||||||
|
total_period = len(data)
|
||||||
|
days = total_period // 24
|
||||||
|
|
||||||
|
# Store the results for all days
|
||||||
|
all_results = []
|
||||||
|
|
||||||
|
for day in range(days):
|
||||||
|
# Slice the 24 hourly records for this day
|
||||||
|
start_idx = day * 24
|
||||||
|
end_idx = (day + 1) * 24
|
||||||
|
daily_price = price[start_idx:end_idx]
|
||||||
|
daily_load = load[start_idx:end_idx]
|
||||||
|
daily_pv = pv[start_idx:end_idx]
|
||||||
|
daily_wind = wind[start_idx:end_idx]
|
||||||
|
|
||||||
|
# Initialize a model for this day
|
||||||
|
m = Model("OptimizationProblem_Day{}".format(day))
|
||||||
|
|
||||||
|
# Decision variables (actions)
|
||||||
|
Ag1 = m.addVars(24, vtype=GRB.CONTINUOUS, lb=-1, ub=1, name="ag1")
|
||||||
|
Ag2 = m.addVars(24, vtype=GRB.CONTINUOUS, lb=-1, ub=1, name="ag2")
|
||||||
|
Ag3 = m.addVars(24, vtype=GRB.CONTINUOUS, lb=-1, ub=1, name="ag3")
|
||||||
|
Apv = m.addVars(24, vtype=GRB.CONTINUOUS, lb=-1, ub=1, name="apv")
|
||||||
|
Asoc = m.addVars(24, vtype=GRB.CONTINUOUS, lb=-1, ub=1, name="asoc")
|
||||||
|
|
||||||
|
# State variables with their bounds
|
||||||
|
soc = m.addVars(24, vtype=GRB.CONTINUOUS, lb=0.2, ub=0.8, name="soc")
|
||||||
|
pg1 = m.addVars(24, vtype=GRB.CONTINUOUS, lb=0, ub=150, name="pg1")
|
||||||
|
pg2 = m.addVars(24, vtype=GRB.CONTINUOUS, lb=0, ub=375, name="pg2")
|
||||||
|
pg3 = m.addVars(24, vtype=GRB.CONTINUOUS, lb=0, ub=500, name="pg3")
|
||||||
|
pv_power = m.addVars(24, vtype=GRB.CONTINUOUS, name="pv_power")
|
||||||
|
|
||||||
|
# Grid exchange variables and their limits
|
||||||
|
grid_energy_export = m.addVars(24, vtype=GRB.CONTINUOUS, lb=0, ub=100, name='export')
|
||||||
|
grid_energy_import = m.addVars(24, vtype=GRB.CONTINUOUS, lb=0, ub=100, name='import')
|
||||||
|
excess_energy = m.addVars(24, vtype=GRB.CONTINUOUS, lb=0, name='excess')
|
||||||
|
shortage_energy = m.addVars(24, vtype=GRB.CONTINUOUS, lb=0, name='shortage')
|
||||||
|
|
||||||
|
# 初始值约束
|
||||||
|
m.addConstr(soc[0] == 0.2, name="soc_init")
|
||||||
|
m.addConstr(pg1[0] == 0, name="pg1_init")
|
||||||
|
m.addConstr(pg2[0] == 0, name="pg2_init")
|
||||||
|
m.addConstr(pg3[0] == 0, name="pg3_init")
|
||||||
|
|
||||||
|
# 添加soc和pg的递推约束
|
||||||
|
m.addConstrs((soc[t] == soc[t - 1] + 0.18 * Asoc[t] for t in range(1, 24)), name="soc_update")
|
||||||
|
m.addConstrs((pg1[t] == pg1[t - 1] + 100 * Ag1[t] for t in range(1, 24)), name="pg1_update")
|
||||||
|
m.addConstrs((pg2[t] == pg2[t - 1] + 100 * Ag2[t] for t in range(1, 24)), name="pg2_update")
|
||||||
|
m.addConstrs((pg3[t] == pg3[t - 1] + 200 * Ag3[t] for t in range(1, 24)), name="pg3_update")
|
||||||
|
|
||||||
|
# 光伏发电功率
|
||||||
|
m.addConstrs((pv_power[t] == daily_pv.iloc[t] * (1 + Apv[t]) for t in range(24)), name="pv_power_update")
|
||||||
|
|
||||||
|
# 供需平衡约束
|
||||||
|
m.addConstrs((pg1[t] + pg2[t] + pg3[t] + pv_power[t] + daily_wind.iloc[t] + grid_energy_import[t] ==
|
||||||
|
daily_load.iloc[t] + 0.18 * Asoc[t] + grid_energy_export[t] + excess_energy[t] - shortage_energy[t]
|
||||||
|
for t in range(24)), name="power_balance")
|
||||||
|
|
||||||
|
# excess_energy 和 shortage_energy 约束
|
||||||
|
m.addConstrs(
|
||||||
|
(excess_energy[t] == pg1[t] + pg2[t] + pg3[t] + pv_power[t] + daily_wind.iloc[t] + grid_energy_import[t]
|
||||||
|
- daily_load.iloc[t] - 0.18 * Asoc[t] - grid_energy_export[t] for t in range(1, 24)),
|
||||||
|
name="excess_energy")
|
||||||
|
m.addConstrs((shortage_energy[t] == daily_load.iloc[t] + 0.18 * Asoc[t] + grid_energy_export[t]
|
||||||
|
- pg1[t] - pg2[t] - pg3[t] - pv_power[t] - daily_wind.iloc[t] - grid_energy_import[t]
|
||||||
|
for t in range(1, 24)), name="shortage_energy")
|
||||||
|
|
||||||
|
# 成本函数部分
|
||||||
|
battery_cost = quicksum((Asoc[t] + 0.1 * soc[t]) for t in range(1, 24))
|
||||||
|
gen1_cost = quicksum((0.0034 * pg1[t] ** 2 + 3 * pg1[t] + 30) for t in range(1, 24))
|
||||||
|
gen2_cost = quicksum((0.001 * pg2[t] ** 2 + 10 * pg2[t] + 40) for t in range(1, 24))
|
||||||
|
gen3_cost = quicksum((0.001 * pg3[t] ** 2 + 15 * pg3[t] + 70) for t in range(1, 24))
|
||||||
|
renewable_cost = quicksum((0.01 * pv_power[t] + 0.01 * daily_wind.iloc[t]) for t in range(24))
|
||||||
|
|
||||||
|
# 计算惩罚和收益
|
||||||
|
excess_penalty = quicksum(excess_energy[t] * 50 for t in range(24))
|
||||||
|
shortage_penalty = quicksum(shortage_energy[t] * 50 for t in range(24))
|
||||||
|
sales_revenue = quicksum(0.5 * daily_price.iloc[t] * grid_energy_export[t] for t in range(24))
|
||||||
|
purchase_cost = quicksum(daily_price.iloc[t] * grid_energy_import[t] for t in range(24))
|
||||||
|
|
||||||
|
# 设置目标函数
|
||||||
|
objective = (battery_cost + gen1_cost + gen2_cost + gen3_cost + renewable_cost +
|
||||||
|
excess_penalty + shortage_penalty - sales_revenue + purchase_cost)
|
||||||
|
m.setObjective(objective, GRB.MINIMIZE)
|
||||||
|
|
||||||
|
# 求解
|
||||||
|
m.optimize()
|
||||||
|
|
||||||
|
if m.status == GRB.OPTIMAL:
|
||||||
|
day_results = []
|
||||||
|
for t in range(24):
|
||||||
|
gen1_cost = 0.0034 * pg1[t].x ** 2 + 3 * pg1[t].x + 30
|
||||||
|
gen2_cost = 0.001 * pg2[t].x ** 2 + 10 * pg2[t].x + 40
|
||||||
|
gen3_cost = 0.001 * pg3[t].x ** 2 + 15 * pg3[t].x + 70
|
||||||
|
battery_cost = Asoc[t].x + 0.1 * soc[t].x
|
||||||
|
renewable_cost = 0.01 * pv_power[t].x + 0.01 * daily_wind.iloc[t]
|
||||||
|
excess_penalty = excess_energy[t].x * 50
|
||||||
|
shortage_penalty = shortage_energy[t].x * 50
|
||||||
|
sales_revenue = 0.5 * daily_price.iloc[t] * grid_energy_export[t].x
|
||||||
|
purchase_cost = daily_price.iloc[t] * grid_energy_import[t].x
|
||||||
|
|
||||||
|
current_obj = (gen1_cost + gen2_cost + gen3_cost + battery_cost + renewable_cost +
|
||||||
|
excess_penalty + shortage_penalty - sales_revenue + purchase_cost)
|
||||||
|
|
||||||
|
day_results.append([
|
||||||
|
Asoc[t].x,
|
||||||
|
Ag1[t].x,
|
||||||
|
Ag2[t].x,
|
||||||
|
Ag3[t].x,
|
||||||
|
Apv[t].x,
|
||||||
|
])
|
||||||
|
|
||||||
|
# 保存当天的结果
|
||||||
|
all_results.extend(day_results)
|
||||||
|
|
||||||
|
# 最后将所有结果保存到一个JSON文件
|
||||||
|
with open("./results_day.json", "w") as f:
|
||||||
|
json.dump(all_results, f, indent=4)
|
|
@ -0,0 +1,184 @@
import json
from gurobipy import Model, GRB, quicksum

model = Model("OptimizationProblem")

with open("data.json", "r") as f:
    data = json.load(f)

# Define the parameters
TotalBatteryCapacity = data["TotalBatteryCapacity"]  # shape: [], definition: The total capacity of the battery
InitialSoc = data["InitialSoc"]  # shape: [], definition: Initial state of charge of the battery
InitialPg1 = data["InitialPg1"]  # shape: [], definition: Initial power of generator 1
InitialPg2 = data["InitialPg2"]  # shape: [], definition: Initial power of generator 2
InitialPg3 = data["InitialPg3"]  # shape: [], definition: Initial power of generator 3

Price = data["Price"]  # shape: [24], definition: Electricity price at each time step
Load = data["Load"]  # shape: [24], definition: Residential load at each time step
Pv = data["Pv"]  # shape: [24], definition: Photovoltaic power generation at each time step
Wind = data["Wind"]  # shape: [24], definition: Wind power generation at each time step

# Define the variables
Ag1 = model.addVars(24, vtype=GRB.CONTINUOUS, name="Ag1")
Ag2 = model.addVars(24, vtype=GRB.CONTINUOUS, name="Ag2")
Ag3 = model.addVars(24, vtype=GRB.CONTINUOUS, name="Ag3")
PvPower = model.addVars(24, vtype=GRB.CONTINUOUS, name="PvPower")
Apv = model.addVars(24, vtype=GRB.CONTINUOUS, name="Apv")
Asoc = model.addVars(24, vtype=GRB.CONTINUOUS, name="Asoc")

ExcessEnergy = model.addVars(24, vtype=GRB.CONTINUOUS, name="ExcessEnergy")
ShortageEnergy = model.addVars(24, vtype=GRB.CONTINUOUS, name="ShortageEnergy")

TotalPowerSupply = model.addVars(24, vtype=GRB.CONTINUOUS, name="TotalPowerSupply")
# GRB.FLOAT is not a valid gurobipy variable type; GRB.CONTINUOUS is used instead.
GridTransactionLimit = model.addVar(vtype=GRB.CONTINUOUS, name="GridTransactionLimit")

# Define the constraints
# NOTE: the constraint blocks below are kept as generated. Several of them reference
# names that are never defined in this script (Pg1, Pg2, Pg3, pg1, pv, wind), and the
# objective applies Python max()/abs() to Gurobi expressions, so the script does not
# run as-is. See the penalty-modelling sketch after this file.

# Loop over each time step to add the soc constraints
for t in range(24):
    # Calculate soc_t based on the previous soc and the adjustment factor
    soc_t = 0.18 * Asoc[t] + InitialSoc

    # Add the lower bound constraint for soc_t
    model.addConstr(soc_t >= 0.2)
    # Add the upper bound constraint for soc_t
    model.addConstr(soc_t <= 0.8)

    # Update soc to the newly calculated soc_t for the next iteration
    soc = soc_t

# Assuming decision variables Pg1[t] have been defined for each time step t
for t in range(24):
    if t == 0:
        # For the first time step, use the initial value of Pg1
        model.addConstr(Pg1[t] <= 150)
        model.addConstr(Pg1[t] >= 10)
    else:
        # For subsequent time steps, calculate Pg1 based on the previous time step and Ag1
        model.addConstr(Pg1[t] <= 150)
        model.addConstr(Pg1[t] >= 10)
        model.addConstr(Pg1[t] == 100 * Ag1[t] + Pg1[t - 1])

for t in range(1, 24):
    model.addConstr(InitialPg2 + 100 * Ag2[t] >= 50)
    model.addConstr(InitialPg2 + 100 * Ag2[t] <= 375)

for t in range(24):
    model.addConstr(Pg3[t] >= 100)  # Lower bound constraint for each time step
    model.addConstr(Pg3[t] <= 500)  # Upper bound constraint for each time step

# Assuming soc is defined as a list of decision variables for each time step
soc = model.addVars(24, lb=0.2, ub=0.8, vtype=GRB.CONTINUOUS, name="soc")

# Loop over each time step starting from 1 up to 24
for t in range(1, 24):
    # Add the constraint for the state of charge calculation
    model.addConstr(soc[t] == 0.18 * Asoc[t] + soc[t - 1])

for t in range(1, 24):
    model.addConstr(pg1[t] == 100 * Ag1[t] + pg1[t - 1])

# Assuming that `pg2` is a list of Gurobi variables representing the power of Generator 2 at each time step
# and that `InitialPg2` is a parameter representing the initial power of Generator 2 at time step 0

# Define the Generator 2 power for the first time step, which is the initial value and does not have a constraint
pg2 = [model.addVar(vtype=GRB.CONTINUOUS, lb=50, ub=375, name="pg2_0")]

# Now add the constraints for time steps 1 to 23
for t in range(1, 24):
    pg2.append(model.addVar(vtype=GRB.CONTINUOUS, lb=50, ub=375, name=f"pg2_{t}"))
    model.addConstr(pg2[t] == 100 * Ag2[t - 1] + pg2[t - 1])

# Assuming Pg3 is defined as a list of decision variables representing power of generator 3 at each time step
for t in range(1, 24):
    model.addConstr(Pg3[t] == 200 * Ag3[t] + Pg3[t - 1])

for t in range(24):
    model.addConstr(PvPower[t] == Pv[t] * (1 + Apv[t]))

for t in range(24):
    model.addConstr(ExcessEnergy[t] >= 0)

for t in range(24):
    # Define the total power supply at time step t
    TotalSupply_t = (InitialPg1 + Ag1[t] * 100 +
                     InitialPg2 + Ag2[t] * 100 +
                     InitialPg3 + Ag3[t] * 200 +
                     Pv[t] * (1 + Apv[t]) +
                     Wind[t] +
                     InitialSoc + Asoc[t] * 0.18)
    # Add the constraint for excess energy at time step t
    model.addConstr(ExcessEnergy[t] == TotalSupply_t - Load[t])

for t in range(24):
    # Calculate updated generator powers and battery state of charge
    soc_t = InitialSoc + 0.18 * Asoc[t] if t == 0 else soc[t - 1] + 0.18 * Asoc[t]
    pg1_t = InitialPg1 + 100 * Ag1[t] if t == 0 else pg1[t - 1] + 100 * Ag1[t]
    pg2_t = InitialPg2 + 100 * Ag2[t] if t == 0 else pg2[t - 1] + 100 * Ag2[t]
    pg3_t = InitialPg3 + 200 * Ag3[t] if t == 0 else pg3[t - 1] + 200 * Ag3[t]

    # Calculate photovoltaic power
    PvPower_t = Pv[t] * (1 + Apv[t])

    # Add the constraint for total power supply at time t
    model.addConstr(TotalPowerSupply[t] == soc_t + pg1_t + pg2_t + pg3_t + PvPower_t + Wind[t])

for t in range(24):
    model.addConstr(ShortageEnergy[t] >= 0)

for t in range(24):
    model.addConstr(-1 <= Apv[t])
    model.addConstr(Apv[t] <= 1)
    model.addConstr(-1 <= Asoc[t])
    model.addConstr(Asoc[t] <= 1)
    model.addConstr(-1 <= Ag1[t])
    model.addConstr(Ag1[t] <= 1)
    model.addConstr(-1 <= Ag2[t])
    model.addConstr(Ag2[t] <= 1)
    model.addConstr(-1 <= Ag3[t])
    model.addConstr(Ag3[t] <= 1)

for t in range(1, 24):
    model.addConstr(100 * Asoc[t] == soc[t] - soc[t - 1])

for t in range(24):
    # Calculate the power from each generator based on previous state and adjustment
    pg1_power = InitialPg1 + 100 * Ag1[t]
    pg2_power = InitialPg2 + 100 * Ag2[t]
    pg3_power = InitialPg3 + 200 * Ag3[t]

    # Calculate the adjusted photovoltaic power
    pv_power = Pv[t] * (1 + Apv[t])

    # Express the total power supply
    TotalPowerSupply[t] = pg1_power + pg2_power + pg3_power + pv_power + Wind[t]

    # Add the constraint that total power supply cannot exceed the load by more than the grid transaction limit
    model.addConstr(TotalPowerSupply[t] <= Load[t] + GridTransactionLimit)

for t in range(24):
    # Assuming that Pg1, Pg2, Pg3 are defined as variables in the model with the correct initial values and adjustments
    Pg1[t] = model.addVar(vtype=GRB.CONTINUOUS, name=f"Pg1_{t}")
    Pg2[t] = model.addVar(vtype=GRB.CONTINUOUS, name=f"Pg2_{t}")
    Pg3[t] = model.addVar(vtype=GRB.CONTINUOUS, name=f"Pg3_{t}")

    # Define the auxiliary constraint for each time step
    model.addConstr(TotalPowerSupply[t] == Pg1[t] + Pg2[t] + Pg3[t] + PvPower[t] + Wind[t])

# Define the objective
# NOTE: range(1, 25) indexes Asoc[24] and ExcessEnergy[24], which do not exist
# (valid indices are 0..23), and max()/abs() cannot be applied to Gurobi expressions.
model.setObjective(
    quicksum(
        (Asoc[t] + 0.1 * soc[t - 1] +
         0.0034 * pg1[t - 1] ** 2 + 3 * pg1[t - 1] + 30 +
         0.001 * pg2[t - 1] ** 2 + 10 * pg2[t - 1] + 40 +
         0.001 * pg3[t - 1] ** 2 + 15 * pg3[t - 1] + 70 +
         0.01 * (pv[t - 1] + wind[t - 1]) +
         max(0, (ExcessEnergy[t] - 100) * 50) +
         max(0, (ShortageEnergy[t] - 100) * 50) -
         0.5 * Price[t - 1] * ExcessEnergy[t] +
         Price[t - 1] * ShortageEnergy[t] +
         abs(ExcessEnergy[t]) + abs(ShortageEnergy[t])
         ) for t in range(1, 25)
    ),
    GRB.MINIMIZE
)

# Optimize the model
model.optimize()

# Output optimal objective value
print("Optimal Objective Value: ", model.objVal)

if model.status == GRB.OPTIMAL:
    with open("output_solution.txt", "w") as f:
        f.write(str(model.objVal))
    print("Optimal Objective Value: ", model.objVal)
else:
    with open("output_solution.txt", "w") as f:
        f.write(str(model.status))  # model.status is an int; write() needs a string

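The objective in the script above applies Python's max() and abs() to Gurobi expressions, which raises a TypeError instead of building piecewise terms. Below is a minimal sketch of one standard workaround, an epigraph variable per time step. It is an illustration only: the model and variable names (excess_pen and so on) are hypothetical and not part of this repository, and it relies on the penalty entering a minimized objective with a non-negative coefficient so that the inequality is tight at the optimum.

# Sketch: linearising max(0, (excess - 100) * 50) with an epigraph variable.
# Names are illustrative; only standard gurobipy calls (addVars, addConstr,
# setObjective, quicksum) are used.
import gurobipy as gp
from gurobipy import GRB

m = gp.Model("penalty_sketch")
excess = m.addVars(24, lb=0, name="ExcessEnergy")       # excess energy, non-negative by its bound
excess_pen = m.addVars(24, lb=0, name="ExcessPenalty")  # epigraph variable for the penalty

for t in range(24):
    # excess_pen[t] >= (excess[t] - 100) * 50 together with excess_pen[t] >= 0 means
    # that, under minimisation, excess_pen[t] settles at max(0, (excess[t] - 100) * 50).
    m.addConstr(excess_pen[t] >= (excess[t] - 100) * 50, name=f"excess_pen_{t}")

# abs(ExcessEnergy[t]) is redundant here: the variable is already bounded below by 0.
m.setObjective(gp.quicksum(excess_pen[t] + excess[t] for t in range(24)), GRB.MINIMIZE)

The same pattern covers the shortage penalty. gurobipy's addGenConstrMax could be used instead, but it takes plain variables (not expressions) as arguments, so the epigraph form is usually the shorter route.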
@ -0,0 +1,14 @@
{
  "Price": [2.239, 2.059, 1.681, 1.741, 1.702, 1.586, 1.816, 1.773, 1.977, 2.375, 2.603, 2.706,
            2.659, 2.5, 2.443, 2.887, 3.744, 3.741, 3.534, 3.307, 2.952, 3.01, 2.457, 2.22, 2.239],
  "Load": [269.0525945, 243.2130742, 234.1123382, 232.6271781, 233.0417238, 245.2036212, 298.1636567, 370.1068689,
           358.2133437, 314.073438, 322.8667176, 307.7048847, 295.2915574, 284.9981584, 282.4243391, 298.4115103,
           360.9832044, 486.4398933, 550.3139862, 542.9828898, 509.6215404, 474.1087098, 403.6717878, 333.680157],
  "PV": [0, 0, 0, 0, 0, 0, 0, 0, 3.2429, 38.1736, 69.7787, 92.1211, 101.0943, 103.8121, 88.1212, 66.5642, 27.0837, 0.1367, 0, 0, 0, 0, 0, 0],
  "Wind": [88.2, 0, 0, 0, 88.2, 88.2, 86.2303, 45.1584, 28.1943, 27.2766, 20.2669, 35.5599,
           52.2765, 43.0745, 42.6656, 45.3704, 53.9272, 66.1914, 66.7392, 79.877, 58.08, 78.0337, 39.6808, 50.4316],
  "SOC": 0.2,
  "PG1": 0,
  "PG2": 0,
  "PG3": 0
}

@ -0,0 +1,28 @@
- The energy system includes photovoltaic power stations, wind power stations, generators, batteries, and residential buildings.
- The goal is to minimize the total costs of the energy system and the gap between power supply and residential load.
- Electricity price, residential load, photovoltaic, and wind power generation are \\param{price_{}}, \\param{load_{}}, \\param{pv_{}}, and \\param{wind_{}}, each with 24 time steps representing hourly data.
- Battery state of charge is \\param{soc_{}}, with a total battery capacity of 500. Power of generators 1, 2, and 3 is \\param{pg1_{}}, \\param{pg2_{}}, and \\param{pg3_{}}, respectively. The initial values at time step 0 are provided, and the system will update these values at each subsequent time step based on decisions made.
- \\var{Apv_{}}, \\var{Asoc_{}}, \\var{Ag1_{}}, \\var{Ag2_{}}, and \\var{Ag3_{}} are adjustment factors ranging from -1 to 1, changing according to the value from the previous time step and affecting system parameters such as power generation and battery capacity.
- Battery charging and discharging result in a capacity change of 100 * \\var{Asoc_{t}}.
- Generators 1, 2, and 3 power change by 100 * \\var{Ag1_{t}}, 100 * \\var{Ag2_{t}}, and 200 * \\var{Ag3_{t}}.
- \\param{soc_{t}} is calculated as (0.18 * \\var{Asoc_{t}} + \\param{soc_{t-1}}), with a maximum of 0.8 and a minimum of 0.2.
- \\param{pg1_{t}} is calculated as (100 * \\var{Ag1_{t}} + \\param{pg1_{t-1}}), with a maximum of 150 and a minimum of 10.
- \\param{pg2_{t}} is calculated as (100 * \\var{Ag2_{t}} + \\param{pg2_{t-1}}), with a maximum of 375 and a minimum of 50.
- \\param{pg3_{t}} is calculated as (200 * \\var{Ag3_{t}} + \\param{pg3_{t-1}}), with a maximum of 500 and a minimum of 100.
- Photovoltaic power is calculated as (\\param{pv_{t}} * (1 + \\var{Apv})).
- Total power supply at each time step t = \\param{soc_{t}} + \\param{pg1_{t}} + \\param{pg2_{t}} + \\param{pg3_{t}}+ (photovoltaic power at time step t) + \\param{wind_{t}}.
- Excess energy = total power supply - \\param{load_{t}}, and no less than 0.
- Shortage energy = \\param{load_{t}} - total power supply, and no less than 0.
- The total cost structure includes the following:
- Battery cost = \\var{Asoc_{t}} + 0.1 * \\param{soc_{t}};
- Gen1 cost = 0.0034 * \\param{pg1_{t}}^2 + 3 * \\param{pg1_{t}} + 30;
- Gen2 cost = 0.001 * \\param{pg2_{t}}^2 + 10 * \\param{pg2_{t}} + 40;
- Gen3 cost = 0.001 * \\param{pg3_{t}}^2 + 15 * \\param{pg3_{t}} + 70;
- Renewable energy cost = 0.01 * (\\param{pv_{t}} + \\param{wind_{t}});
- Sales revenue = 0.5 * \\param{price_{t}} * (excess energy);
- Purchase cost = \\param{price_{t}} * (shortage energy);
- Excess penalty = max(0, (excess energy - 100) * 50);
- Shortage penalty = max(0, (shortage energy - 100) * 50).
- The cost of each power generation module should be considered when supplying power, usually starting the subsequent generator when the power of the previous generator is insufficient to meet the load.
- Minimize transactions exceeding the power grid transaction limit to avoid power grid fluctuations affecting the stability of residential electricity consumption.
- Decisions made at each time step will update \\param{soc_{}}, \\param{pg1_{}}, \\param{pg2_{}}, and \\param{pg3_{}}, and these updated values will serve as the starting point for calculations in the next time step.

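To make the update rules listed above concrete, here is a minimal gurobipy sketch (not part of the repository code) of the soc and pg1 recurrences with their stated bounds. The initial values soc0 and pg1_0 are placeholders for whatever the data file supplies; pg2 and pg3 would follow the same pattern with steps of 100 * Ag2_t and 200 * Ag3_t and bounds [50, 375] and [100, 500].

# Sketch of the state-update constraints described above; names and initial values
# are assumptions for illustration, not values taken from this repository.
import gurobipy as gp
from gurobipy import GRB

T = 24
soc0, pg1_0 = 0.2, 10  # assumed initial state of charge and generator-1 power

m = gp.Model("recurrence_sketch")
Asoc = m.addVars(T, lb=-1, ub=1, name="Asoc")   # battery adjustment factor
Ag1 = m.addVars(T, lb=-1, ub=1, name="Ag1")     # generator-1 adjustment factor
soc = m.addVars(T, lb=0.2, ub=0.8, name="soc")  # state of charge, bounded as specified
pg1 = m.addVars(T, lb=10, ub=150, name="pg1")   # generator-1 power, bounded as specified

for t in range(T):
    prev_soc = soc0 if t == 0 else soc[t - 1]
    prev_pg1 = pg1_0 if t == 0 else pg1[t - 1]
    m.addConstr(soc[t] == prev_soc + 0.18 * Asoc[t], name=f"soc_update_{t}")
    m.addConstr(pg1[t] == prev_pg1 + 100 * Ag1[t], name=f"pg1_update_{t}")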
@ -0,0 +1,4 @@
{
    "types": "Mixed Integer Linear Programming",
    "domains": "Energy Optimization"
}

@ -0,0 +1,58 @@
|
||||||
|
{
|
||||||
|
"description": "- The energy system includes photovoltaic power stations, wind power stations, generators, batteries, and residential buildings.\n- The goal is to minimize the total costs of the energy system and the gap between power supply and residential load.\n- Electricity price, residential load, photovoltaic, and wind power generation are \\\\param{price_{}}, \\\\param{load_{}}, \\\\param{pv_{}}, and \\\\param{wind_{}}, each with 24 time steps representing hourly data.\n- Battery state of charge is \\\\param{soc_{}}, with a total battery capacity of 500. Power of generators 1, 2, and 3 is \\\\param{pg1_{}}, \\\\param{pg2_{}}, and \\\\param{pg3_{}}, respectively. The initial values at time step 0 are provided, and the system will update these values at each subsequent time step based on decisions made.\n- \\\\var{Apv_{}}, \\\\var{Asoc_{}}, \\\\var{Ag1_{}}, \\\\var{Ag2_{}}, and \\\\var{Ag3_{}} are adjustment factors ranging from -1 to 1, changing according to the value from the previous time step and affecting system parameters such as power generation and battery capacity.\n- Battery charging and discharging result in a capacity change of 100 * \\\\var{Asoc_{t}}.\n- Generators 1, 2, and 3 power change by 100 * \\\\var{Ag1_{t}}, 100 * \\\\var{Ag2_{t}}, and 200 * \\\\var{Ag3_{t}}.\n- \\\\param{soc_{t}} is calculated as (0.18 * \\\\var{Asoc_{t}} + \\\\param{soc_{t-1}}), with a maximum of 0.8 and a minimum of 0.2.\n- \\\\param{pg1_{t}} is calculated as (100 * \\\\var{Ag1_{t}} + \\\\param{pg1_{t-1}}), with a maximum of 150 and a minimum of 10.\n- \\\\param{pg2_{t}} is calculated as (100 * \\\\var{Ag2_{t}} + \\\\param{pg2_{t-1}}), with a maximum of 375 and a minimum of 50.\n- \\\\param{pg3_{t}} is calculated as (200 * \\\\var{Ag3_{t}} + \\\\param{pg3_{t-1}}), with a maximum of 500 and a minimum of 100.\n- Photovoltaic power is calculated as (\\\\param{pv_{t}} * (1 + \\\\var{Apv})).\n- Total power supply at each time step t = \\\\param{soc_{t}} + \\\\param{pg1_{t}} + \\\\param{pg2_{t}} + \\\\param{pg3_{t}}+ (photovoltaic power at time step t) + \\\\param{wind_{t}}.\n- Excess energy = total power supply - \\\\param{load_{t}}, and no less than 0.\n- Shortage energy = \\\\param{load_{t}} - total power supply, and no less than 0.\n- The total cost structure includes the following:\n- Battery cost = \\\\var{Asoc_{t}} + 0.1 * \\\\param{soc_{t}};\n- Gen1 cost = 0.0034 * \\\\param{pg1_{t}}^2 + 3 * \\\\param{pg1_{t}} + 30;\n- Gen2 cost = 0.001 * \\\\param{pg2_{t}}^2 + 10 * \\\\param{pg2_{t}} + 40;\n- Gen3 cost = 0.001 * \\\\param{pg3_{t}}^2 + 15 * \\\\param{pg3_{t}} + 70;\n- Renewable energy cost = 0.01 * (\\\\param{pv_{t}} + \\\\param{wind_{t}});\n- Sales revenue = 0.5 * \\\\param{price_{t}} * (excess energy);\n- Purchase cost = \\\\param{price_{t}} * (shortage energy);\n- Excess penalty = max(0, (excess energy - 100) * 50);\n- Shortage penalty = max(0, (shortage energy - 100) * 50).\n- The cost of each power generation module should be considered when supplying power, usually starting the subsequent generator when the power of the previous generator is insufficient to meet the load.\n- Minimize transactions exceeding the power grid transaction limit to avoid power grid fluctuations affecting the stability of residential electricity consumption.\n- Decisions made at each time step will update \\\\param{soc_{}}, \\\\param{pg1_{}}, \\\\param{pg2_{}}, and \\\\param{pg3_{}}, and these updated values will serve as the starting point for calculations in the next time step.\n",
|
||||||
|
"parameters": {
|
||||||
|
"TotalBatteryCapacity": {
|
||||||
|
"shape": [],
|
||||||
|
"definition": "The total capacity of the battery",
|
||||||
|
"type": "int"
|
||||||
|
},
|
||||||
|
"InitialSoc": {
|
||||||
|
"shape": [],
|
||||||
|
"definition": "Initial state of charge of the battery",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"InitialPg1": {
|
||||||
|
"shape": [],
|
||||||
|
"definition": "Initial power of generator 1",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"InitialPg2": {
|
||||||
|
"shape": [],
|
||||||
|
"definition": "Initial power of generator 2",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"InitialPg3": {
|
||||||
|
"shape": [],
|
||||||
|
"definition": "Initial power of generator 3",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"Price": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"definition": "Electricity price at each time step",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"Load": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"definition": "Residential load at each time step",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"Pv": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"definition": "Photovoltaic power generation at each time step",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"Wind": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"definition": "Wind power generation at each time step",
|
||||||
|
"type": "float"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,63 @@
|
||||||
|
{
|
||||||
|
"description": "- The energy system includes photovoltaic power stations, wind power stations, generators, batteries, and residential buildings.\n- The goal is to minimize the total costs of the energy system and the gap between power supply and residential load.\n- Electricity price, residential load, photovoltaic, and wind power generation are \\\\param{price_{}}, \\\\param{load_{}}, \\\\param{pv_{}}, and \\\\param{wind_{}}, each with 24 time steps representing hourly data.\n- Battery state of charge is \\\\param{soc_{}}, with a total battery capacity of 500. Power of generators 1, 2, and 3 is \\\\param{pg1_{}}, \\\\param{pg2_{}}, and \\\\param{pg3_{}}, respectively. The initial values at time step 0 are provided, and the system will update these values at each subsequent time step based on decisions made.\n- \\\\var{Apv_{}}, \\\\var{Asoc_{}}, \\\\var{Ag1_{}}, \\\\var{Ag2_{}}, and \\\\var{Ag3_{}} are adjustment factors ranging from -1 to 1, changing according to the value from the previous time step and affecting system parameters such as power generation and battery capacity.\n- Battery charging and discharging result in a capacity change of 100 * \\\\var{Asoc_{t}}.\n- Generators 1, 2, and 3 power change by 100 * \\\\var{Ag1_{t}}, 100 * \\\\var{Ag2_{t}}, and 200 * \\\\var{Ag3_{t}}.\n- \\\\param{soc_{t}} is calculated as (0.18 * \\\\var{Asoc_{t}} + \\\\param{soc_{t-1}}), with a maximum of 0.8 and a minimum of 0.2.\n- \\\\param{pg1_{t}} is calculated as (100 * \\\\var{Ag1_{t}} + \\\\param{pg1_{t-1}}), with a maximum of 150 and a minimum of 10.\n- \\\\param{pg2_{t}} is calculated as (100 * \\\\var{Ag2_{t}} + \\\\param{pg2_{t-1}}), with a maximum of 375 and a minimum of 50.\n- \\\\param{pg3_{t}} is calculated as (200 * \\\\var{Ag3_{t}} + \\\\param{pg3_{t-1}}), with a maximum of 500 and a minimum of 100.\n- Photovoltaic power is calculated as (\\\\param{pv_{t}} * (1 + \\\\var{Apv})).\n- Total power supply at each time step t = \\\\param{soc_{t}} + \\\\param{pg1_{t}} + \\\\param{pg2_{t}} + \\\\param{pg3_{t}}+ (photovoltaic power at time step t) + \\\\param{wind_{t}}.\n- Excess energy = total power supply - \\\\param{load_{t}}, and no less than 0.\n- Shortage energy = \\\\param{load_{t}} - total power supply, and no less than 0.\n- The total cost structure includes the following:\n- Battery cost = \\\\var{Asoc_{t}} + 0.1 * \\\\param{soc_{t}};\n- Gen1 cost = 0.0034 * \\\\param{pg1_{t}}^2 + 3 * \\\\param{pg1_{t}} + 30;\n- Gen2 cost = 0.001 * \\\\param{pg2_{t}}^2 + 10 * \\\\param{pg2_{t}} + 40;\n- Gen3 cost = 0.001 * \\\\param{pg3_{t}}^2 + 15 * \\\\param{pg3_{t}} + 70;\n- Renewable energy cost = 0.01 * (\\\\param{pv_{t}} + \\\\param{wind_{t}});\n- Sales revenue = 0.5 * \\\\param{price_{t}} * (excess energy);\n- Purchase cost = \\\\param{price_{t}} * (shortage energy);\n- Excess penalty = max(0, (excess energy - 100) * 50);\n- Shortage penalty = max(0, (shortage energy - 100) * 50).\n- The cost of each power generation module should be considered when supplying power, usually starting the subsequent generator when the power of the previous generator is insufficient to meet the load.\n- Minimize transactions exceeding the power grid transaction limit to avoid power grid fluctuations affecting the stability of residential electricity consumption.\n- Decisions made at each time step will update \\\\param{soc_{}}, \\\\param{pg1_{}}, \\\\param{pg2_{}}, and \\\\param{pg3_{}}, and these updated values will serve as the starting point for calculations in the next time step.\n",
|
||||||
|
"parameters": {
|
||||||
|
"TotalBatteryCapacity": {
|
||||||
|
"shape": [],
|
||||||
|
"definition": "The total capacity of the battery",
|
||||||
|
"type": "int"
|
||||||
|
},
|
||||||
|
"InitialSoc": {
|
||||||
|
"shape": [],
|
||||||
|
"definition": "Initial state of charge of the battery",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"InitialPg1": {
|
||||||
|
"shape": [],
|
||||||
|
"definition": "Initial power of generator 1",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"InitialPg2": {
|
||||||
|
"shape": [],
|
||||||
|
"definition": "Initial power of generator 2",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"InitialPg3": {
|
||||||
|
"shape": [],
|
||||||
|
"definition": "Initial power of generator 3",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"Price": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"definition": "Electricity price at each time step",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"Load": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"definition": "Residential load at each time step",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"Pv": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"definition": "Photovoltaic power generation at each time step",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"Wind": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"definition": "Wind power generation at each time step",
|
||||||
|
"type": "float"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"objective": {
|
||||||
|
"description": "\"The goal is to minimize the total costs of the energy system and the gap between power supply and residential load.\"",
|
||||||
|
"formulation": null,
|
||||||
|
"code": null
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,135 @@
|
||||||
|
{
|
||||||
|
"description": "- The energy system includes photovoltaic power stations, wind power stations, generators, batteries, and residential buildings.\n- The goal is to minimize the total costs of the energy system and the gap between power supply and residential load.\n- Electricity price, residential load, photovoltaic, and wind power generation are \\\\param{price_{}}, \\\\param{load_{}}, \\\\param{pv_{}}, and \\\\param{wind_{}}, each with 24 time steps representing hourly data.\n- Battery state of charge is \\\\param{soc_{}}, with a total battery capacity of 500. Power of generators 1, 2, and 3 is \\\\param{pg1_{}}, \\\\param{pg2_{}}, and \\\\param{pg3_{}}, respectively. The initial values at time step 0 are provided, and the system will update these values at each subsequent time step based on decisions made.\n- \\\\var{Apv_{}}, \\\\var{Asoc_{}}, \\\\var{Ag1_{}}, \\\\var{Ag2_{}}, and \\\\var{Ag3_{}} are adjustment factors ranging from -1 to 1, changing according to the value from the previous time step and affecting system parameters such as power generation and battery capacity.\n- Battery charging and discharging result in a capacity change of 100 * \\\\var{Asoc_{t}}.\n- Generators 1, 2, and 3 power change by 100 * \\\\var{Ag1_{t}}, 100 * \\\\var{Ag2_{t}}, and 200 * \\\\var{Ag3_{t}}.\n- \\\\param{soc_{t}} is calculated as (0.18 * \\\\var{Asoc_{t}} + \\\\param{soc_{t-1}}), with a maximum of 0.8 and a minimum of 0.2.\n- \\\\param{pg1_{t}} is calculated as (100 * \\\\var{Ag1_{t}} + \\\\param{pg1_{t-1}}), with a maximum of 150 and a minimum of 10.\n- \\\\param{pg2_{t}} is calculated as (100 * \\\\var{Ag2_{t}} + \\\\param{pg2_{t-1}}), with a maximum of 375 and a minimum of 50.\n- \\\\param{pg3_{t}} is calculated as (200 * \\\\var{Ag3_{t}} + \\\\param{pg3_{t-1}}), with a maximum of 500 and a minimum of 100.\n- Photovoltaic power is calculated as (\\\\param{pv_{t}} * (1 + \\\\var{Apv})).\n- Total power supply at each time step t = \\\\param{soc_{t}} + \\\\param{pg1_{t}} + \\\\param{pg2_{t}} + \\\\param{pg3_{t}}+ (photovoltaic power at time step t) + \\\\param{wind_{t}}.\n- Excess energy = total power supply - \\\\param{load_{t}}, and no less than 0.\n- Shortage energy = \\\\param{load_{t}} - total power supply, and no less than 0.\n- The total cost structure includes the following:\n- Battery cost = \\\\var{Asoc_{t}} + 0.1 * \\\\param{soc_{t}};\n- Gen1 cost = 0.0034 * \\\\param{pg1_{t}}^2 + 3 * \\\\param{pg1_{t}} + 30;\n- Gen2 cost = 0.001 * \\\\param{pg2_{t}}^2 + 10 * \\\\param{pg2_{t}} + 40;\n- Gen3 cost = 0.001 * \\\\param{pg3_{t}}^2 + 15 * \\\\param{pg3_{t}} + 70;\n- Renewable energy cost = 0.01 * (\\\\param{pv_{t}} + \\\\param{wind_{t}});\n- Sales revenue = 0.5 * \\\\param{price_{t}} * (excess energy);\n- Purchase cost = \\\\param{price_{t}} * (shortage energy);\n- Excess penalty = max(0, (excess energy - 100) * 50);\n- Shortage penalty = max(0, (shortage energy - 100) * 50).\n- The cost of each power generation module should be considered when supplying power, usually starting the subsequent generator when the power of the previous generator is insufficient to meet the load.\n- Minimize transactions exceeding the power grid transaction limit to avoid power grid fluctuations affecting the stability of residential electricity consumption.\n- Decisions made at each time step will update \\\\param{soc_{}}, \\\\param{pg1_{}}, \\\\param{pg2_{}}, and \\\\param{pg3_{}}, and these updated values will serve as the starting point for calculations in the next time step.\n",
|
||||||
|
"parameters": {
|
||||||
|
"TotalBatteryCapacity": {
|
||||||
|
"shape": [],
|
||||||
|
"definition": "The total capacity of the battery",
|
||||||
|
"type": "int"
|
||||||
|
},
|
||||||
|
"InitialSoc": {
|
||||||
|
"shape": [],
|
||||||
|
"definition": "Initial state of charge of the battery",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"InitialPg1": {
|
||||||
|
"shape": [],
|
||||||
|
"definition": "Initial power of generator 1",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"InitialPg2": {
|
||||||
|
"shape": [],
|
||||||
|
"definition": "Initial power of generator 2",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"InitialPg3": {
|
||||||
|
"shape": [],
|
||||||
|
"definition": "Initial power of generator 3",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"Price": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"definition": "Electricity price at each time step",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"Load": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"definition": "Residential load at each time step",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"Pv": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"definition": "Photovoltaic power generation at each time step",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"Wind": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"definition": "Wind power generation at each time step",
|
||||||
|
"type": "float"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"objective": {
|
||||||
|
"description": "\"The goal is to minimize the total costs of the energy system and the gap between power supply and residential load.\"",
|
||||||
|
"formulation": null,
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
"constraints": [
|
||||||
|
{
|
||||||
|
"description": "Battery state of charge (soc_t) must be between 0.2 and 0.8",
|
||||||
|
"formulation": null,
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Generator 1 power (pg1_t) must be between 10 and 150",
|
||||||
|
"formulation": null,
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Generator 2 power (pg2_t) must be between 50 and 375",
|
||||||
|
"formulation": null,
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Generator 3 power (pg3_t) must be between 100 and 500",
|
||||||
|
"formulation": null,
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Battery state of charge (soc_t) must be calculated as (0.18 * Asoc_t + soc_t-1)",
|
||||||
|
"formulation": null,
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Generator 1 power (pg1_t) must be calculated as (100 * Ag1_t + pg1_t-1)",
|
||||||
|
"formulation": null,
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Generator 2 power (pg2_t) must be calculated as (100 * Ag2_t + pg2_t-1)",
|
||||||
|
"formulation": null,
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Generator 3 power (pg3_t) must be calculated as (200 * Ag3_t + pg3_t-1)",
|
||||||
|
"formulation": null,
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Photovoltaic power must be calculated as (pv_t * (1 + Apv))",
|
||||||
|
"formulation": null,
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Excess energy must be no less than 0",
|
||||||
|
"formulation": null,
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Shortage energy must be no less than 0",
|
||||||
|
"formulation": null,
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Adjustment factors (Apv, Asoc, Ag1, Ag2, Ag3) must range from -1 to 1",
|
||||||
|
"formulation": null,
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Battery charging and discharging must result in a capacity change of 100 * Asoc_t",
|
||||||
|
"formulation": null,
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "The sum of power from generators and renewable sources must not exceed the residential load by more than the grid transaction limit to avoid instability",
|
||||||
|
"formulation": null,
|
||||||
|
"code": null
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
|
@ -0,0 +1,217 @@
|
||||||
|
{
|
||||||
|
"description": "- The energy system includes photovoltaic power stations, wind power stations, generators, batteries, and residential buildings.\n- The goal is to minimize the total costs of the energy system and the gap between power supply and residential load.\n- Electricity price, residential load, photovoltaic, and wind power generation are \\\\param{price_{}}, \\\\param{load_{}}, \\\\param{pv_{}}, and \\\\param{wind_{}}, each with 24 time steps representing hourly data.\n- Battery state of charge is \\\\param{soc_{}}, with a total battery capacity of 500. Power of generators 1, 2, and 3 is \\\\param{pg1_{}}, \\\\param{pg2_{}}, and \\\\param{pg3_{}}, respectively. The initial values at time step 0 are provided, and the system will update these values at each subsequent time step based on decisions made.\n- \\\\var{Apv_{}}, \\\\var{Asoc_{}}, \\\\var{Ag1_{}}, \\\\var{Ag2_{}}, and \\\\var{Ag3_{}} are adjustment factors ranging from -1 to 1, changing according to the value from the previous time step and affecting system parameters such as power generation and battery capacity.\n- Battery charging and discharging result in a capacity change of 100 * \\\\var{Asoc_{t}}.\n- Generators 1, 2, and 3 power change by 100 * \\\\var{Ag1_{t}}, 100 * \\\\var{Ag2_{t}}, and 200 * \\\\var{Ag3_{t}}.\n- \\\\param{soc_{t}} is calculated as (0.18 * \\\\var{Asoc_{t}} + \\\\param{soc_{t-1}}), with a maximum of 0.8 and a minimum of 0.2.\n- \\\\param{pg1_{t}} is calculated as (100 * \\\\var{Ag1_{t}} + \\\\param{pg1_{t-1}}), with a maximum of 150 and a minimum of 10.\n- \\\\param{pg2_{t}} is calculated as (100 * \\\\var{Ag2_{t}} + \\\\param{pg2_{t-1}}), with a maximum of 375 and a minimum of 50.\n- \\\\param{pg3_{t}} is calculated as (200 * \\\\var{Ag3_{t}} + \\\\param{pg3_{t-1}}), with a maximum of 500 and a minimum of 100.\n- Photovoltaic power is calculated as (\\\\param{pv_{t}} * (1 + \\\\var{Apv})).\n- Total power supply at each time step t = \\\\param{soc_{t}} + \\\\param{pg1_{t}} + \\\\param{pg2_{t}} + \\\\param{pg3_{t}}+ (photovoltaic power at time step t) + \\\\param{wind_{t}}.\n- Excess energy = total power supply - \\\\param{load_{t}}, and no less than 0.\n- Shortage energy = \\\\param{load_{t}} - total power supply, and no less than 0.\n- The total cost structure includes the following:\n- Battery cost = \\\\var{Asoc_{t}} + 0.1 * \\\\param{soc_{t}};\n- Gen1 cost = 0.0034 * \\\\param{pg1_{t}}^2 + 3 * \\\\param{pg1_{t}} + 30;\n- Gen2 cost = 0.001 * \\\\param{pg2_{t}}^2 + 10 * \\\\param{pg2_{t}} + 40;\n- Gen3 cost = 0.001 * \\\\param{pg3_{t}}^2 + 15 * \\\\param{pg3_{t}} + 70;\n- Renewable energy cost = 0.01 * (\\\\param{pv_{t}} + \\\\param{wind_{t}});\n- Sales revenue = 0.5 * \\\\param{price_{t}} * (excess energy);\n- Purchase cost = \\\\param{price_{t}} * (shortage energy);\n- Excess penalty = max(0, (excess energy - 100) * 50);\n- Shortage penalty = max(0, (shortage energy - 100) * 50).\n- The cost of each power generation module should be considered when supplying power, usually starting the subsequent generator when the power of the previous generator is insufficient to meet the load.\n- Minimize transactions exceeding the power grid transaction limit to avoid power grid fluctuations affecting the stability of residential electricity consumption.\n- Decisions made at each time step will update \\\\param{soc_{}}, \\\\param{pg1_{}}, \\\\param{pg2_{}}, and \\\\param{pg3_{}}, and these updated values will serve as the starting point for calculations in the next time step.\n",
|
||||||
|
"parameters": {
|
||||||
|
"TotalBatteryCapacity": {
|
||||||
|
"shape": [],
|
||||||
|
"definition": "The total capacity of the battery",
|
||||||
|
"type": "int"
|
||||||
|
},
|
||||||
|
"InitialSoc": {
|
||||||
|
"shape": [],
|
||||||
|
"definition": "Initial state of charge of the battery",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"InitialPg1": {
|
||||||
|
"shape": [],
|
||||||
|
"definition": "Initial power of generator 1",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"InitialPg2": {
|
||||||
|
"shape": [],
|
||||||
|
"definition": "Initial power of generator 2",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"InitialPg3": {
|
||||||
|
"shape": [],
|
||||||
|
"definition": "Initial power of generator 3",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"Price": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"definition": "Electricity price at each time step",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"Load": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"definition": "Residential load at each time step",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"Pv": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"definition": "Photovoltaic power generation at each time step",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"Wind": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"definition": "Wind power generation at each time step",
|
||||||
|
"type": "float"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"objective": {
|
||||||
|
"description": "\"The goal is to minimize the total costs of the energy system and the gap between power supply and residential load.\"",
|
||||||
|
"formulation": null,
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
"constraints": [
|
||||||
|
{
|
||||||
|
"description": "Battery state of charge (soc_t) must be between 0.2 and 0.8",
|
||||||
|
"formulation": "$0.2 \\leq soc_t \\leq 0.8$",
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Generator 1 power (pg1_t) must be between 10 and 150",
|
||||||
|
"formulation": "$10 \\leq pg1_t \\leq 150$",
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Generator 2 power (pg2_t) must be between 50 and 375",
|
||||||
|
"formulation": "$50 \\leq pg2_t \\leq 375$",
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Generator 3 power (pg3_t) must be between 100 and 500",
|
||||||
|
"formulation": "$100 \\\\leq pg3_t \\\\leq 500$",
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Battery state of charge (soc_t) must be calculated as (0.18 * Asoc_t + soc_t-1)",
|
||||||
|
"formulation": "$\\\\forall t \\\\in \\\\{1,\\\\dots,24\\\\}, \\\\param{soc_{t}} = 0.18 \\\\cdot \\\\var{Asoc_{t}} + \\\\param{soc_{t-1}}$",
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Generator 1 power (pg1_t) must be calculated as (100 * Ag1_t + pg1_t-1)",
|
||||||
|
"formulation": "$\\\\forall t \\\\in \\\\{1,\\\\dots,24\\\\}, \\\\param{pg1_{t}} = 100 \\\\cdot \\\\var{Ag1_{t}} + \\\\param{pg1_{t-1}}$",
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Generator 2 power (pg2_t) must be calculated as (100 * Ag2_t + pg2_t-1)",
|
||||||
|
"formulation": "$\\\\forall t \\\\in \\\\{1,\\\\dots,24\\\\}, \\\\param{pg2_{t}} = 100 \\\\times Ag2_t + \\\\param{pg2_{t-1}}$",
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Generator 3 power (pg3_t) must be calculated as (200 * Ag3_t + pg3_t-1)",
|
||||||
|
"formulation": "$\\\\forall t \\\\in \\\\{1,\\\\ldots,24\\\\}, \\\\param{pg3_{t}} = 200 \\\\cdot \\\\var{Ag3_{t}} + \\\\param{pg3_{t-1}}$",
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Photovoltaic power must be calculated as (pv_t * (1 + Apv))",
|
||||||
|
"formulation": "$\\\\forall t, PvPower_t = Pv[t] \\\\times (1 + Apv_t)$",
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Excess energy must be no less than 0",
|
||||||
|
"formulation": "$\\\\forall t, ExcessEnergy[t] \\\\geq 0$",
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "auxiliary constraint",
|
||||||
|
"formulation": "$\\\\forall t, ExcessEnergy[t] = TotalSupply[t] - Load[t]$"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "auxiliary constraint",
|
||||||
|
"formulation": "$\\\\forall t, TotalSupply[t] = soc[t] + pg1[t] + pg2[t] + pg3[t] + PvPower[t] + Wind[t]$"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Shortage energy must be no less than 0",
|
||||||
|
"formulation": "$\\\\forall t, ShortageEnergy[t] \\\\geq 0$",
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Adjustment factors (Apv, Asoc, Ag1, Ag2, Ag3) must range from -1 to 1",
|
||||||
|
"formulation": "$-1 \\leq Apv_t \\leq 1 \\\\quad \\\\forall t \\\\in \\\\{1, \\\\dots, 24\\\\}$\\n$-1 \\leq Asoc_t \\leq 1 \\\\quad \\\\forall t \\\\in \\\\{1, \\\\dots, 24\\\\}$\\n$-1 \\leq Ag1_t \\leq 1 \\\\quad \\\\forall t \\\\in \\\\{1, \\\\dots, 24\\\\}$\\n$-1 \\leq Ag2_t \\leq 1 \\\\quad \\\\forall t \\\\in \\\\{1, \\\\dots, 24\\\\}$\\n$-1 \\leq Ag3_t \\leq 1 \\\\quad \\\\forall t \\\\in \\\\{1, \\\\dots, 24\\\\}$",
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Battery charging and discharging must result in a capacity change of 100 * Asoc_t",
|
||||||
|
"formulation": "$\\\\forall t, 100 \\\\times Asoc_t = soc_{t} - soc_{t-1}$",
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "The sum of power from generators and renewable sources must not exceed the residential load by more than the grid transaction limit to avoid instability",
|
||||||
|
"formulation": "$\\\\forall t, TotalPowerSupply[t] \\\\leq Load[t] + GridTransactionLimit$",
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "auxiliary constraint",
|
||||||
|
"formulation": "$\\\\forall t, TotalPowerSupply[t] = Pg1[t] + Pg2[t] + Pg3[t] + PvPower[t] + Wind[t]$"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"variables": {
|
||||||
|
"Ag1": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"type": "continuous",
|
||||||
|
"definition": "Adjustment factor for generator 1 power at each time step"
|
||||||
|
},
|
||||||
|
"Ag2": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"type": "continuous",
|
||||||
|
"definition": "Adjustment factor for generator 2 power at each time step"
|
||||||
|
},
|
||||||
|
"Ag3": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"type": "continuous",
|
||||||
|
"definition": "Adjustment factor for generator 3 power at each time step"
|
||||||
|
},
|
||||||
|
"PvPower": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"type": "continuous",
|
||||||
|
"definition": "The adjusted photovoltaic power generation at each time step"
|
||||||
|
},
|
||||||
|
"Apv": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"type": "continuous",
|
||||||
|
"definition": "The adjustment factor for photovoltaic power at each time step"
|
||||||
|
},
|
||||||
|
"ExcessEnergy": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"type": "continuous",
|
||||||
|
"definition": "The excess energy at each time step"
|
||||||
|
},
|
||||||
|
"ShortageEnergy": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"type": "continuous",
|
||||||
|
"definition": "The shortage energy at each time step"
|
||||||
|
},
|
||||||
|
"Asoc": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"type": "continuous",
|
||||||
|
"definition": "The adjustment factor for the battery state of charge at each time step"
|
||||||
|
},
|
||||||
|
"TotalPowerSupply": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"type": "continuous",
|
||||||
|
"definition": "The total power supply from generators and renewable sources at each time step"
|
||||||
|
},
|
||||||
|
"GridTransactionLimit": {
|
||||||
|
"shape": [],
|
||||||
|
"type": "float",
|
||||||
|
"definition": "The maximum allowable excess power supply over the residential load to maintain grid stability"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,216 @@
|
||||||
|
{
|
||||||
|
"description": "- The energy system includes photovoltaic power stations, wind power stations, generators, batteries, and residential buildings.\n- The goal is to minimize the total costs of the energy system and the gap between power supply and residential load.\n- Electricity price, residential load, photovoltaic, and wind power generation are \\\\param{price_{}}, \\\\param{load_{}}, \\\\param{pv_{}}, and \\\\param{wind_{}}, each with 24 time steps representing hourly data.\n- Battery state of charge is \\\\param{soc_{}}, with a total battery capacity of 500. Power of generators 1, 2, and 3 is \\\\param{pg1_{}}, \\\\param{pg2_{}}, and \\\\param{pg3_{}}, respectively. The initial values at time step 0 are provided, and the system will update these values at each subsequent time step based on decisions made.\n- \\\\var{Apv_{}}, \\\\var{Asoc_{}}, \\\\var{Ag1_{}}, \\\\var{Ag2_{}}, and \\\\var{Ag3_{}} are adjustment factors ranging from -1 to 1, changing according to the value from the previous time step and affecting system parameters such as power generation and battery capacity.\n- Battery charging and discharging result in a capacity change of 100 * \\\\var{Asoc_{t}}.\n- Generators 1, 2, and 3 power change by 100 * \\\\var{Ag1_{t}}, 100 * \\\\var{Ag2_{t}}, and 200 * \\\\var{Ag3_{t}}.\n- \\\\param{soc_{t}} is calculated as (0.18 * \\\\var{Asoc_{t}} + \\\\param{soc_{t-1}}), with a maximum of 0.8 and a minimum of 0.2.\n- \\\\param{pg1_{t}} is calculated as (100 * \\\\var{Ag1_{t}} + \\\\param{pg1_{t-1}}), with a maximum of 150 and a minimum of 10.\n- \\\\param{pg2_{t}} is calculated as (100 * \\\\var{Ag2_{t}} + \\\\param{pg2_{t-1}}), with a maximum of 375 and a minimum of 50.\n- \\\\param{pg3_{t}} is calculated as (200 * \\\\var{Ag3_{t}} + \\\\param{pg3_{t-1}}), with a maximum of 500 and a minimum of 100.\n- Photovoltaic power is calculated as (\\\\param{pv_{t}} * (1 + \\\\var{Apv})).\n- Total power supply at each time step t = \\\\param{soc_{t}} + \\\\param{pg1_{t}} + \\\\param{pg2_{t}} + \\\\param{pg3_{t}}+ (photovoltaic power at time step t) + \\\\param{wind_{t}}.\n- Excess energy = total power supply - \\\\param{load_{t}}, and no less than 0.\n- Shortage energy = \\\\param{load_{t}} - total power supply, and no less than 0.\n- The total cost structure includes the following:\n- Battery cost = \\\\var{Asoc_{t}} + 0.1 * \\\\param{soc_{t}};\n- Gen1 cost = 0.0034 * \\\\param{pg1_{t}}^2 + 3 * \\\\param{pg1_{t}} + 30;\n- Gen2 cost = 0.001 * \\\\param{pg2_{t}}^2 + 10 * \\\\param{pg2_{t}} + 40;\n- Gen3 cost = 0.001 * \\\\param{pg3_{t}}^2 + 15 * \\\\param{pg3_{t}} + 70;\n- Renewable energy cost = 0.01 * (\\\\param{pv_{t}} + \\\\param{wind_{t}});\n- Sales revenue = 0.5 * \\\\param{price_{t}} * (excess energy);\n- Purchase cost = \\\\param{price_{t}} * (shortage energy);\n- Excess penalty = max(0, (excess energy - 100) * 50);\n- Shortage penalty = max(0, (shortage energy - 100) * 50).\n- The cost of each power generation module should be considered when supplying power, usually starting the subsequent generator when the power of the previous generator is insufficient to meet the load.\n- Minimize transactions exceeding the power grid transaction limit to avoid power grid fluctuations affecting the stability of residential electricity consumption.\n- Decisions made at each time step will update \\\\param{soc_{}}, \\\\param{pg1_{}}, \\\\param{pg2_{}}, and \\\\param{pg3_{}}, and these updated values will serve as the starting point for calculations in the next time step.\n",
|
||||||
|
"parameters": {
|
||||||
|
"TotalBatteryCapacity": {
|
||||||
|
"shape": [],
|
||||||
|
"definition": "The total capacity of the battery",
|
||||||
|
"type": "int"
|
||||||
|
},
|
||||||
|
"InitialSoc": {
|
||||||
|
"shape": [],
|
||||||
|
"definition": "Initial state of charge of the battery",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"InitialPg1": {
|
||||||
|
"shape": [],
|
||||||
|
"definition": "Initial power of generator 1",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"InitialPg2": {
|
||||||
|
"shape": [],
|
||||||
|
"definition": "Initial power of generator 2",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"InitialPg3": {
|
||||||
|
"shape": [],
|
||||||
|
"definition": "Initial power of generator 3",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"Price": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"definition": "Electricity price at each time step",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"Load": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"definition": "Residential load at each time step",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"Pv": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"definition": "Photovoltaic power generation at each time step",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"Wind": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"definition": "Wind power generation at each time step",
|
||||||
|
"type": "float"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"objective": {
|
||||||
|
"description": "\"The goal is to minimize the total costs of the energy system and the gap between power supply and residential load.\"",
|
||||||
|
"formulation": "$\\min \\sum_{t=1}^{24} \\left( \\text{Battery cost}_t + \\text{Gen1 cost}_t + \\text{Gen2 cost}_t + \\text{Gen3 cost}_t + \\text{Renewable energy cost}_t + \\text{Excess penalty}_t + \\text{Shortage penalty}_t - \\text{Sales revenue}_t + \\text{Purchase cost}_t + |\\text{ExcessEnergy}_t| + |\\text{ShortageEnergy}_t| \\right)$"
|
||||||
|
},
|
||||||
|
"constraints": [
|
||||||
|
{
|
||||||
|
"description": "Battery state of charge (soc_t) must be between 0.2 and 0.8",
|
||||||
|
"formulation": "$0.2 \\leq soc_t \\leq 0.8$",
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Generator 1 power (pg1_t) must be between 10 and 150",
|
||||||
|
"formulation": "$10 \\leq pg1_t \\leq 150$",
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Generator 2 power (pg2_t) must be between 50 and 375",
|
||||||
|
"formulation": "$50 \\leq pg2_t \\leq 375$",
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Generator 3 power (pg3_t) must be between 100 and 500",
|
||||||
|
"formulation": "$100 \\\\leq pg3_t \\\\leq 500$",
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Battery state of charge (soc_t) must be calculated as (0.18 * Asoc_t + soc_t-1)",
|
||||||
|
"formulation": "$\\\\forall t \\\\in \\\\{1,\\\\dots,24\\\\}, \\\\param{soc_{t}} = 0.18 \\\\cdot \\\\var{Asoc_{t}} + \\\\param{soc_{t-1}}$",
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Generator 1 power (pg1_t) must be calculated as (100 * Ag1_t + pg1_t-1)",
|
||||||
|
"formulation": "$\\\\forall t \\\\in \\\\{1,\\\\dots,24\\\\}, \\\\param{pg1_{t}} = 100 \\\\cdot \\\\var{Ag1_{t}} + \\\\param{pg1_{t-1}}$",
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Generator 2 power (pg2_t) must be calculated as (100 * Ag2_t + pg2_t-1)",
|
||||||
|
"formulation": "$\\\\forall t \\\\in \\\\{1,\\\\dots,24\\\\}, \\\\param{pg2_{t}} = 100 \\\\times Ag2_t + \\\\param{pg2_{t-1}}$",
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Generator 3 power (pg3_t) must be calculated as (200 * Ag3_t + pg3_t-1)",
|
||||||
|
"formulation": "$\\\\forall t \\\\in \\\\{1,\\\\ldots,24\\\\}, \\\\param{pg3_{t}} = 200 \\\\cdot \\\\var{Ag3_{t}} + \\\\param{pg3_{t-1}}$",
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Photovoltaic power must be calculated as (pv_t * (1 + Apv))",
|
||||||
|
"formulation": "$\\\\forall t, PvPower_t = Pv[t] \\\\times (1 + Apv_t)$",
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Excess energy must be no less than 0",
|
||||||
|
"formulation": "$\\\\forall t, ExcessEnergy[t] \\\\geq 0$",
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "auxiliary constraint",
|
||||||
|
"formulation": "$\\forall t, ExcessEnergy[t] = TotalSupply[t] - Load[t]$"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "auxiliary constraint",
|
||||||
|
"formulation": "$\\forall t, TotalSupply[t] = soc[t] + pg1[t] + pg2[t] + pg3[t] + PvPower[t] + Wind[t]$"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Shortage energy must be no less than 0",
|
||||||
|
"formulation": "$\\forall t, ShortageEnergy[t] \\geq 0$",
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Adjustment factors (Apv, Asoc, Ag1, Ag2, Ag3) must range from -1 to 1",
|
||||||
|
"formulation": "$-1 \\leq Apv_t \\leq 1 \\quad \\forall t \\in \\{1, \\dots, 24\\}$\\n$-1 \\leq Asoc_t \\leq 1 \\quad \\forall t \\in \\{1, \\dots, 24\\}$\\n$-1 \\leq Ag1_t \\leq 1 \\quad \\forall t \\in \\{1, \\dots, 24\\}$\\n$-1 \\leq Ag2_t \\leq 1 \\quad \\forall t \\in \\{1, \\dots, 24\\}$\\n$-1 \\leq Ag3_t \\leq 1 \\quad \\forall t \\in \\{1, \\dots, 24\\}$",
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Battery charging and discharging must result in a capacity change of 100 * Asoc_t",
|
||||||
|
"formulation": "$\\forall t, 100 \\times Asoc_t = soc_{t} - soc_{t-1}$",
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "The sum of power from generators and renewable sources must not exceed the residential load by more than the grid transaction limit to avoid instability",
|
||||||
|
"formulation": "$\\forall t, TotalPowerSupply[t] \\leq Load[t] + GridTransactionLimit$",
|
||||||
|
"code": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "auxiliary constraint",
|
||||||
|
"formulation": "$\\forall t, TotalPowerSupply[t] = Pg1[t] + Pg2[t] + Pg3[t] + PvPower[t] + Wind[t]$"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"variables": {
|
||||||
|
"Ag1": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"type": "continuous",
|
||||||
|
"definition": "Adjustment factor for generator 1 power at each time step"
|
||||||
|
},
|
||||||
|
"Ag2": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"type": "continuous",
|
||||||
|
"definition": "Adjustment factor for generator 2 power at each time step"
|
||||||
|
},
|
||||||
|
"Ag3": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"type": "continuous",
|
||||||
|
"definition": "Adjustment factor for generator 3 power at each time step"
|
||||||
|
},
|
||||||
|
"PvPower": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"type": "continuous",
|
||||||
|
"definition": "The adjusted photovoltaic power generation at each time step"
|
||||||
|
},
|
||||||
|
"Apv": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"type": "continuous",
|
||||||
|
"definition": "The adjustment factor for photovoltaic power at each time step"
|
||||||
|
},
|
||||||
|
"ExcessEnergy": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"type": "continuous",
|
||||||
|
"definition": "The excess energy at each time step"
|
||||||
|
},
|
||||||
|
"ShortageEnergy": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"type": "continuous",
|
||||||
|
"definition": "The shortage energy at each time step"
|
||||||
|
},
|
||||||
|
"Asoc": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"type": "continuous",
|
||||||
|
"definition": "The adjustment factor for the battery state of charge at each time step"
|
||||||
|
},
|
||||||
|
"TotalPowerSupply": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"type": "continuous",
|
||||||
|
"definition": "The total power supply from generators and renewable sources at each time step"
|
||||||
|
},
|
||||||
|
"GridTransactionLimit": {
|
||||||
|
"shape": [],
|
||||||
|
"type": "float",
|
||||||
|
"definition": "The maximum allowable excess power supply over the residential load to maintain grid stability"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,220 @@
|
||||||
|
{
|
||||||
|
"description": "- The energy system includes photovoltaic power stations, wind power stations, generators, batteries, and residential buildings.\n- The goal is to minimize the total costs of the energy system and the gap between power supply and residential load.\n- Electricity price, residential load, photovoltaic, and wind power generation are \\\\param{price_{}}, \\\\param{load_{}}, \\\\param{pv_{}}, and \\\\param{wind_{}}, each with 24 time steps representing hourly data.\n- Battery state of charge is \\\\param{soc_{}}, with a total battery capacity of 500. Power of generators 1, 2, and 3 is \\\\param{pg1_{}}, \\\\param{pg2_{}}, and \\\\param{pg3_{}}, respectively. The initial values at time step 0 are provided, and the system will update these values at each subsequent time step based on decisions made.\n- \\\\var{Apv_{}}, \\\\var{Asoc_{}}, \\\\var{Ag1_{}}, \\\\var{Ag2_{}}, and \\\\var{Ag3_{}} are adjustment factors ranging from -1 to 1, changing according to the value from the previous time step and affecting system parameters such as power generation and battery capacity.\n- Battery charging and discharging result in a capacity change of 100 * \\\\var{Asoc_{t}}.\n- Generators 1, 2, and 3 power change by 100 * \\\\var{Ag1_{t}}, 100 * \\\\var{Ag2_{t}}, and 200 * \\\\var{Ag3_{t}}.\n- \\\\param{soc_{t}} is calculated as (0.18 * \\\\var{Asoc_{t}} + \\\\param{soc_{t-1}}), with a maximum of 0.8 and a minimum of 0.2.\n- \\\\param{pg1_{t}} is calculated as (100 * \\\\var{Ag1_{t}} + \\\\param{pg1_{t-1}}), with a maximum of 150 and a minimum of 10.\n- \\\\param{pg2_{t}} is calculated as (100 * \\\\var{Ag2_{t}} + \\\\param{pg2_{t-1}}), with a maximum of 375 and a minimum of 50.\n- \\\\param{pg3_{t}} is calculated as (200 * \\\\var{Ag3_{t}} + \\\\param{pg3_{t-1}}), with a maximum of 500 and a minimum of 100.\n- Photovoltaic power is calculated as (\\\\param{pv_{t}} * (1 + \\\\var{Apv})).\n- Total power supply at each time step t = \\\\param{soc_{t}} + \\\\param{pg1_{t}} + \\\\param{pg2_{t}} + \\\\param{pg3_{t}}+ (photovoltaic power at time step t) + \\\\param{wind_{t}}.\n- Excess energy = total power supply - \\\\param{load_{t}}, and no less than 0.\n- Shortage energy = \\\\param{load_{t}} - total power supply, and no less than 0.\n- The total cost structure includes the following:\n- Battery cost = \\\\var{Asoc_{t}} + 0.1 * \\\\param{soc_{t}};\n- Gen1 cost = 0.0034 * \\\\param{pg1_{t}}^2 + 3 * \\\\param{pg1_{t}} + 30;\n- Gen2 cost = 0.001 * \\\\param{pg2_{t}}^2 + 10 * \\\\param{pg2_{t}} + 40;\n- Gen3 cost = 0.001 * \\\\param{pg3_{t}}^2 + 15 * \\\\param{pg3_{t}} + 70;\n- Renewable energy cost = 0.01 * (\\\\param{pv_{t}} + \\\\param{wind_{t}});\n- Sales revenue = 0.5 * \\\\param{price_{t}} * (excess energy);\n- Purchase cost = \\\\param{price_{t}} * (shortage energy);\n- Excess penalty = max(0, (excess energy - 100) * 50);\n- Shortage penalty = max(0, (shortage energy - 100) * 50).\n- The cost of each power generation module should be considered when supplying power, usually starting the subsequent generator when the power of the previous generator is insufficient to meet the load.\n- Minimize transactions exceeding the power grid transaction limit to avoid power grid fluctuations affecting the stability of residential electricity consumption.\n- Decisions made at each time step will update \\\\param{soc_{}}, \\\\param{pg1_{}}, \\\\param{pg2_{}}, and \\\\param{pg3_{}}, and these updated values will serve as the starting point for calculations in the next time step.\n",
|
||||||
|
"parameters": {
|
||||||
|
"TotalBatteryCapacity": {
|
||||||
|
"shape": [],
|
||||||
|
"definition": "The total capacity of the battery",
|
||||||
|
"type": "int"
|
||||||
|
},
|
||||||
|
"InitialSoc": {
|
||||||
|
"shape": [],
|
||||||
|
"definition": "Initial state of charge of the battery",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"InitialPg1": {
|
||||||
|
"shape": [],
|
||||||
|
"definition": "Initial power of generator 1",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"InitialPg2": {
|
||||||
|
"shape": [],
|
||||||
|
"definition": "Initial power of generator 2",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"InitialPg3": {
|
||||||
|
"shape": [],
|
||||||
|
"definition": "Initial power of generator 3",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"Price": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"definition": "Electricity price at each time step",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"Load": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"definition": "Residential load at each time step",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"Pv": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"definition": "Photovoltaic power generation at each time step",
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"Wind": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"definition": "Wind power generation at each time step",
|
||||||
|
"type": "float"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"objective": {
|
||||||
|
"description": "\"The goal is to minimize the total costs of the energy system and the gap between power supply and residential load.\"",
|
||||||
|
"formulation": "$\\min \\sum_{t=1}^{24} \\left( \\text{Battery cost}_t + \\text{Gen1 cost}_t + \\text{Gen2 cost}_t + \\text{Gen3 cost}_t + \\text{Renewable energy cost}_t + \\text{Excess penalty}_t + \\text{Shortage penalty}_t - \\text{Sales revenue}_t + \\text{Purchase cost}_t + |\\text{ExcessEnergy}_t| + |\\text{ShortageEnergy}_t| \\right)$",
|
||||||
|
"code": "model.setObjective(\n quicksum(\n (Asoc[t] + 0.1 * soc[t-1] +\n 0.0034 * pg1[t-1] ** 2 + 3 * pg1[t-1] + 30 +\n 0.001 * pg2[t-1] ** 2 + 10 * pg2[t-1] + 40 +\n 0.001 * pg3[t-1] ** 2 + 15 * pg3[t-1] + 70 +\n 0.01 * (pv[t-1] + wind[t-1]) +\n max(0, (ExcessEnergy[t] - 100) * 50) +\n max(0, (ShortageEnergy[t] - 100) * 50) -\n 0.5 * Price[t-1] * ExcessEnergy[t] +\n Price[t-1] * ShortageEnergy[t] +\n abs(ExcessEnergy[t]) + abs(ShortageEnergy[t])\n ) for t in range(1, 25)\n ),\n GRB.MINIMIZE\n)"
|
||||||
|
},
|
||||||
|
"constraints": [
|
||||||
|
{
|
||||||
|
"description": "Battery state of charge (soc_t) must be between 0.2 and 0.8",
|
||||||
|
"formulation": "$0.2 \\leq soc_t \\leq 0.8$",
|
||||||
|
"code": "# Preceding state of charge variable initialization\nsoc = InitialSoc\n\n# Loop over each time step to add the soc constraints\nfor t in range(24):\n # Calculate soc_t based on the previous soc and the adjustment factor\n soc_t = 0.18 * Asoc[t] + soc\n \n # Add the lower bound constraint for soc_t\n model.addConstr(soc_t >= 0.2)\n \n # Add the upper bound constraint for soc_t\n model.addConstr(soc_t <= 0.8)\n \n # Update soc to the newly calculated soc_t for the next iteration\n soc = soc_t"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Generator 1 power (pg1_t) must be between 10 and 150",
|
||||||
|
"formulation": "$10 \\leq pg1_t \\leq 150$",
|
||||||
|
"code": "# Assuming decision variables Pg1[t] have been defined for each time step t\nfor t in range(24):\n if t == 0:\n # For the first time step, use the initial value of Pg1\n model.addConstr(Pg1[t] <= 150)\n model.addConstr(Pg1[t] >= 10)\n else:\n # For subsequent time steps, calculate Pg1 based on the previous time step and Ag1\n model.addConstr(Pg1[t] <= 150)\n model.addConstr(Pg1[t] >= 10)\n model.addConstr(Pg1[t] == 100 * Ag1[t] + Pg1[t-1])"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Generator 2 power (pg2_t) must be between 50 and 375",
|
||||||
|
"formulation": "$50 \\leq pg2_t \\leq 375$",
|
||||||
|
"code": "for t in range(1, 24):\n model.addConstr(InitialPg2 + 100 * Ag2[t] >= 50)\n model.addConstr(InitialPg2 + 100 * Ag2[t] <= 375)"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Generator 3 power (pg3_t) must be between 100 and 500",
|
||||||
|
"formulation": "$100 \\leq pg3_t \\leq 500$",
|
||||||
|
"code": "for t in range(24):\n model.addConstr(Pg3[t] >= 100) # Lower bound constraint for each time step\n model.addConstr(Pg3[t] <= 500) # Upper bound constraint for each time step"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Battery state of charge (soc_t) must be calculated as (0.18 * Asoc_t + soc_t-1)",
|
||||||
|
"formulation": "$\\forall t \\in \\{1,\\dots,24\\}, \\param{soc_{t}} = 0.18 \\cdot \\var{Asoc_{t}} + \\param{soc_{t-1}}$",
|
||||||
|
"code": "# Assuming soc is defined as a list of decision variables for each time step\nsoc = model.addVars(24, lb=0.2, ub=0.8, vtype=GRB.CONTINUOUS, name=\"soc\")\n\n# Loop over each time step starting from 1 up to 24\nfor t in range(1, 24):\n # Add the constraint for the state of charge calculation\n model.addConstr(soc[t] == 0.18 * Asoc[t] + soc[t-1])"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Generator 1 power (pg1_t) must be calculated as (100 * Ag1_t + pg1_t-1)",
|
||||||
|
"formulation": "$\\forall t \\in \\{1,\\dots,24\\}, \\param{pg1_{t}} = 100 \\cdot \\var{Ag1_{t}} + \\param{pg1_{t-1}}$",
|
||||||
|
"code": "for t in range(1, 24):\n model.addConstr(pg1[t] == 100 * Ag1[t] + pg1[t-1])"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Generator 2 power (pg2_t) must be calculated as (100 * Ag2_t + pg2_t-1)",
|
||||||
|
"formulation": "$\\forall t \\in \\{1,\\dots,24\\}, \\param{pg2_{t}} = 100 \\times Ag2_t + \\param{pg2_{t-1}}$",
|
||||||
|
"code": "# Assuming that `pg2` is a list of Gurobi variables representing the power of Generator 2 at each time step\n# and that `InitialPg2` is a parameter representing the initial power of Generator 2 at time step 0\n\n# Define the Generator 2 power for the first time step, which is the initial value and does not have a constraint\npg2 = [model.addVar(vtype=GRB.CONTINUOUS, lb=50, ub=375, name=\"pg2_0\")]\n\n# Now add the constraints for time steps 1 to 23\nfor t in range(1, 24):\n pg2.append(model.addVar(vtype=GRB.CONTINUOUS, lb=50, ub=375, name=f\"pg2_{t}\"))\n model.addConstr(pg2[t] == 100 * Ag2[t-1] + pg2[t-1])"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Generator 3 power (pg3_t) must be calculated as (200 * Ag3_t + pg3_t-1)",
|
||||||
|
"formulation": "$\\forall t \\in \\{1,\\ldots,24\\}, \\param{pg3_{t}} = 200 \\cdot \\var{Ag3_{t}} + \\param{pg3_{t-1}}$",
|
||||||
|
"code": "# Assuming Pg3 is defined as a list of decision variables representing power of generator 3 at each time step\nfor t in range(1, 24):\n model.addConstr(Pg3[t] == 200 * Ag3[t] + Pg3[t-1])"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Photovoltaic power must be calculated as (pv_t * (1 + Apv))",
|
||||||
|
"formulation": "$\\forall t, PvPower_t = Pv[t] \\times (1 + Apv_t)$",
|
||||||
|
"code": "for t in range(24):\n model.addConstr(PvPower[t] == Pv[t] * (1 + Apv[t]))"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Excess energy must be no less than 0",
|
||||||
|
"formulation": "$\\forall t, ExcessEnergy[t] \\geq 0$",
|
||||||
|
"code": "for t in range(24):\n model.addConstr(ExcessEnergy[t] >= 0)"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "auxiliary constraint",
|
||||||
|
"formulation": "$\\forall t, ExcessEnergy[t] = TotalSupply[t] - Load[t]$",
|
||||||
|
"code": "for t in range(24):\n # Define the total power supply at time step t\n TotalSupply_t = (InitialPg1 + Ag1[t] * 100 +\n InitialPg2 + Ag2[t] * 100 +\n InitialPg3 + Ag3[t] * 200 +\n Pv[t] * (1 + Apv[t]) +\n Wind[t] +\n InitialSoc + Asoc[t] * 0.18)\n # Add the constraint for excess energy at time step t\n model.addConstr(ExcessEnergy[t] == TotalSupply_t - Load[t])"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "auxiliary constraint",
|
||||||
|
"formulation": "$\\forall t, TotalSupply[t] = soc[t] + pg1[t] + pg2[t] + pg3[t] + PvPower[t] + Wind[t]$",
|
||||||
|
"code": "for t in range(24):\n # Calculate updated generator powers and battery state of charge\n soc_t = InitialSoc + 0.18 * Asoc[t] if t == 0 else soc[t-1] + 0.18 * Asoc[t]\n pg1_t = InitialPg1 + 100 * Ag1[t] if t == 0 else pg1[t-1] + 100 * Ag1[t]\n pg2_t = InitialPg2 + 100 * Ag2[t] if t == 0 else pg2[t-1] + 100 * Ag2[t]\n pg3_t = InitialPg3 + 200 * Ag3[t] if t == 0 else pg3[t-1] + 200 * Ag3[t]\n\n # Calculate photovoltaic power\n PvPower_t = Pv[t] * (1 + Apv[t])\n\n # Add the constraint for total power supply at time t\n model.addConstr(TotalPowerSupply[t] == soc_t + pg1_t + pg2_t + pg3_t + PvPower_t + Wind[t])"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Shortage energy must be no less than 0",
|
||||||
|
"formulation": "$\\forall t, ShortageEnergy[t] \\geq 0$",
|
||||||
|
"code": "for t in range(24):\n model.addConstr(ShortageEnergy[t] >= 0)"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Adjustment factors (Apv, Asoc, Ag1, Ag2, Ag3) must range from -1 to 1",
|
||||||
|
"formulation": "$-1 \\leq Apv_t \\leq 1 \\quad \\forall t \\in \\{1, \\dots, 24\\}$\\n$-1 \\leq Asoc_t \\leq 1 \\quad \\forall t \\in \\{1, \\dots, 24\\}$\\n$-1 \\leq Ag1_t \\leq 1 \\quad \\forall t \\in \\{1, \\dots, 24\\}$\\n$-1 \\leq Ag2_t \\leq 1 \\quad \\forall t \\in \\{1, \\dots, 24\\}$\\n$-1 \\leq Ag3_t \\leq 1 \\quad \\forall t \\in \\{1, \\dots, 24\\}$",
|
||||||
|
"code": "for t in range(24):\n model.addConstr(-1 <= Apv[t])\n model.addConstr(Apv[t] <= 1)\n model.addConstr(-1 <= Asoc[t])\n model.addConstr(Asoc[t] <= 1)\n model.addConstr(-1 <= Ag1[t])\n model.addConstr(Ag1[t] <= 1)\n model.addConstr(-1 <= Ag2[t])\n model.addConstr(Ag2[t] <= 1)\n model.addConstr(-1 <= Ag3[t])\n model.addConstr(Ag3[t] <= 1)"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Battery charging and discharging must result in a capacity change of 100 * Asoc_t",
|
||||||
|
"formulation": "$\\forall t, 100 \\times Asoc_t = soc_{t} - soc_{t-1}$",
|
||||||
|
"code": "for t in range(1, 24):\n model.addConstr(100 * Asoc[t] == soc[t] - soc[t-1])"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "The sum of power from generators and renewable sources must not exceed the residential load by more than the grid transaction limit to avoid instability",
|
||||||
|
"formulation": "$\\forall t, TotalPowerSupply[t] \\leq Load[t] + GridTransactionLimit$",
|
||||||
|
"code": "for t in range(24):\n # Calculate the power from each generator based on previous state and adjustment\n pg1_power = InitialPg1 + 100 * Ag1[t]\n pg2_power = InitialPg2 + 100 * Ag2[t]\n pg3_power = InitialPg3 + 200 * Ag3[t]\n\n # Calculate the adjusted photovoltaic power\n pv_power = Pv[t] * (1 + Apv[t])\n\n # Express the total power supply\n TotalPowerSupply[t] = pg1_power + pg2_power + pg3_power + pv_power + Wind[t]\n\n # Add the constraint that total power supply cannot exceed the load by more than the grid transaction limit\n model.addConstr(TotalPowerSupply[t] <= Load[t] + GridTransactionLimit)"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "auxiliary constraint",
|
||||||
|
"formulation": "$\\forall t, TotalPowerSupply[t] = Pg1[t] + Pg2[t] + Pg3[t] + PvPower[t] + Wind[t]$",
|
||||||
|
"code": "for t in range(24):\n # Assuming that Pg1, Pg2, Pg3 are defined as variables in the model with the correct initial values and adjustments\n Pg1[t] = model.addVar(vtype=GRB.CONTINUOUS, name=f\"Pg1_{t}\")\n Pg2[t] = model.addVar(vtype=GRB.CONTINUOUS, name=f\"Pg2_{t}\")\n Pg3[t] = model.addVar(vtype=GRB.CONTINUOUS, name=f\"Pg3_{t}\")\n\n # Define the auxiliary constraint for each time step\n model.addConstr(TotalPowerSupply[t] == Pg1[t] + Pg2[t] + Pg3[t] + PvPower[t] + Wind[t])"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"variables": {
|
||||||
|
"Ag1": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"type": "continuous",
|
||||||
|
"definition": "Adjustment factor for generator 1 power at each time step"
|
||||||
|
},
|
||||||
|
"Ag2": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"type": "continuous",
|
||||||
|
"definition": "Adjustment factor for generator 2 power at each time step"
|
||||||
|
},
|
||||||
|
"Ag3": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"type": "continuous",
|
||||||
|
"definition": "Adjustment factor for generator 3 power at each time step"
|
||||||
|
},
|
||||||
|
"PvPower": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"type": "continuous",
|
||||||
|
"definition": "The adjusted photovoltaic power generation at each time step"
|
||||||
|
},
|
||||||
|
"Apv": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"type": "continuous",
|
||||||
|
"definition": "The adjustment factor for photovoltaic power at each time step"
|
||||||
|
},
|
||||||
|
"ExcessEnergy": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"type": "continuous",
|
||||||
|
"definition": "The excess energy at each time step"
|
||||||
|
},
|
||||||
|
"ShortageEnergy": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"type": "continuous",
|
||||||
|
"definition": "The shortage energy at each time step"
|
||||||
|
},
|
||||||
|
"Asoc": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"type": "continuous",
|
||||||
|
"definition": "The adjustment factor for the battery state of charge at each time step"
|
||||||
|
},
|
||||||
|
"TotalPowerSupply": {
|
||||||
|
"shape": [
|
||||||
|
24
|
||||||
|
],
|
||||||
|
"type": "continuous",
|
||||||
|
"definition": "The total power supply from generators and renewable sources at each time step"
|
||||||
|
},
|
||||||
|
"GridTransactionLimit": {
|
||||||
|
"shape": [],
|
||||||
|
"type": "float",
|
||||||
|
"definition": "The maximum allowable excess power supply over the residential load to maintain grid stability"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
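Note on the max(0, .) and |.| terms in the objective above: Gurobi cannot evaluate Python's built-in max and abs on model expressions, so a working implementation has to introduce auxiliary variables and general constraints. The following is a minimal, self-contained gurobipy sketch of that linearization; it is not code from this repository, and the names balance, excess_abs, shifted, and over_limit are illustrative assumptions rather than identifiers from the dataset.

import gurobipy as gp
from gurobipy import GRB

T = 24
m = gp.Model("penalty_sketch")

# Signed hourly balance, standing in for TotalSupply[t] - Load[t].
balance = m.addVars(T, lb=-GRB.INFINITY, name="balance")

# excess_abs[t] = |balance[t]| via Gurobi's abs general constraint.
excess_abs = m.addVars(T, lb=0.0, name="excess_abs")
for t in range(T):
    m.addGenConstrAbs(excess_abs[t], balance[t], name=f"abs_{t}")

# over_limit[t] = max(balance[t] - 100, 0): the amount above the 100-unit threshold.
shifted = m.addVars(T, lb=-GRB.INFINITY, name="shifted")
over_limit = m.addVars(T, lb=0.0, name="over_limit")
for t in range(T):
    m.addConstr(shifted[t] == balance[t] - 100, name=f"shift_{t}")
    m.addGenConstrMax(over_limit[t], [shifted[t]], constant=0.0, name=f"max_{t}")

# 50 per unit above the threshold (the "Excess penalty") plus the |.| gap term.
m.setObjective(gp.quicksum(50 * over_limit[t] + excess_abs[t] for t in range(T)),
               GRB.MINIMIZE)
m.optimize()

The same pattern would apply to the Shortage penalty; in an actual comparison script the thresholds and weights would come from the problem data rather than the literals used here.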