update logic

parent d811d28ac7
commit 741fba6cd5
PPO.py (4 changes)
@@ -330,8 +330,8 @@ if __name__ == '__main__':
     buffer = list()
     '''init training parameters'''
     num_episode = args.num_episode
-    args.train = False
-    args.save_network = False
+    # args.train = False
+    # args.save_network = False
     # args.test_network = False
     # args.save_test_data = False
     # args.compare_with_gurobi = False
@@ -133,14 +133,10 @@ class AgentPPO:
         action_rl, noise = self.act.get_action(states[0])
         action_rl = action_rl.detach().cpu().numpy().flatten()
         noises = noise.detach().cpu().numpy().flatten()
-        # print(f"Action from RL model: {action_rl}")
-        # print(f"Noise: {noise}")
-        # print(f"Expected action dimension: {self.action_dim}")
         index = self.current_step % len(self.llm_actions)
         self.current_step += 1
         action_llm = self.llm_actions[index]
         action_llm = np.array(action_llm, dtype=np.float32)
-        # print(f"Action from LLM: {action_llm}")
         action_combined = 0.5 * action_rl + 0.5 * action_llm
         if action_combined.shape[0] != self.action_dim:
             raise ValueError("Combined action dimension mismatch. Check the action generation process.")
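For reference, a minimal sketch of the 50/50 blend this hunk keeps (the toy arrays below are illustrative, not values from the repository):

```python
import numpy as np

# hypothetical 5-dimensional actions, matching the env's action_space shape
action_rl = np.array([0.2, -0.5, 0.1, 0.0, 0.3], dtype=np.float32)   # policy output
action_llm = np.array([0.4, -0.1, 0.0, 0.2, 0.1], dtype=np.float32)  # LLM suggestion
action_combined = 0.5 * action_rl + 0.5 * action_llm  # equal-weight blend, as in the diff
assert action_combined.shape[0] == 5  # mirrors the dimension check above
```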
@@ -0,0 +1,393 @@
+import os
+import pickle
+from copy import deepcopy
+
+import numpy as np
+import pandas as pd
+import torch
+import torch.nn as nn
+
+from environment import ESSEnv
+from tools import get_episode_return, test_one_episode, optimization_base_result
+
+os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
+
+
+class ActorPPO(nn.Module):
+    def __init__(self, mid_dim, state_dim, action_dim, layer_norm=False):
+        super().__init__()
+        self.layer_norm = layer_norm
+        self.net = nn.Sequential(
+            nn.Linear(state_dim, mid_dim), nn.ReLU(),
+            nn.Linear(mid_dim, mid_dim), nn.ReLU(),
+            nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
+            nn.Linear(mid_dim, action_dim)
+        )
+        self.a_logstd = nn.Parameter(torch.zeros((1, action_dim)) - 0.5, requires_grad=True)
+        self.sqrt_2pi_log = np.log(np.sqrt(2 * np.pi))
+
+        if self.layer_norm:
+            self.apply_layer_norm()
+
+    def apply_layer_norm(self):
+        def init_weights(layer):
+            if isinstance(layer, nn.Linear):
+                nn.init.orthogonal_(layer.weight, 1.0)
+                nn.init.constant_(layer.bias, 0.0)
+
+        self.net.apply(init_weights)
+
+    def forward(self, state):
+        return self.net(state).tanh()
+
+    def get_action(self, state):
+        a_avg = self.forward(state)
+        a_std = self.a_logstd.exp()
+        noise = torch.randn_like(a_avg)
+        action = a_avg + noise * a_std
+        return action, noise
+
+    def get_logprob_entropy(self, state, action):
+        a_avg = self.forward(state)
+        a_std = self.a_logstd.exp()
+        delta = ((a_avg - action) / a_std).pow(2) * 0.5
+        logprob = -(self.a_logstd + self.sqrt_2pi_log + delta).sum(1)
+        dist_entropy = (logprob.exp() * logprob).mean()
+        return logprob, dist_entropy
+
+    def get_old_logprob(self, _action, noise):
+        delta = noise.pow(2) * 0.5
+        return -(self.a_logstd + self.sqrt_2pi_log + delta).sum(1)
+
+
+class CriticAdv(nn.Module):
+    def __init__(self, mid_dim, state_dim, _action_dim, layer_norm=False):
+        super().__init__()
+        self.layer_norm = layer_norm
+        self.net = nn.Sequential(
+            nn.Linear(state_dim, mid_dim), nn.ReLU(),
+            nn.Linear(mid_dim, mid_dim), nn.ReLU(),
+            nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
+            nn.Linear(mid_dim, 1)
+        )
+        if self.layer_norm:
+            self.apply_layer_norm()
+
+    def apply_layer_norm(self):
+        def init_weights(layer):
+            if isinstance(layer, nn.Linear):
+                nn.init.orthogonal_(layer.weight, 1.0)
+                nn.init.constant_(layer.bias, 0.0)
+
+        self.net.apply(init_weights)
+
+    def forward(self, state):
+        return self.net(state)
+
+
+class AgentPrimalDualPPO:
+    def __init__(self):
+        self.state = None
+        self.device = None
+        self.action_dim = None
+        self.get_obj_critic = None
+
+        self.criterion = torch.nn.SmoothL1Loss()
+        self.cri = self.cri_target = self.if_use_cri_target = self.cri_optim = self.ClassCri = None
+        self.act = self.act_target = self.if_use_act_target = self.act_optim = self.ClassAct = None
+
+        self.ClassCri = CriticAdv
+        self.ClassAct = ActorPPO
+
+        self.ratio_clip = 0.2
+        self.lambda_entropy = 0.02
+        self.lambda_gae_adv = 0.98
+        self.get_reward_sum = None
+        self.trajectory_list = None
+
+        self.lambda_cost = 1.0  # initial dual variable
+        self.constraint_value = 1.0  # constraint level, e.g. a safety-cost limit
+
+    def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, if_use_gae=False, gpu_id=0, layer_norm=False):
+        self.device = torch.device(f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu")
+        self.trajectory_list = list()
+        self.get_reward_sum = self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
+
+        self.cri = self.ClassCri(net_dim, state_dim, action_dim, layer_norm).to(self.device)
+        self.act = self.ClassAct(net_dim, state_dim, action_dim, layer_norm).to(
+            self.device) if self.ClassAct else self.cri
+        self.cri_target = deepcopy(self.cri) if self.if_use_cri_target else self.cri
+        self.act_target = deepcopy(self.act) if self.if_use_act_target else self.act
+
+        self.cri_optim = torch.optim.Adam(self.cri.parameters(), learning_rate)
+        self.act_optim = torch.optim.Adam(self.act.parameters(), learning_rate) if self.ClassAct else self.cri
+
+    def select_action(self, state):
+        states = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
+        actions, noises = self.act.get_action(states)
+        return actions[0].detach().cpu().numpy(), noises[0].detach().cpu().numpy()
+
+    def explore_env(self, env, target_step):
+        state = self.state
+        trajectory_temp = list()
+        last_done = 0
+        for i in range(target_step):
+            action, noise = self.select_action(state)
+            state, next_state, reward, done, cost = env.step(np.tanh(action))
+            trajectory_temp.append((state, reward, done, action, noise, cost))
+            if done:
+                state = env.reset()
+                last_done = i
+            else:
+                state = next_state
+        self.state = state
+
+        trajectory_list = self.trajectory_list + trajectory_temp[:last_done + 1]
+        self.trajectory_list = trajectory_temp[last_done:]
+        return trajectory_list
+
+    def update_net(self, buffer, batch_size, repeat_times, soft_update_tau):
+        with torch.no_grad():
+            buf_len = buffer[0].shape[0]
+            buf_state, buf_action, buf_noise, buf_reward, buf_mask, buf_cost = [ten.to(self.device) for ten in buffer]
+
+            buf_value = torch.cat([self.cri_target(buf_state[i:i + 4096]) for i in range(0, buf_len, 4096)], dim=0)
+            buf_logprob = self.act.get_old_logprob(buf_action, buf_noise)
+
+            buf_r_sum, buf_advantage = self.get_reward_sum(buf_len, buf_reward, buf_mask, buf_value)
+            buf_advantage = (buf_advantage - buf_advantage.mean()) / (buf_advantage.std() + 1e-5)
+
+            cost_sum = buf_cost.sum().item()
+            if cost_sum > self.constraint_value:
+                self.lambda_cost += 0.01 * (cost_sum - self.constraint_value)
+            else:
+                self.lambda_cost -= 0.01 * (self.constraint_value - cost_sum)
+            self.lambda_cost = max(self.lambda_cost, 0)
+
+        obj_critic = obj_actor = None
+        for _ in range(int(buf_len / batch_size * repeat_times)):
+            indices = torch.randint(buf_len, size=(batch_size,), requires_grad=False, device=self.device)
+            state = buf_state[indices]
+            action = buf_action[indices]
+            r_sum = buf_r_sum[indices]
+            logprob = buf_logprob[indices]
+            advantage = buf_advantage[indices]
+
+            new_logprob, obj_entropy = self.act.get_logprob_entropy(state, action)
+            ratio = (new_logprob - logprob.detach()).exp()
+            surrogate1 = advantage * ratio
+            surrogate2 = advantage * ratio.clamp(1 - self.ratio_clip, 1 + self.ratio_clip)
+            obj_surrogate = -torch.min(surrogate1, surrogate2).mean()
+            obj_actor = obj_surrogate + obj_entropy * self.lambda_entropy - self.lambda_cost * buf_cost[indices].mean()
+            self.optim_update(self.act_optim, obj_actor)
+
+            value = self.cri(state).squeeze(1)
+            obj_critic = self.criterion(value, r_sum)
+            self.optim_update(self.cri_optim, obj_critic)
+            if self.cri_target is not self.cri:
+                self.soft_update(self.cri_target, self.cri, soft_update_tau)
+
+        a_std_log = getattr(self.act, 'a_logstd', torch.zeros(1))  # the actor stores its log-std as a_logstd
+        return obj_critic.item(), obj_actor.item(), a_std_log.mean().item()
+
+    def get_reward_sum_raw(self, buf_len, buf_reward, buf_mask, buf_value) -> (torch.Tensor, torch.Tensor):
+        buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device)
+        pre_r_sum = 0
+        for i in range(buf_len - 1, -1, -1):
+            buf_r_sum[i] = buf_reward[i] + buf_mask[i] * pre_r_sum
+            pre_r_sum = buf_r_sum[i]
+        buf_advantage = buf_r_sum - (buf_mask * buf_value[:, 0])
+        return buf_r_sum, buf_advantage
+
+    def get_reward_sum_gae(self, buf_len, ten_reward, ten_mask, ten_value) -> (torch.Tensor, torch.Tensor):
+        buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device)
+        buf_advantage = torch.empty(buf_len, dtype=torch.float32, device=self.device)
+        pre_r_sum = 0
+        pre_advantage = 0
+        for i in range(buf_len - 1, -1, -1):
+            buf_r_sum[i] = ten_reward[i] + ten_mask[i] * pre_r_sum
+            pre_r_sum = buf_r_sum[i]
+            buf_advantage[i] = ten_reward[i] + ten_mask[i] * (pre_advantage - ten_value[i])
+            pre_advantage = ten_value[i] + buf_advantage[i] * self.lambda_gae_adv
+        return buf_r_sum, buf_advantage
+
+    @staticmethod
+    def optim_update(optimizer, objective):
+        optimizer.zero_grad()
+        objective.backward()
+        optimizer.step()
+
+    @staticmethod
+    def soft_update(target_net, current_net, tau):
+        for tar, cur in zip(target_net.parameters(), current_net.parameters()):
+            tar.data.copy_(cur.data * tau + tar.data * (1.0 - tau))
+
+
+class Arguments:
+    def __init__(self, agent=None, env=None):
+        self.agent = agent  # Deep Reinforcement Learning algorithm
+        self.env = env  # the environment for training
+        self.cwd = None  # current work directory. None means set automatically
+        self.if_remove = False  # remove the cwd folder? (True, False, None: ask me)
+        self.visible_gpu = '0'  # for example: os.environ['CUDA_VISIBLE_DEVICES'] = '0, 2,'
+        # self.worker_num = 2  # number of rollout workers per GPU (adjust it to get high GPU usage)
+        self.num_threads = 32  # cpu_num used when evaluating the model, torch.set_num_threads(self.num_threads)
+
+        '''Arguments for training'''
+        self.num_episode = 1000  # to control the train episodes for PPO
+        self.gamma = 0.995  # discount factor of future rewards
+        self.learning_rate = 2 ** -14  # about 6e-5
+        self.soft_update_tau = 2 ** -8  # 2 ** -8 ~= 5e-3
+
+        self.net_dim = 256  # the network width
+        self.batch_size = 4096  # num of transitions sampled from the replay buffer
+        self.repeat_times = 2 ** 3  # collect target_step, then update the network
+        self.target_step = 4096  # repeatedly update the network to keep the critic's loss small
+        self.max_memo = self.target_step  # capacity of replay buffer
+        self.if_per_or_gae = False  # GAE for on-policy sparse reward: Generalized Advantage Estimation
+
+        '''Arguments for evaluate'''
+        self.random_seed = 0  # initialize random seed in self.init_before_training()
+        # self.random_seed_list = [1234, 2234, 3234, 4234, 5234]
+        self.random_seed_list = [1234]
+        self.train = True
+        self.save_network = True
+        self.test_network = True
+        self.save_test_data = True
+        self.compare_with_gurobi = True
+        self.plot_on = True
+
+    def init_before_training(self, if_main):
+        if self.cwd is None:
+            agent_name = self.agent.__class__.__name__
+            self.cwd = f'./{agent_name}'
+
+        if if_main:
+            import shutil  # remove history according to bool(if_remove)
+            if self.if_remove is None:
+                self.if_remove = bool(input(f"| PRESS 'y' to REMOVE: {self.cwd}? ") == 'y')
+            elif self.if_remove:
+                shutil.rmtree(self.cwd, ignore_errors=True)
+                print(f"| Remove cwd: {self.cwd}")
+            os.makedirs(self.cwd, exist_ok=True)
+
+        np.random.seed(self.random_seed)
+        torch.manual_seed(self.random_seed)
+        torch.set_num_threads(self.num_threads)
+        torch.set_default_dtype(torch.float32)
+
+        os.environ['CUDA_VISIBLE_DEVICES'] = str(self.visible_gpu)
+
+
+def update_buffer(_trajectory):
+    _trajectory = list(map(list, zip(*_trajectory)))  # 2D-list transpose; split the trajectory into its parts
+    ten_state = torch.as_tensor(_trajectory[0])  # tensor state here
+    ten_reward = torch.as_tensor(_trajectory[1], dtype=torch.float32)
+    # _trajectory[2] = done; replace done by mask to save memory
+    ten_mask = (1.0 - torch.as_tensor(_trajectory[2], dtype=torch.float32)) * gamma
+    ten_action = torch.as_tensor(_trajectory[3])
+    ten_noise = torch.as_tensor(_trajectory[4], dtype=torch.float32)
+    ten_cost = torch.as_tensor(_trajectory[5], dtype=torch.float32)  # update_net unpacks six tensors, cost included
+
+    buffer[:] = (ten_state, ten_action, ten_noise, ten_reward, ten_mask, ten_cost)  # list stores tensors
+
+    _steps = ten_reward.shape[0]  # how many steps are collected in all trajectories
+    _r_exp = ten_reward.mean()  # the mean reward
+    return _steps, _r_exp
+
+
+if __name__ == '__main__':
+    args = Arguments()
+    reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
+    loss_record = {'episode': [], 'steps': [], 'critic_loss': [], 'actor_loss': [], 'entropy_loss': []}
+    args.visible_gpu = '0'
+    for seed in args.random_seed_list:
+        args.random_seed = seed
+        args.agent = AgentPrimalDualPPO()
+
+        agent_name = f'{args.agent.__class__.__name__}'
+        args.agent.cri_target = True
+        args.env = ESSEnv()
+        args.init_before_training(if_main=True)
+        agent = args.agent
+        env = args.env
+        agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
+                   args.if_per_or_gae, layer_norm=True)
+
+        cwd = args.cwd
+        gamma = args.gamma
+        batch_size = args.batch_size  # how much data should be used to update the net
+        target_step = args.target_step  # how many steps to collect before one episode stops
+        repeat_times = args.repeat_times  # how many updates to run on one batch of data
+        soft_update_tau = args.soft_update_tau
+        agent.state = env.reset()
+        buffer = list()
+        num_episode = args.num_episode
+
+        if args.train:
+            for i_episode in range(num_episode):
+                with torch.no_grad():
+                    trajectory_list = []
+                    for _ in range(target_step):
+                        current_obs = agent.state
+                        action, noise = agent.select_action(current_obs)
+                        next_obs, reward, done, info, cost = env.step(action)
+                        trajectory_list.append((current_obs, reward, done, action, noise, cost))
+                        agent.state = next_obs
+                        if done:
+                            break
+                    steps, r_exp = update_buffer(trajectory_list)
+                critic_loss, actor_loss, entropy_loss = agent.update_net(buffer, batch_size, repeat_times, soft_update_tau)
+                loss_record['critic_loss'].append(critic_loss)
+                loss_record['actor_loss'].append(actor_loss)
+                loss_record['entropy_loss'].append(entropy_loss)
+
+                with torch.no_grad():
+                    episode_reward, episode_unbalance = get_episode_return(env, agent.act, agent.device)
+                    reward_record['mean_episode_reward'].append(episode_reward)
+                    reward_record['unbalance'].append(episode_unbalance)
+                print(f'current episode is {i_episode}, reward: {episode_reward}, unbalance: {episode_unbalance}')
+
+        act_save_path = f'{args.cwd}/actor.pth'
+        loss_record_path = f'{args.cwd}/loss_data.pkl'
+        reward_record_path = f'{args.cwd}/reward_data.pkl'
+
+        with open(loss_record_path, 'wb') as tf:
+            pickle.dump(loss_record, tf)
+        with open(reward_record_path, 'wb') as tf:
+            pickle.dump(reward_record, tf)
+
+        if args.save_network:
+            torch.save(agent.act.state_dict(), act_save_path)
+            print('actor parameters have been saved')
+
+        if args.test_network:
+            args.cwd = agent_name
+            agent.act.load_state_dict(torch.load(act_save_path))
+            print('parameters have been reloaded for testing')
+            record = test_one_episode(env, agent.act, agent.device)
+            eval_data = pd.DataFrame(record['system_info'])
+            eval_data.columns = ['time_step', 'price', 'netload', 'action', 'real_action', 'soc', 'battery', 'gen1', 'gen2',
+                                 'gen3', 'temperature', 'irradiance', 'unbalance', 'operation_cost']
+        if args.save_test_data:
+            test_data_save_path = f'{args.cwd}/test_data.pkl'
+            with open(test_data_save_path, 'wb') as tf:
+                pickle.dump(record, tf)
+
+        '''compare with the Gurobi data and results'''
+        if args.compare_with_gurobi:
+            month = record['init_info'][0][0]
+            day = record['init_info'][0][1]
+            initial_soc = record['init_info'][0][3]
+            base_result = optimization_base_result(env, month, day, initial_soc)
+            if args.plot_on:
+                from plotDRL import PlotArgs, make_dir, plot_evaluation_information, plot_optimization_result
+
+                plot_args = PlotArgs()
+                plot_args.feature_change = 'primal_dual'
+                args.cwd = agent_name
+                plot_dir = make_dir(args.cwd, plot_args.feature_change)
+                plot_optimization_result(base_result, plot_dir)
+                plot_evaluation_information(args.cwd + '/' + 'test_data.pkl', plot_dir)
+            '''compare the costs obtained from Gurobi and from PPO'''
+            ratio = sum(eval_data['operation_cost']) / sum(base_result['step_cost'])
+            print('operation_cost_sum:', sum(eval_data['operation_cost']))
+            print('step_cost_sum:', sum(base_result['step_cost']))
+            print('ratio:', ratio)
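The dual update in the new file's update_net is plain projected dual ascent: lambda_cost rises when the collected cost exceeds constraint_value and falls otherwise, floored at zero. The if/else in the code is equivalent to a single signed step; a toy trace, with made-up cost sums:

```python
lambda_cost, constraint_value, dual_lr = 1.0, 1.0, 0.01  # values taken from the diff
for cost_sum in (1.5, 0.4, 2.0):  # hypothetical per-update cost sums
    # lambda <- max(lambda + lr * (cost - limit), 0), same as the if/else above
    lambda_cost = max(lambda_cost + dual_lr * (cost_sum - constraint_value), 0.0)
    print(round(lambda_cost, 3))  # 1.005, 0.999, 1.009
```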
SAC.py (10 changes)
@@ -55,11 +55,11 @@ if __name__ == '__main__':
     '''here record real unbalance'''

     ##
-    # args.train=False
-    # args.save_network=False
-    # args.test_network=False
-    # args.save_test_data=False
-    # args.compare_with_gurobi=False
+    args.train = False
+    args.save_network = False
+    # args.test_network = False
+    # args.save_test_data = False
+    # args.compare_with_gurobi = False
     #
     if args.train:
         collect_data = True
@@ -1,6 +1,5 @@
 class Constant:
     MONTHS_LEN = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
-    MAX_STEP_HOURS = 24 * 30


 class DataManager:
@@ -10,6 +9,7 @@ class DataManager:
         self.Temperature = []
         self.Irradiance = []
         self.Wind = []
+        self.LLM = []

     def add_price_element(self, element): self.Prices.append(element)
@@ -21,6 +21,8 @@ class DataManager:

     def add_wind_element(self, element): self.Wind.append(element)

+    def add_llm_element(self, element): self.LLM.append(element)
+
     # get current time data based on given month, day, and day_time
     def get_price_data(self, month, day, day_time):
         return self.Prices[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time]
@@ -37,6 +39,9 @@ class DataManager:
     def get_wind_data(self, month, day, day_time):
         return self.Wind[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time]

+    def get_llm_data(self, month, day, day_time):
+        return self.LLM[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time]
+
     # get series data for one episode
     def get_series_price_data(self, month, day):
         return self.Prices[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24:
@@ -57,3 +62,7 @@ class DataManager:
     def get_series_wind_data(self, month, day):
         return self.Wind[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24:
                          (sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + 24]
+
+    # def get_series_llm_data(self, month, day):
+    #     return self.LLM[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24:
+    #                     (sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + 24]
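All DataManager lookups, including the new get_llm_data, share one indexing rule: days elapsed since January 1 times 24, plus the hour. A small self-check, assuming the non-leap-year MONTHS_LEN above:

```python
MONTHS_LEN = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]

def hour_index(month, day, day_time):
    # (days elapsed before this 1-based month/day) * 24 + hour of day
    return (sum(MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time

assert hour_index(1, 1, 0) == 0
assert hour_index(3, 2, 5) == (31 + 28 + 1) * 24 + 5  # == 1445
```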
@@ -23,13 +23,13 @@ class ESSEnv(gym.Env):
         self.day = None
         self.TRAIN = True
         self.current_time = None
-        self.episode_length = kwargs.get('episode_length', 24)
+        self.episode_length = 24
+        self.penalty_coefficient = 50  # constraint penalty coefficient
+        self.sell_coefficient = 0.5  # sell profit coefficient
         self.battery_parameters = kwargs.get('battery_parameters', battery_parameters)
         self.dg_parameters = kwargs.get('dg_parameters', dg_parameters)
         self.solar_parameters = kwargs.get('solar_parameters', solar_parameters)
         self.wind_parameters = kwargs.get('wind_parameters', wind_parameters)
-        self.penalty_coefficient = 50  # constraint penalty coefficient
-        self.sell_coefficient = 0.5  # sell profit coefficient

         self.grid = Grid()
         self.battery = Battery(self.battery_parameters)
@@ -65,19 +65,19 @@ class ESSEnv(gym.Env):
         time_step = self.current_time

         price = self.data_manager.get_price_data(self.month, self.day, self.current_time)
-        house_load = self.data_manager.get_load_cons_data(self.month, self.day, self.current_time)
+        houseload = self.data_manager.get_load_cons_data(self.month, self.day, self.current_time)
         temperature = self.data_manager.get_temperature_data(self.month, self.day, self.current_time)
         irradiance = self.data_manager.get_irradiance_data(self.month, self.day, self.current_time)
-        wind_speed = self.data_manager.get_wind_data(self.month, self.day, self.current_time)
+        windspeed = self.data_manager.get_wind_data(self.month, self.day, self.current_time)

         pv_generation = self.solar.step(temperature, irradiance)
-        wd_generation = self.wind.step(wind_speed)
+        wd_generation = self.wind.step(windspeed)
         generation = pv_generation + wd_generation
-        net_load = house_load - generation
+        netload = houseload - generation

-        obs = np.concatenate((np.float32(time_step), np.float32(price), np.float32(soc), np.float32(net_load),
+        obs = np.concatenate((np.float32(time_step), np.float32(price), np.float32(soc), np.float32(netload),
                               np.float32(dg1_output), np.float32(dg2_output), np.float32(dg3_output),
-                              np.float32(temperature), np.float32(irradiance), np.float32(wind_speed)), axis=None)
+                              np.float32(temperature), np.float32(irradiance), np.float32(windspeed)), axis=None)
         return obs

     def step(self, action):  # state transition: current_obs->take_action->get_reward->get_finish->next_obs
@@ -93,19 +93,17 @@ class ESSEnv(gym.Env):
         self.solar.step(action[4], temperature, irradiance)
         self.wind.step(wind_speed)
         self.current_output = np.array((self.dg1.current_output, self.dg2.current_output, self.dg3.current_output,
-                                        -self.battery.energy_change, self.solar.current_power, self.wind.current_power))
+                                        -self.battery.energy_change))
         actual_production = sum(self.current_output)
         price = current_obs[1]
         netload = current_obs[3]
         unbalance = actual_production - netload

-        reward = 0
+        reward = 0.0
         excess_penalty = 0  # overproduction
         deficient_penalty = 0  # underproduction
-        sell_benefit = 0
-        buy_cost = 0
-        self.excess = 0
-        self.shedding = 0
+        sell_benefit, buy_cost = 0, 0
+        self.excess, self.shedding = 0, 0
         if unbalance >= 0:  # surplus now
             if unbalance <= self.grid.exchange_ability:
                 # sell money to grid is little [0.029,0.1]
@@ -122,7 +120,7 @@ class ESSEnv(gym.Env):
                 buy_cost = self.grid.get_cost(price, self.grid.exchange_ability)
                 self.shedding = abs(unbalance) - self.grid.exchange_ability
                 deficient_penalty = self.shedding * self.penalty_coefficient
-        battery_cost = self.battery.get_cost(self.battery.energy_change)
+        battery_cost = self.battery.get_cost(self.battery.energy_change, self.battery.current_capacity)
         dg1_cost = self.dg1.get_cost(self.dg1.current_output)
         dg2_cost = self.dg2.get_cost(self.dg2.current_output)
         dg3_cost = self.dg3.get_cost(self.dg3.current_output)
@@ -164,6 +162,7 @@ class ESSEnv(gym.Env):
         wind = wind_df['wind_speed'].to_numpy(dtype=float)

         '''Rescale the prices, generation, and demand'''
+
         def process_elements(elements, transform_function, add_function):
             for element in elements:
                 transformed_element = transform_function(element)
@@ -0,0 +1,177 @@
+import gym
+import json
+import numpy as np
+import pandas as pd
+
+from module import *
+from parameters import *
+from data_manager import *
+
+
+class ESSEnv(gym.Env):
+    def __init__(self, **kwargs):
+        super(ESSEnv, self).__init__()
+        self.excess = None
+        self.shedding = None
+        self.unbalance = None
+        self.real_unbalance = None
+        self.operation_cost = None
+        self.current_output = None
+        self.final_step_outputs = None
+        self.data_manager = DataManager()
+        self._load_year_data()
+        self.month = None
+        self.day = None
+        self.TRAIN = True
+        self.current_time = None
+        self.episode_length = kwargs.get('episode_length', 24)
+        self.battery_parameters = kwargs.get('battery_parameters', battery_parameters)
+        self.dg_parameters = kwargs.get('dg_parameters', dg_parameters)
+        self.solar_parameters = kwargs.get('solar_parameters', solar_parameters)
+        self.wind_parameters = kwargs.get('wind_parameters', wind_parameters)
+        self.penalty_coefficient = 50  # constraint penalty coefficient
+        self.sell_coefficient = 0.5  # sell profit coefficient
+
+        self.grid = Grid()
+        self.battery = Battery(self.battery_parameters)
+        self.dg1 = DG(self.dg_parameters['gen_1'])
+        self.dg2 = DG(self.dg_parameters['gen_2'])
+        self.dg3 = DG(self.dg_parameters['gen_3'])
+        self.solar = Solar(self.solar_parameters)
+        self.wind = Wind(self.wind_parameters)
+
+        self.action_space = gym.spaces.Box(low=-1, high=1, shape=(5,), dtype=np.float32)  # voltage-regulation action added
+        self.state_space = gym.spaces.Box(low=0, high=1, shape=(15,), dtype=np.float32)  # shape adjusted for the LLM feature
+
+    def reset(self, *args):
+        self.month = np.random.randint(1, 13)  # choose one of 12 months
+        if self.TRAIN:
+            self.day = np.random.randint(1, 20)
+        else:
+            self.day = np.random.randint(20, Constant.MONTHS_LEN[self.month - 1] - 1)  # months are 1-based, so index with month - 1
+        self.current_time = 0
+        self.battery.reset()
+        self.dg1.reset()
+        self.dg2.reset()
+        self.dg3.reset()
+        self.solar.reset()
+        self.wind.reset()
+        return self._build_state()
+
+    def _build_state(self):
+        soc = self.battery.SOC()
+        dg1_output = self.dg1.current_output
+        dg2_output = self.dg2.current_output
+        dg3_output = self.dg3.current_output
+        time_step = self.current_time
+
+        price = self.data_manager.get_price_data(self.month, self.day, self.current_time)
+        houseload = self.data_manager.get_load_cons_data(self.month, self.day, self.current_time)
+        temperature = self.data_manager.get_temperature_data(self.month, self.day, self.current_time)
+        irradiance = self.data_manager.get_irradiance_data(self.month, self.day, self.current_time)
+        wind_speed = self.data_manager.get_wind_data(self.month, self.day, self.current_time)
+        llm_data = self.data_manager.get_llm_data(self.month, self.day, self.current_time)
+
+        pv_generation = self.solar.step(temperature, irradiance)
+        wd_generation = self.wind.step(wind_speed)
+        generation = pv_generation + wd_generation
+        netload = houseload - generation
+
+        obs = np.concatenate((np.float32(time_step), np.float32(price), np.float32(soc), np.float32(netload),
+                              np.float32(dg1_output), np.float32(dg2_output), np.float32(dg3_output),
+                              np.float32(temperature), np.float32(irradiance), np.float32(wind_speed),
+                              np.float32(llm_data)), axis=None)
+        return obs
+
+    def step(self, action):  # state transition: current_obs->take_action->get_reward->get_finish->next_obs
+        # apply the action to each component
+        current_obs = self._build_state()
+        temperature = current_obs[7]
+        irradiance = current_obs[8]
+        wind_speed = current_obs[9]
+        self.battery.step(action[0])  # the state transition also updates the battery's current capacity
+        self.dg1.step(action[1])
+        self.dg2.step(action[2])
+        self.dg3.step(action[3])
+        self.solar.step(action[4], temperature, irradiance)
+        self.wind.step(wind_speed)
+        self.current_output = np.array((self.dg1.current_output, self.dg2.current_output, self.dg3.current_output,
+                                        -self.battery.energy_change))
+        actual_production = sum(self.current_output)
+        price = current_obs[1]
+        netload = current_obs[3]
+        unbalance = actual_production - netload
+
+        reward = 0
+        excess_penalty = 0  # overproduction
+        deficient_penalty = 0  # underproduction
+        sell_benefit = 0
+        buy_cost = 0
+        self.excess = 0
+        self.shedding = 0
+        if unbalance >= 0:  # surplus now
+            if unbalance <= self.grid.exchange_ability:
+                # money from selling to the grid is small [0.029,0.1]
+                sell_benefit = self.grid.get_cost(price, unbalance) * self.sell_coefficient
+            else:
+                sell_benefit = self.grid.get_cost(price, self.grid.exchange_ability) * self.sell_coefficient
+                # real unbalance: more than the grid can absorb
+                self.excess = unbalance - self.grid.exchange_ability
+                excess_penalty = self.excess * self.penalty_coefficient
+        else:  # unbalance < 0, apply a shortage penalty
+            if abs(unbalance) <= self.grid.exchange_ability:
+                buy_cost = self.grid.get_cost(price, abs(unbalance))
+            else:
+                buy_cost = self.grid.get_cost(price, self.grid.exchange_ability)
+                self.shedding = abs(unbalance) - self.grid.exchange_ability
+                deficient_penalty = self.shedding * self.penalty_coefficient
+        battery_cost = self.battery.get_cost(self.battery.energy_change, self.battery.current_capacity)
+        dg1_cost = self.dg1.get_cost(self.dg1.current_output)
+        dg2_cost = self.dg2.get_cost(self.dg2.current_output)
+        dg3_cost = self.dg3.get_cost(self.dg3.current_output)
+        solar_cost = self.solar.get_cost(self.solar.current_power)
+        wind_cost = self.wind.gen_cost(self.wind.current_power)
+
+        self.operation_cost = (battery_cost + dg1_cost + dg2_cost + dg3_cost + solar_cost + wind_cost + excess_penalty +
+                               deficient_penalty - sell_benefit + buy_cost)
+        reward -= self.operation_cost / 1e3
+        self.unbalance = unbalance
+        self.real_unbalance = self.shedding + self.excess
+        final_step_outputs = [self.dg1.current_output, self.dg2.current_output, self.dg3.current_output,
+                              self.battery.current_capacity, self.solar.current_power, self.wind.current_power]
+        self.current_time += 1
+        finish = (self.current_time == self.episode_length)
+        if finish:
+            self.final_step_outputs = final_step_outputs
+            self.current_time = 0
+            next_obs = self.reset()
+        else:
+            next_obs = self._build_state()
+        return current_obs, next_obs, float(reward), finish
+
+    def _load_year_data(self):
+        price_df = pd.read_csv('data/prices.csv', sep=',')
+        load_df = pd.read_csv('data/houseload.csv', sep=',')
+        irradiance_df = pd.read_csv('data/irradiance.csv', sep=',')
+        temperature_df = pd.read_csv('data/temper.csv', sep=',')
+        wind_df = pd.read_csv('data/wind.csv', sep=',')
+        llm_data = json.load(open('data/llm_action.json', 'r'))
+
+        price = price_df['price'].to_numpy(dtype=float)
+        load = load_df['houseload'].to_numpy(dtype=float)
+        irradiance = irradiance_df['irradiance'].to_numpy(dtype=float)
+        temperature = temperature_df['t2m'].to_numpy(dtype=float)
+        wind = wind_df['wind_speed'].to_numpy(dtype=float)
+
+        '''Rescale the prices, generation, and demand'''
+        def process_elements(elements, transform_function, add_function):
+            for element in elements:
+                transformed_element = transform_function(element)
+                add_function(transformed_element)
+
+        process_elements(price, lambda x: max(x / 10, 0.5), self.data_manager.add_price_element)
+        process_elements(load, lambda x: x * 3, self.data_manager.add_load_element)
+        process_elements(irradiance, lambda x: x, self.data_manager.add_irradiance_element)
+        process_elements(temperature, lambda x: x - 273.15, self.data_manager.add_temperature_element)
+        process_elements(wind, lambda x: x, self.data_manager.add_wind_element)
+        process_elements(llm_data, lambda x: x, self.data_manager.add_llm_element)
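The settlement logic in step is easiest to see in isolation. A sketch of the surplus branch, with grid.get_cost approximated as price * energy (an assumption; the real Grid class lives in module.py):

```python
def settle_surplus(unbalance, exchange_ability, price,
                   sell_coefficient=0.5, penalty_coefficient=50):
    # toy stand-in for Grid.get_cost
    def grid_cost(p, e):
        return p * e

    if unbalance <= exchange_ability:
        return grid_cost(price, unbalance) * sell_coefficient, 0.0
    # the grid cannot absorb everything: sell up to the cap, penalize the rest
    sell = grid_cost(price, exchange_ability) * sell_coefficient
    excess_penalty = (unbalance - exchange_ability) * penalty_coefficient
    return sell, excess_penalty

print(settle_surplus(120.0, 100.0, 0.05))  # (2.5, 1000.0)
```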
@@ -0,0 +1,184 @@
+import gym
+import numpy as np
+import pandas as pd
+
+from module import *
+from parameters import *
+from data_manager import *
+
+
+class ESSEnv(gym.Env):
+    def __init__(self, **kwargs):
+        super(ESSEnv, self).__init__()
+        self.excess = None
+        self.shedding = None
+        self.unbalance = None
+        self.real_unbalance = None
+        self.operation_cost = None
+        self.current_output = None
+        self.final_step_outputs = None
+        self.data_manager = DataManager()
+        self._load_year_data()
+        self.month = None
+        self.day = None
+        self.TRAIN = True
+        self.current_time = None
+        self.episode_length = kwargs.get('episode_length', 24)
+        self.battery_parameters = kwargs.get('battery_parameters', battery_parameters)
+        self.dg_parameters = kwargs.get('dg_parameters', dg_parameters)
+        self.solar_parameters = kwargs.get('solar_parameters', solar_parameters)
+        self.wind_parameters = kwargs.get('wind_parameters', wind_parameters)
+        self.penalty_coefficient = 50  # constraint penalty coefficient
+        self.sell_coefficient = 0.5  # sell profit coefficient
+
+        self.grid = Grid()
+        self.battery = Battery(self.battery_parameters)
+        self.dg1 = DG(self.dg_parameters['gen_1'])
+        self.dg2 = DG(self.dg_parameters['gen_2'])
+        self.dg3 = DG(self.dg_parameters['gen_3'])
+        self.solar = Solar(self.solar_parameters)
+        self.wind = Wind(self.wind_parameters)
+
+        self.action_space = gym.spaces.Box(low=-1, high=1, shape=(5,), dtype=np.float32)  # voltage-regulation action added
+        self.state_space = gym.spaces.Box(low=0, high=1, shape=(10,), dtype=np.float32)
+
+    def reset(self, *args):
+        self.month = np.random.randint(1, 13)  # choose one of 12 months
+        if self.TRAIN:
+            self.day = np.random.randint(1, 20)
+        else:
+            self.day = np.random.randint(20, Constant.MONTHS_LEN[self.month - 1] - 1)  # months are 1-based, so index with month - 1
+        self.current_time = 0
+        self.battery.reset()
+        self.dg1.reset()
+        self.dg2.reset()
+        self.dg3.reset()
+        self.solar.reset()
+        self.wind.reset()
+        return self._build_state()
+
+    def _build_state(self):
+        soc = self.battery.SOC()
+        dg1_output = self.dg1.current_output
+        dg2_output = self.dg2.current_output
+        dg3_output = self.dg3.current_output
+        time_step = self.current_time
+
+        price = self.data_manager.get_price_data(self.month, self.day, self.current_time)
+        houseload = self.data_manager.get_load_cons_data(self.month, self.day, self.current_time)
+        temperature = self.data_manager.get_temperature_data(self.month, self.day, self.current_time)
+        irradiance = self.data_manager.get_irradiance_data(self.month, self.day, self.current_time)
+        wind_speed = self.data_manager.get_wind_data(self.month, self.day, self.current_time)
+
+        obs = np.concatenate((np.float32(time_step), np.float32(price), np.float32(soc), np.float32(houseload),
+                              np.float32(dg1_output), np.float32(dg2_output), np.float32(dg3_output),
+                              np.float32(temperature), np.float32(irradiance), np.float32(wind_speed)), axis=None)
+        return obs
+
+    def step(self, action):  # state transition: current_obs->take_action->get_reward->get_finish->next_obs
+        # apply the action to each component
+        current_obs = self._build_state()
+        temperature = current_obs[7]
+        irradiance = current_obs[8]
+        wind_speed = current_obs[9]
+        self.battery.step(action[0])  # the state transition also updates the battery's current capacity
+        self.dg1.step(action[1])
+        self.dg2.step(action[2])
+        self.dg3.step(action[3])
+        self.solar.step(action[4], temperature, irradiance)
+        self.wind.step(wind_speed)
+        self.current_output = np.array((self.dg1.current_output, self.dg2.current_output, self.dg3.current_output,
+                                        -self.battery.energy_change, self.solar.current_power, self.wind.current_power))
+        actual_production = sum(self.current_output)
+        price = current_obs[1]
+        houseload = current_obs[3]
+        unbalance = actual_production - houseload
+
+        reward = 0
+        excess_penalty = 0  # overproduction
+        deficient_penalty = 0  # underproduction
+        sell_benefit = 0
+        buy_cost = 0
+        self.excess = 0
+        self.shedding = 0
+        if unbalance >= 0:  # surplus now
+            if unbalance <= self.grid.exchange_ability:
+                # money from selling to the grid is small [0.029,0.1]
+                sell_benefit = self.grid.get_cost(price, unbalance) * self.sell_coefficient
+            else:
+                sell_benefit = self.grid.get_cost(price, self.grid.exchange_ability) * self.sell_coefficient
+                # real unbalance: more than the grid can absorb
+                self.excess = unbalance - self.grid.exchange_ability
+                excess_penalty = self.excess * self.penalty_coefficient
+        else:  # unbalance < 0, apply a shortage penalty
+            if abs(unbalance) <= self.grid.exchange_ability:
+                buy_cost = self.grid.get_cost(price, abs(unbalance))
+            else:
+                buy_cost = self.grid.get_cost(price, self.grid.exchange_ability)
+                self.shedding = abs(unbalance) - self.grid.exchange_ability
+                deficient_penalty = self.shedding * self.penalty_coefficient
+        battery_cost = self.battery.get_cost(self.battery.energy_change, self.battery.current_capacity)
+        dg1_cost = self.dg1.get_cost(self.dg1.current_output)
+        dg2_cost = self.dg2.get_cost(self.dg2.current_output)
+        dg3_cost = self.dg3.get_cost(self.dg3.current_output)
+        solar_cost = self.solar.get_cost(self.solar.current_power)
+        wind_cost = self.wind.gen_cost(self.wind.current_power)
+
+        self.operation_cost = (battery_cost + dg1_cost + dg2_cost + dg3_cost + solar_cost + wind_cost + excess_penalty +
+                               deficient_penalty - sell_benefit + buy_cost)
+        reward -= self.operation_cost / 1e3
+        self.unbalance = unbalance
+        self.real_unbalance = self.shedding + self.excess
+        final_step_outputs = [self.dg1.current_output, self.dg2.current_output, self.dg3.current_output,
+                              self.battery.current_capacity, self.solar.current_power, self.wind.current_power]
+        self.current_time += 1
+        finish = (self.current_time == self.episode_length)
+        if finish:
+            self.final_step_outputs = final_step_outputs
+            self.current_time = 0
+            next_obs = self.reset()
+        else:
+            next_obs = self._build_state()
+        return current_obs, next_obs, float(reward), finish
+
+    # def render(self, current_obs, next_obs, reward, finish):
+    #     print('day={},hour={:2d}, state={}, next_state={}, reward={:.4f}, terminal={}\n'.
+    #           format(self.day, self.current_time, current_obs, next_obs, reward, finish))
+
+    def _load_year_data(self):
+        price_df = pd.read_csv('data/prices.csv', sep=',')
+        load_df = pd.read_csv('data/houseload.csv', sep=',')
+        irradiance_df = pd.read_csv('data/irradiance.csv', sep=',')
+        temperature_df = pd.read_csv('data/temper.csv', sep=',')
+        wind_df = pd.read_csv('data/wind.csv', sep=',')
+
+        price = price_df['price'].to_numpy(dtype=float)
+        load = load_df['houseload'].to_numpy(dtype=float)
+        irradiance = irradiance_df['irradiance'].to_numpy(dtype=float)
+        temperature = temperature_df['t2m'].to_numpy(dtype=float)
+        wind = wind_df['wind_speed'].to_numpy(dtype=float)
+
+        '''Rescale the prices, generation, and demand'''
+        def process_elements(elements, transform_function, add_function):
+            for element in elements:
+                transformed_element = transform_function(element)
+                add_function(transformed_element)
+
+        process_elements(price, lambda x: max(x / 10, 0.5), self.data_manager.add_price_element)
+        process_elements(load, lambda x: x * 3, self.data_manager.add_load_element)
+        process_elements(irradiance, lambda x: x, self.data_manager.add_irradiance_element)
+        process_elements(temperature, lambda x: x - 273.15, self.data_manager.add_temperature_element)
+        process_elements(wind, lambda x: x, self.data_manager.add_wind_element)
+
+
+# if __name__ == '__main__':
+#     env = ESSEnv()
+#     env.TRAIN = False
+#     rewards = []
+#     env.reset()
+#     tem_action = [0.1, 0.1, 0.1, 0.1, 0.1]
+#     for _ in range(144):
+#         print(f'current month is {env.month}, current day is {env.day}, current time is {env.current_time}')
+#         current_obs, next_obs, reward, finish = env.step(tem_action)
+#         env.render(current_obs, next_obs, reward, finish)
+#         current_obs = next_obs
+#         rewards.append(reward)
module.py (37 changes)
@@ -14,7 +14,6 @@ class DG:
         self.power_output_min = parameters['power_output_min']
         self.ramping_up = parameters['ramping_up']
         self.ramping_down = parameters['ramping_down']
-        self.last_step_output = None

     def step(self, action_gen):
         output_change = action_gen * self.ramping_up  # constrain the output change with the ramping-up boundary
@@ -26,9 +25,6 @@ class DG:
         self.current_output = output

     def get_cost(self, output):
-        if output <= 0:
-            cost = 0
-        else:
-            cost = (self.a_factor * pow(output, 2) + self.b_factor * output + self.c_factor)
+        cost = (self.a_factor * pow(output, 2) + self.b_factor * output + self.c_factor)
         return cost

@@ -43,25 +39,25 @@ class Battery:
         self.current_capacity = None
         self.energy_change = None
         self.capacity = parameters['capacity']
+        self.min_soc = parameters['min_soc']
         self.max_soc = parameters['max_soc']
         self.initial_capacity = parameters['initial_capacity']
-        self.min_soc = parameters['min_soc']
-        self.degradation = parameters['degradation']  # degradation cost 1.2
-        self.max_charge = parameters['max_charge']  # max charge ability
-        self.max_discharge = parameters['max_discharge']
+        self.degradation = parameters['degradation']
+        self.holding = parameters['holding']
+        self.max_charge = parameters['max_charge']
+        # self.max_discharge = parameters['max_discharge']
         self.efficiency = parameters['efficiency']

     def step(self, action_battery):
         energy = action_battery * self.max_charge
-        updated_capacity = np.maximum(self.min_soc,
-                                      np.minimum(self.max_soc,
-                                                 (self.current_capacity * self.capacity + energy) / self.capacity))
+        current_energy = self.current_capacity * self.capacity
+        updated_capacity = np.maximum(self.min_soc, np.minimum(self.max_soc, (current_energy + energy) / self.capacity))
         # if charge, positive; if discharge, negative
         self.energy_change = (updated_capacity - self.current_capacity) * self.capacity
-        self.current_capacity = updated_capacity  # update capacity to current condition
+        self.current_capacity = updated_capacity  # update capacity to current state

-    def get_cost(self, energy):  # cost depends on the energy change
-        cost = energy ** 2 * self.degradation
+    def get_cost(self, energy_change, energy_hold):  # cost depends on the energy change
+        cost = energy_change * self.degradation + energy_hold * self.holding
         return cost

     def SOC(self):
@@ -91,12 +87,6 @@ class Solar:
         V_oc = self.oc_voltage + self.temper_coefficient * (temperature - self.refer_temperature)

         current = I_sc - (V_oc / self.sh_resistance)
-        # current = I_sc
-        # for _ in range(10):  # number of iterations
-        #     new_current = I_sc - (V_oc + current * self.s_resistance) / self.sh_resistance
-        #     if abs(new_current - current) < 1e-6:  # convergence condition
-        #         break
-        #     current = new_current
         self.current_power = max((1 + action_voltage) * self.base_voltage * current, 0)
         return self.current_power

@@ -123,12 +113,11 @@ class Wind:
         self.opex_cofficient = parameters['opex_cofficient']

     def step(self, wind_speed):
+        constant = 0.5 * self.air_density * self.rotor_radius ** 2 * self.power_coefficient * self.generator_efficiency
         if self.cutin_speed <= wind_speed < self.rated_speed:
-            self.current_power = (0.5 * self.air_density * self.rotor_radius ** 2 * wind_speed ** 3 *
-                                  self.power_coefficient * self.generator_efficiency) / 1e3
+            self.current_power = constant * wind_speed ** 3 / 1e3
         elif self.rated_speed <= wind_speed < self.cutout_speed:
-            self.current_power = (0.5 * self.air_density * self.rotor_radius ** 2 * self.rated_speed ** 3 *
-                                  self.power_coefficient * self.generator_efficiency) / 1e3
+            self.current_power = constant * self.rated_speed ** 3 / 1e3
         else:
             self.current_power = 0
         return self.current_power
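The Wind.step refactor only hoists the shared constant; the piecewise power curve itself is unchanged. A standalone version with made-up turbine parameters (the real ones live in parameters.py):

```python
def wind_power_kw(v, air_density=1.225, rotor_radius=25.0, power_coefficient=0.45,
                  generator_efficiency=0.95, cutin=3.0, rated=12.0, cutout=25.0):
    constant = 0.5 * air_density * rotor_radius ** 2 * power_coefficient * generator_efficiency
    if cutin <= v < rated:
        return constant * v ** 3 / 1e3       # cubic region between cut-in and rated speed
    elif rated <= v < cutout:
        return constant * rated ** 3 / 1e3   # held at rated power until cut-out
    return 0.0                               # below cut-in or above cut-out
```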
@@ -27,6 +27,7 @@ battery_parameters = {
     'max_discharge': 100,  # kw
     'efficiency': 0.9,
     'degradation': 0.01,  # euro/kw
+    'holding': 0.05,
     'max_soc': 0.8,
     'min_soc': 0.2,
     'initial_capacity': 0.4
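With the new 'holding' parameter, Battery.get_cost charges both for cycling and for holding energy. A toy check (the call site passes energy_change and current_capacity, so the held quantity here is the SOC-like capacity value; that interpretation is my reading of the diff):

```python
degradation, holding = 0.01, 0.05       # from battery_parameters above
energy_change, energy_hold = 40.0, 0.4  # hypothetical charge step and held capacity
cost = energy_change * degradation + energy_hold * holding
print(cost)  # 0.42
```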
plotDRL.py (13 changes)
@@ -21,7 +21,7 @@ def plot_optimization_result(datasource, directory):  # data source is a dataframe
     # plot the step cost in ax[0]
     axs[0, 0].cla()
     axs[0, 0].set_ylabel('Costs')
-    axs[0, 0].set_xlabel('Time(h)')
+    axs[0, 0].set_xlabel('Time (h)')
     axs[0, 0].bar(T, datasource['step_cost'])
     # axs[0,0].set_xticks([i for i in range(24)],[i for i in range(1,25)])

@@ -29,7 +29,7 @@ def plot_optimization_result(datasource, directory):  # data source is a dataframe
     axs[0, 1].cla()
     # set up the first y-axis
     axs[0, 1].set_ylabel('Price')
-    axs[0, 1].set_xlabel('Time(h)')
+    axs[0, 1].set_xlabel('Time (h)')
     line1, = axs[0, 1].plot(T, datasource['price'], drawstyle='steps-mid', label='Price', color='pink')
     # create the second y-axis
     ax2 = axs[0, 1].twinx()
@@ -43,8 +43,8 @@ def plot_optimization_result(datasource, directory):  # data source is a dataframe

     # plot cumulative generation and consumption in ax[2]
     axs[1, 0].cla()
-    axs[1, 0].set_ylabel('Outputs of DGs and Battery')
-    axs[1, 0].set_xlabel('Time(h)')
+    axs[1, 0].set_ylabel('Outputs of Units and Netload (kWh)')
+    axs[1, 0].set_xlabel('Time (h)')
     # process the battery charge/discharge data
     battery_positive = np.array(datasource['battery_energy_change'])
     battery_negative = np.array(datasource['battery_energy_change'])
@@ -90,6 +90,7 @@ def plot_evaluation_information(datasource, directory):

     # plot the unbalance in axs[0]
     axs[0, 0].cla()
+    axs[0, 0].set_xlabel('Time (h)')
     axs[0, 0].set_ylabel('Unbalance of Generation and Load')
     axs[0, 0].bar(eval_data['time_step'], eval_data['unbalance'], label='Exchange with Grid', width=0.4)
     axs[0, 0].bar(eval_data['time_step'] + 0.4, eval_data['netload'], label='Netload', width=0.4)
@@ -98,8 +99,8 @@ def plot_evaluation_information(datasource, directory):

     # plot charge/discharge energy against the price in ax[1]
     axs[0, 1].cla()
+    axs[0, 1].set_xlabel('Time (h)')
     axs[0, 1].set_ylabel('Price')
-    axs[0, 1].set_xlabel('Time Steps')
     line1, = axs[0, 1].plot(eval_data['time_step'], eval_data['price'], drawstyle='steps-mid', label='Price',
                             color='pink')
     ax2 = axs[0, 1].twinx()
@@ -112,6 +113,7 @@ def plot_evaluation_information(datasource, directory):

     # plot generation and load in ax[2]
     axs[1, 0].cla()
+    axs[1, 0].set_xlabel('Time (h)')
     axs[1, 0].set_ylabel('Outputs of Units and Netload (kWh)')
     # axs[1,0].set_xticks([i for i in range(24)], [i for i in range(1, 25)])
     battery_positive = np.array(eval_data['battery'])
@@ -137,6 +139,7 @@ def plot_evaluation_information(datasource, directory):

     # plot the reward in axs[3]
     axs[1, 1].cla()
+    axs[1, 1].set_xlabel('Time (h)')
     axs[1, 1].set_ylabel('Costs')
     axs[1, 1].bar(eval_data['time_step'], eval_data['operation_cost'])
     fig.savefig(f"{directory}/evaluation_information.svg", format='svg', dpi=600, bbox_inches='tight')
tools.py (6 changes)
@@ -78,7 +78,6 @@ def optimization_base_result(env, month, day, initial_soc):
     m.addConstrs((battery_capacity * soc[t] == battery_capacity * soc[t - 1] +
                   (battery_energy_change[t] * battery_efficiency) for t in range(1, period)), name='soc update')
     # set up the cost function
-    # generator cost
     cost_gen = gp.quicksum(
         (a_para[g] * gen_output[g, t] * gen_output[g, t] + b_para[g] * gen_output[g, t] + c_para[g] * on_off[g, t]) for
         t in range(period) for g in range(NUM_GEN))
@@ -114,6 +113,7 @@ def optimization_base_result(env, month, day, initial_soc):
         output_record['gen2'].append(gen_output[1, t].x)
         output_record['gen3'].append(gen_output[2, t].x)
         output_record['step_cost'].append(gen_cost + grid_import_cost - grid_export_cost + solar_cost + wind_cost)
+
     output_record_df = pd.DataFrame.from_dict(output_record)
     return output_record_df

@@ -137,14 +137,12 @@ class Arguments:
         self.soft_update_tau = 2 ** -8  # 2 ** -8 ~= 5e-3
         self.net_dim = 256  # the network width, 256
         self.batch_size = 4096  # num of transitions sampled from the replay buffer
-        self.repeat_times = 2 ** 5  # repeatedly update the network to keep the critic's loss small
+        self.repeat_times = 2 ** 3  # repeatedly update the network to keep the critic's loss small
         self.target_step = 4096  # collect target_step experiences, then update the network, 1024
         self.max_memo = 500000  # capacity of the replay buffer
         self.if_per_or_gae = False  # PER for off-policy sparse reward: Prioritized Experience Replay

         '''Arguments for evaluate'''
-        # self.eval_gap = 2 ** 6  # evaluate the agent every eval_gap seconds
-        # self.eval_times = 2  # number of times to get the episode return first
         self.random_seed = 0  # initialize random seed in self.init_before_training()
         # self.random_seed_list = [1234, 2234, 3234, 4234, 5234]
         self.random_seed_list = [1234]