This commit is contained in:
chenxiaodong 2024-08-22 10:06:26 +08:00
parent 94f447c711
commit 012e4e0fd4
22 changed files with 4143 additions and 5928 deletions

147
7.24.py
View File

@ -1,147 +0,0 @@
import numpy as np
import pandas as pd
import time
import os
import json
from concurrent.futures import ThreadPoolExecutor
from deap import base, creator, tools, algorithms
def fitness_numpy(individual, price, load, temperature, irradiance, wind_speed, prev_soc, prev_Pg1, prev_Pg2, prev_Pg3):
individual = np.array(individual)
price = np.array(price)
load = np.array(load)
temperature = np.array(temperature)
irradiance = np.array(irradiance)
wind_speed = np.array(wind_speed)
Ac, Ag1, Ag2, Ag3, Av = individual[:5]
soc = np.clip(prev_soc + 0.2 * Ac * 0.9, 0.2, 0.8)
Pg1 = np.clip(prev_Pg1 + 100 * Ag1, 0, 150)
Pg2 = np.clip(prev_Pg2 + 100 * Ag2, 0, 375)
Pg3 = np.clip(prev_Pg3 + 200 * Ag3, 0, 500)
Pso = np.clip((0.2 * irradiance + 0.05 * temperature - 9.25) * (1 + Av), 0, None)
Pw = np.where((wind_speed >= 3) & (wind_speed < 8), wind_speed ** 3 * 172.2625 / 1000,
np.where((wind_speed >= 8) & (wind_speed < 12), 64 * 172.2625 / 125, 0))
P = Ac + Pg1 + Pg2 + Pg3 + Pso + Pw
Ee = np.where(P >= load, P - load, 0)
Es = np.where(P < load, load - P, 0)
Cb = 0.01 * Ac + 0.1 * soc
Cg1 = 0.0034 * Pg1 ** 2 + 3 * Pg1 + 30
Cg2 = 0.001 * Pg2 ** 2 + 10 * Pg2 + 40
Cg3 = 0.001 * Pg3 ** 2 + 15 * Pg3 + 70
Cs = 0.01 * Pso
Cw = 0.01 * Pw
Rs = 0.5 * price * Ee
Cp = price * Es
Pe = np.where(Ee > 100, (Ee - 100) * 50, 0)
Ps = np.where(Es > 100, (Es - 100) * 50, 0)
total_cost = np.sum(Cb + Cg1 + Cg2 + Cg3 + Cs + Cw - Rs + Cp + Pe + Ps)
reward = -total_cost / 1000
return (reward,)
def check_bounds(func):
def wrapper(*args, **kwargs):
offspring = func(*args, **kwargs)
for individual in offspring:
for i in range(len(individual)):
individual[i] = np.clip(individual[i], -1, 1)
return offspring
return wrapper
def main():
data = pd.read_csv('./data.csv')
price = data['price'].values
load = data['load'].values
temperature = data['temperature'].values
irradiance = data['irradiance'].values
wind_speed = data['wind_speed'].values
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)
toolbox = base.Toolbox()
toolbox.register("attr_float", np.random.uniform, -1, 1)
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_float, n=5)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
toolbox.register("mate", check_bounds(tools.cxBlend), alpha=0.5)
toolbox.register("mutate", check_bounds(tools.mutGaussian), mu=0, sigma=0.1, indpb=0.2)
toolbox.register("select", tools.selTournament, tournsize=3)
toolbox.register("evaluate", fitness_numpy)
population = toolbox.population(n=500)
prev_soc, prev_Pg1, prev_Pg2, prev_Pg3 = 0.4, 0.0, 0.0, 0.0
decision_values = []
for hour in range(8760):
start = time.time()
current_price = price[hour]
current_load = load[hour]
current_temperature = temperature[hour]
current_irradiance = irradiance[hour]
current_wind_speed = wind_speed[hour]
for gen in range(500):
offspring = toolbox.select(population, len(population))
offspring = list(map(toolbox.clone, offspring))
for child1, child2 in zip(offspring[::2], offspring[1::2]):
if np.random.rand() < 0.7:
toolbox.mate(child1, child2)
del child1.fitness.values
del child2.fitness.values
for mutant in offspring:
if np.random.rand() < 0.2:
toolbox.mutate(mutant)
del mutant.fitness.values
invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
with ThreadPoolExecutor() as executor:
futures = []
batch_size = 500
for i in range(0, len(invalid_ind), batch_size):
batch = invalid_ind[i:i + batch_size]
for ind in batch:
futures.append(
executor.submit(toolbox.evaluate, ind, current_price, current_load, current_temperature,
current_irradiance, current_wind_speed, prev_soc, prev_Pg1, prev_Pg2,
prev_Pg3))
# assign each individual the fitness returned by its own future
for ind, future in zip(invalid_ind, futures):
ind.fitness.values = future.result()
population[:] = offspring
end = time.time()
best_ind = tools.selBest(population, 1)[0]
print(f'Best individual at hour {hour + 1}: {best_ind}')
print(f'Fitness: {best_ind.fitness.values}, using {end - start}s')
decision_values.append({
'Ac': best_ind[0],
'Ag1': best_ind[1],
'Ag2': best_ind[2],
'Ag3': best_ind[3],
'Av': best_ind[4]
})
prev_soc, prev_Pg1, prev_Pg2, prev_Pg3 = best_ind[0], best_ind[1], best_ind[2], best_ind[3]
with open('decision_values.json', 'w') as f:
json.dump(decision_values, f)
if __name__ == "__main__":
main()
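Note: the hourly loop above feeds the raw best-individual values straight back in as prev_soc / prev_Pg1..3. A minimal sketch of an alternative hand-off, assuming the intent is to carry forward the clipped physical quantities that fitness_numpy computes (the helper next_state below is hypothetical, not part of the original script):

def next_state(best_ind, prev_soc, prev_Pg1, prev_Pg2, prev_Pg3):
    # Re-derive the SOC and generator outputs implied by the chosen actions,
    # mirroring the update rules inside fitness_numpy.
    Ac, Ag1, Ag2, Ag3, _ = best_ind[:5]
    soc = np.clip(prev_soc + 0.2 * Ac * 0.9, 0.2, 0.8)
    Pg1 = np.clip(prev_Pg1 + 100 * Ag1, 0, 150)
    Pg2 = np.clip(prev_Pg2 + 100 * Ag2, 0, 375)
    Pg3 = np.clip(prev_Pg3 + 200 * Ag3, 0, 500)
    return soc, Pg1, Pg2, Pg3

# usage inside the hourly loop:
# prev_soc, prev_Pg1, prev_Pg2, prev_Pg3 = next_state(best_ind, prev_soc, prev_Pg1, prev_Pg2, prev_Pg3)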

[File diff suppressed because it is too large]
[Image changed: 142 KiB → 145 KiB]

[File diff suppressed because it is too large]
[Image changed: 126 KiB → 125 KiB]

View File

@ -83,7 +83,6 @@ if __name__ == '__main__':
steps, r_exp = update_buffer(trajectory)
loss_record_path = f'{args.cwd}/loss_data.pkl'
reward_record_path = f'{args.cwd}/reward_data.pkl'
# currently only the actor corresponding to the last seed is stored
act_save_path = f'{args.cwd}/actor.pth'
with open(loss_record_path, 'wb') as tf:
@ -118,13 +117,13 @@ if __name__ == '__main__':
from plotDRL import PlotArgs, make_dir, plot_evaluation_information, plot_optimization_result
plot_args = PlotArgs()
plot_args.feature_change = ''
plot_args.feature_change = '730'
args.cwd = agent_name
plot_dir = make_dir(args.cwd, plot_args.feature_change)
plot_optimization_result(base_result, plot_dir)
plot_evaluation_information(args.cwd + '/' + 'test_data.pkl', plot_dir)
'''compare the costs obtained from gurobi and DDPG'''
ratio = sum(eval_data['operation_cost']) / sum(base_result['step_cost'])
print('operation_cost_sum:', sum(eval_data['operation_cost']))
print('step_cost_sum:', sum(base_result['step_cost']))
print('rl_cost:', sum(eval_data['operation_cost']))
print('gurobi_cost:', sum(base_result['step_cost']))
print('ratio:', ratio)

10
PPO.py
View File

@ -260,7 +260,6 @@ class Arguments:
self.cwd = None # current work directory. None means set automatically
self.if_remove = False # remove the cwd folder? (True, False, None:ask me)
self.visible_gpu = '0' # for example: os.environ['CUDA_VISIBLE_DEVICES'] = '0, 2,'
# self.worker_num = 2 # number of rollout workers per GPU (adjust it to get high GPU usage)
self.num_threads = 32 # cpu_num for evaluate model, torch.set_num_threads(self.num_threads)
'''Arguments for training'''
@ -268,7 +267,6 @@ class Arguments:
self.gamma = 0.995 # discount factor of future rewards
self.learning_rate = 2 ** -14 # about 6e-5
self.soft_update_tau = 2 ** -8 # about 4e-3
self.net_dim = 256 # the network width
self.batch_size = 4096 # num of transitions sampled from replay buffer.
self.repeat_times = 2 ** 3 # collect target_step, then update network
@ -352,8 +350,8 @@ if __name__ == '__main__':
'''init buffer'''
buffer = list()
'''init training parameters'''
# args.train = False
# args.save_network = False
args.train = False
args.save_network = False
# args.test_network = False
# args.save_test_data = False
# args.compare_with_gurobi = False
@ -417,6 +415,6 @@ if __name__ == '__main__':
plot_evaluation_information(args.cwd + '/' + 'test_data.pkl', plot_dir)
'''compare the costs obtained from gurobi and PPO'''
ratio = sum(eval_data['operation_cost']) / sum(base_result['step_cost'])
print('operation_cost_sum:', sum(eval_data['operation_cost']))
print('step_cost_sum:', sum(base_result['step_cost']))
print('rl_cost:', sum(eval_data['operation_cost']))
print('gurobi_cost:', sum(base_result['step_cost']))
print('ratio:', ratio)

View File

@ -1,428 +0,0 @@
import os
import json
import pickle
from copy import deepcopy
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from environment import ESSEnv
from tools import get_episode_return, test_one_episode, optimization_base_result
os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
class ActorPPO(nn.Module):
def __init__(self, mid_dim, state_dim, action_dim, layer_norm=False):
super().__init__()
self.layer_norm = layer_norm
self.net = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
nn.Linear(mid_dim, action_dim))
# the logarithm (log) of standard deviation (std) of action, it is a trainable parameter
self.a_logstd = nn.Parameter(torch.zeros((1, action_dim)) - 0.5, requires_grad=True)
self.sqrt_2pi_log = np.log(np.sqrt(2 * np.pi))
if self.layer_norm:
self.apply_layer_norm()
def apply_layer_norm(self):
def init_weights(layer):
if isinstance(layer, nn.Linear):
nn.init.orthogonal_(layer.weight, 1.0)
nn.init.constant_(layer.bias, 0.0)
self.net.apply(init_weights)
def forward(self, state):
return self.net(state).tanh() # tanh() limits the action output to [-1, 1]
def get_action(self, state):
a_avg = self.forward(state) # mean of the action distribution (before exploration noise)
a_std = self.a_logstd.exp()
noise = torch.randn_like(a_avg)
action = a_avg + noise * a_std
return action, noise
def get_logprob_entropy(self, state, action):
a_avg = self.forward(state)
a_std = self.a_logstd.exp()
delta = ((a_avg - action) / a_std).pow(2) * 0.5
logprob = -(self.a_logstd + self.sqrt_2pi_log + delta).sum(1) # new_logprob
dist_entropy = (logprob.exp() * logprob).mean() # policy entropy
return logprob, dist_entropy
def get_old_logprob(self, _action, noise): # noise = action - a_noise
delta = noise.pow(2) * 0.5
return -(self.a_logstd + self.sqrt_2pi_log + delta).sum(1) # old_logprob
class CriticAdv(nn.Module):
def __init__(self, mid_dim, state_dim, _action_dim, layer_norm=False):
super().__init__()
self.layer_norm = layer_norm
self.net = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
nn.Linear(mid_dim, 1))
if self.layer_norm:
self.apply_layer_norm()
def apply_layer_norm(self):
def init_weights(layer):
if isinstance(layer, nn.Linear):
nn.init.orthogonal_(layer.weight, 1.0)
nn.init.constant_(layer.bias, 0.0)
self.net.apply(init_weights)
def forward(self, state):
return self.net(state) # Advantage value
class AgentPPO:
def __init__(self):
super().__init__()
self.state = None
self.device = None
self.action_dim = None
self.get_obj_critic = None
self.current_step = 0
self.criterion = torch.nn.SmoothL1Loss()
self.cri = self.cri_target = self.if_use_cri_target = self.cri_optim = self.ClassCri = None
self.act = self.act_target = self.if_use_act_target = self.act_optim = self.ClassAct = None
'''init modify'''
self.ClassCri = CriticAdv
self.ClassAct = ActorPPO
self.ratio_clip = 0.2 # ratio.clamp(1 - clip, 1 + clip)
self.lambda_entropy = 0.02 # could be 0.01~0.05
self.lambda_gae_adv = 0.98 # could be 0.95~0.99, GAE (Generalized Advantage Estimation. ICLR.2016.)
self.get_reward_sum = None # self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
self.trajectory_list = None
self.llm_actions = self.load_llm_actions()
def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, if_use_gae=False, gpu_id=0, layer_norm=False):
self.device = torch.device(f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu")
self.action_dim = action_dim
self.trajectory_list = list()
self.get_reward_sum = self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
self.cri = self.ClassCri(net_dim, state_dim, action_dim, layer_norm).to(self.device)
self.act = self.ClassAct(net_dim, state_dim, action_dim, layer_norm).to(
self.device) if self.ClassAct else self.cri
self.cri_target = deepcopy(self.cri) if self.if_use_cri_target else self.cri
self.act_target = deepcopy(self.act) if self.if_use_act_target else self.act
self.cri_optim = torch.optim.Adam(self.cri.parameters(), learning_rate)
self.act_optim = torch.optim.Adam(self.act.parameters(), learning_rate) if self.ClassAct else self.cri
# def select_action(self, state):
# states = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
# actions, noises = self.act.get_action(states)
# return actions[0].detach().cpu().numpy(), noises[0].detach().cpu().numpy()
def select_action(self, state):
states = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
action_rl, noise = self.act.get_action(states[0])
action_rl = action_rl.detach().cpu().numpy().flatten()
noises = noise.detach().cpu().numpy().flatten()
index = self.current_step % len(self.llm_actions)
self.current_step += 1
action_llm = self.llm_actions[index]
action_llm = np.array(action_llm, dtype=np.float32)
action_combined = 0.5 * action_rl + 0.5 * action_llm
if action_combined.shape[0] != self.action_dim:
raise ValueError("Combined action dimension mismatch. Check the action generation process.")
return action_combined, noises
@staticmethod
def load_llm_actions():
with open('data/llm_action.json', 'r') as file:
llm_actions = json.load(file)
return llm_actions
def explore_env(self, env, target_step):
state = self.state # environment state carried over from the previous call
trajectory_temp = list()
last_done = 0
for i in range(target_step):
action, noise = self.select_action(state)
action = np.tanh(action) # make action between -1 & 1
# print(f"Action at step {i}: {action}")
if len(action) < 2:
raise ValueError("Action dimension is less than expected. Check the action generation process.")
state, next_state, reward, done = env.step(action)
trajectory_temp.append((state, reward, done, action, noise))
if done:
state = env.reset()
self.current_step = 0
last_done = i
else:
state = next_state
self.state = state
'''splice list'''
# store 0 trajectory information to list
trajectory_list = self.trajectory_list + trajectory_temp[:last_done + 1]
self.trajectory_list = trajectory_temp[last_done:]
return trajectory_list
def update_net(self, buffer, batch_size, repeat_times, soft_update_tau):
"""put data extract and update network together"""
with torch.no_grad():
buf_len = buffer[0].shape[0]
# decompose buffer data
buf_state, buf_action, buf_noise, buf_reward, buf_mask = [ten.to(self.device) for ten in buffer]
'''get buf_r_sum, buf_logprob'''
bs = 4096 # set a smaller 'BatchSize' when out of GPU memory: 1024, could change to 4096
buf_value = [self.cri_target(buf_state[i:i + bs]) for i in range(0, buf_len, bs)]
buf_value = torch.cat(buf_value, dim=0)
buf_logprob = self.act.get_old_logprob(buf_action, buf_noise)
buf_r_sum, buf_advantage = self.get_reward_sum(buf_len, buf_reward, buf_mask, buf_value) # detach()
# normalize advantage
buf_advantage = (buf_advantage - buf_advantage.mean()) / (buf_advantage.std() + 1e-5)
del buf_noise, buffer[:]
'''PPO: Surrogate objective of Trust Region'''
obj_critic = obj_actor = None
for _ in range(int(buf_len / batch_size * repeat_times)):
indices = torch.randint(buf_len, size=(batch_size,), requires_grad=False, device=self.device)
state = buf_state[indices]
action = buf_action[indices]
r_sum = buf_r_sum[indices]
logprob = buf_logprob[indices]
advantage = buf_advantage[indices]
new_logprob, obj_entropy = self.act.get_logprob_entropy(state, action) # it is obj_actor
ratio = (new_logprob - logprob.detach()).exp()
surrogate1 = advantage * ratio
surrogate2 = advantage * ratio.clamp(1 - self.ratio_clip, 1 + self.ratio_clip)
obj_surrogate = -torch.min(surrogate1, surrogate2).mean()
obj_actor = obj_surrogate + obj_entropy * self.lambda_entropy
self.optim_update(self.act_optim, obj_actor) # update actor
value = self.cri(state).squeeze(1) # critic network predicts the reward_sum (Q value) of state
# use smoothloss L1 to evaluate the value loss
# obj_critic = self.criterion(value, r_sum) / (r_sum.std() + 1e-6)
obj_critic = self.criterion(value, r_sum)
self.optim_update(self.cri_optim, obj_critic) # backpropagate and update the value loss
# soft-update the target critic only when a separate target network is used
if self.cri_target is not self.cri:
self.soft_update(self.cri_target, self.cri, soft_update_tau)
a_std_log = getattr(self.act, 'a_logstd', torch.zeros(1)) # the actor stores its log-std as a_logstd
return obj_critic.item(), obj_actor.item(), a_std_log.mean().item() # logging_tuple
def get_reward_sum_raw(self, buf_len, buf_reward, buf_mask, buf_value) -> (torch.Tensor, torch.Tensor):
buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device) # reward sum
pre_r_sum = 0
for i in range(buf_len - 1, -1, -1):
buf_r_sum[i] = buf_reward[i] + buf_mask[i] * pre_r_sum
pre_r_sum = buf_r_sum[i]
buf_advantage = buf_r_sum - (buf_mask * buf_value[:, 0])
return buf_r_sum, buf_advantage
def get_reward_sum_gae(self, buf_len, ten_reward, ten_mask, ten_value) -> (torch.Tensor, torch.Tensor):
buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device) # old policy value
buf_advantage = torch.empty(buf_len, dtype=torch.float32, device=self.device) # advantage value
pre_r_sum = 0
pre_advantage = 0 # advantage value of previous step
for i in range(buf_len - 1, -1, -1):
buf_r_sum[i] = ten_reward[i] + ten_mask[i] * pre_r_sum
pre_r_sum = buf_r_sum[i]
buf_advantage[i] = ten_reward[i] + ten_mask[i] * (pre_advantage - ten_value[i]) # fix a bug here
pre_advantage = ten_value[i] + buf_advantage[i] * self.lambda_gae_adv
return buf_r_sum, buf_advantage
@staticmethod
def optim_update(optimizer, objective):
optimizer.zero_grad()
objective.backward()
optimizer.step()
@staticmethod
def soft_update(target_net, current_net, tau):
for tar, cur in zip(target_net.parameters(), current_net.parameters()):
tar.data.copy_(cur.data.__mul__(tau) + tar.data.__mul__(1.0 - tau))
class Arguments:
def __init__(self, agent=None, env=None):
self.agent = agent # Deep Reinforcement Learning algorithm
self.env = env # the environment for training
self.cwd = None # current work directory. None means set automatically
self.if_remove = False # remove the cwd folder? (True, False, None:ask me)
self.visible_gpu = '0' # for example: os.environ['CUDA_VISIBLE_DEVICES'] = '0, 2,'
# self.worker_num = 2 # number of rollout workers per GPU (adjust it to get high GPU usage)
self.num_threads = 32 # cpu_num for evaluate model, torch.set_num_threads(self.num_threads)
'''Arguments for training'''
self.num_episode = 1000 # to control the train episodes for PPO
self.gamma = 0.995 # discount factor of future rewards
self.learning_rate = 2 ** -14 # about 6e-5
self.soft_update_tau = 2 ** -8 # about 4e-3
self.net_dim = 256 # the network width
self.batch_size = 4096 # num of transitions sampled from replay buffer.
self.repeat_times = 2 ** 3 # collect target_step, then update network
self.target_step = 4096 # repeatedly update network to keep critic's loss small
self.max_memo = self.target_step # capacity of replay buffer
self.if_per_or_gae = False # GAE for on-policy sparse reward: Generalized Advantage Estimation.
'''Arguments for evaluate'''
self.random_seed = 0 # initialize random seed in self.init_before_training()
# self.random_seed_list = [1234, 2234, 3234, 4234, 5234]
self.random_seed_list = [1234]
self.train = True
self.save_network = True
self.test_network = True
self.save_test_data = True
self.compare_with_gurobi = True
self.plot_on = True
def init_before_training(self, if_main):
if self.cwd is None:
agent_name = self.agent.__class__.__name__
self.cwd = f'./{agent_name}'
if if_main:
import shutil # remove history according to bool(if_remove)
if self.if_remove is None:
self.if_remove = bool(input(f"| PRESS 'y' to REMOVE: {self.cwd}? ") == 'y')
elif self.if_remove:
shutil.rmtree(self.cwd, ignore_errors=True)
print(f"| Remove cwd: {self.cwd}")
os.makedirs(self.cwd, exist_ok=True)
np.random.seed(self.random_seed)
torch.manual_seed(self.random_seed)
torch.set_num_threads(self.num_threads)
torch.set_default_dtype(torch.float32)
os.environ['CUDA_VISIBLE_DEVICES'] = str(self.visible_gpu)
def update_buffer(_trajectory):
_trajectory = list(map(list, zip(*_trajectory))) # 2D-list transpose, here cut the trajectory into 5 parts
ten_state = torch.as_tensor(_trajectory[0]) # tensor state here
ten_reward = torch.as_tensor(_trajectory[1], dtype=torch.float32)
# _trajectory[2] = done, replace done by mask, save memory
ten_mask = (1.0 - torch.as_tensor(_trajectory[2], dtype=torch.float32)) * gamma
ten_action = torch.as_tensor(_trajectory[3])
ten_noise = torch.as_tensor(_trajectory[4], dtype=torch.float32)
buffer[:] = (ten_state, ten_action, ten_noise, ten_reward, ten_mask) # list store tensors
_steps = ten_reward.shape[0] # how many steps are collected in all trajectories
_r_exp = ten_reward.mean() # the mean reward
return _steps, _r_exp
if __name__ == '__main__':
args = Arguments()
reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
loss_record = {'episode': [], 'steps': [], 'critic_loss': [], 'actor_loss': [], 'entropy_loss': []}
args.visible_gpu = '0'
for seed in args.random_seed_list:
args.random_seed = seed
args.agent = AgentPPO()
agent_name = f'{args.agent.__class__.__name__}'
args.agent.cri_target = True
args.env = ESSEnv()
args.init_before_training(if_main=True)
'''init agent and environment'''
agent = args.agent
env = args.env
agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
args.if_per_or_gae, layer_norm=True)
cwd = args.cwd
gamma = args.gamma
batch_size = args.batch_size # how much data should be used to update net
target_step = args.target_step # how many steps before one episode stops
repeat_times = args.repeat_times # how many times should update for one batch size data
soft_update_tau = args.soft_update_tau
agent.state = env.reset()
'''init buffer'''
buffer = list()
'''init training parameters'''
num_episode = args.num_episode
# args.train=False
# args.save_network=False
# args.test_network=False
# args.save_test_data=False
# args.compare_with_gurobi=False
if args.train:
for i_episode in range(num_episode):
with torch.no_grad():
trajectory_list = agent.explore_env(env, target_step)
steps, r_exp = update_buffer(trajectory_list)
critic_loss, actor_loss, entropy_loss = agent.update_net(buffer, batch_size, repeat_times,
soft_update_tau)
loss_record['critic_loss'].append(critic_loss)
loss_record['actor_loss'].append(actor_loss)
loss_record['entropy_loss'].append(entropy_loss)
with torch.no_grad():
episode_reward, episode_unbalance = get_episode_return(env, agent.act, agent.device)
reward_record['mean_episode_reward'].append(episode_reward)
reward_record['unbalance'].append(episode_unbalance)
print(f'current episode is {i_episode}, reward:{episode_reward}, unbalance:{episode_unbalance}')
act_save_path = f'{args.cwd}/actor.pth'
loss_record_path = f'{args.cwd}/loss_data.pkl'
reward_record_path = f'{args.cwd}/reward_data.pkl'
with open(loss_record_path, 'wb') as tf:
pickle.dump(loss_record, tf)
with open(reward_record_path, 'wb') as tf:
pickle.dump(reward_record, tf)
if args.save_network:
torch.save(agent.act.state_dict(), act_save_path)
print('actor parameters have been saved')
if args.test_network:
args.cwd = agent_name
agent.act.load_state_dict(torch.load(act_save_path))
print('parameters have been reloaded for testing')
record = test_one_episode(env, agent.act, agent.device)
eval_data = pd.DataFrame(record['system_info'])
eval_data.columns = ['time_step', 'price', 'netload', 'action', 'real_action', 'soc', 'battery', 'gen1', 'gen2',
'gen3', 'temperature', 'irradiance', 'unbalance', 'operation_cost']
if args.save_test_data:
test_data_save_path = f'{args.cwd}/test_data.pkl'
with open(test_data_save_path, 'wb') as tf:
pickle.dump(record, tf)
'''compare with gurobi data and results'''
if args.compare_with_gurobi:
month = record['init_info'][0][0]
day = record['init_info'][0][1]
initial_soc = record['init_info'][0][3]
base_result = optimization_base_result(env, month, day, initial_soc)
if args.plot_on:
from plotDRL import PlotArgs, make_dir, plot_evaluation_information, plot_optimization_result
plot_args = PlotArgs()
plot_args.feature_change = ''
args.cwd = agent_name
plot_dir = make_dir(args.cwd, plot_args.feature_change)
plot_optimization_result(base_result, plot_dir)
plot_evaluation_information(args.cwd + '/' + 'test_data.pkl', plot_dir)
'''compare the costs obtained from gurobi and PPO'''
ratio = sum(eval_data['operation_cost']) / sum(base_result['step_cost'])
print('operation_cost_sum:', sum(eval_data['operation_cost']))
print('step_cost_sum:', sum(base_result['step_cost']))
print('ratio:', ratio)

View File

@ -31,6 +31,7 @@ class ActorPPO(nn.Module):
if isinstance(layer, nn.Linear):
nn.init.orthogonal_(layer.weight, 1.0)
nn.init.constant_(layer.bias, 0.0)
self.net.apply(init_weights)
def forward(self, state):
@ -83,7 +84,7 @@ class CriticAdv(nn.Module):
class AgentPPO:
def __init__(self):
def __init__(self, file_path='data/llm_actions.json'):
super().__init__()
self.state = None
self.device = None
@ -103,6 +104,8 @@ class AgentPPO:
self.lambda_gae_adv = 0.98 # could be 0.95~0.99, GAE (Generalized Advantage Estimation. ICLR.2016.)
self.get_reward_sum = None # self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
self.trajectory_list = None
self.llm_actions = self.load_llm_actions(file_path)
self.current_time = 0
def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, if_use_gae=False, gpu_id=0, layer_norm=False):
self.device = torch.device(f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu")
@ -118,10 +121,33 @@ class AgentPPO:
self.cri_optim = torch.optim.Adam(self.cri.parameters(), learning_rate)
self.act_optim = torch.optim.Adam(self.act.parameters(), learning_rate) if self.ClassAct else self.cri
@staticmethod
def load_llm_actions(file_path):
with open(file_path, 'r') as file:
data = json.load(file)
return data
@staticmethod
def combine_actions(rl_action, llm_action):
combined_action = [
(rl_action[0] + llm_action['asoc']) / 2,
(rl_action[1] + llm_action['ag1']) / 2,
(rl_action[2] + llm_action['ag2']) / 2,
(rl_action[3] + llm_action['ag3']) / 2,
(rl_action[4] + llm_action['apv']) / 2
]
return combined_action
def select_action(self, state):
states = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
actions, noises = self.act.get_action(states)
return actions[0].detach().cpu().numpy(), noises[0].detach().cpu().numpy()
rl_action = actions[0].detach().cpu().numpy()
if self.current_time >= len(self.llm_actions):
self.current_time = 0
llm_action = self.llm_actions[self.current_time]
combined_action = self.combine_actions(rl_action, llm_action)
self.current_time += 1
return combined_action, noises[0].detach().cpu().numpy()
def explore_env(self, env, target_step):
state = self.state # sent state to agent and then agent sent state to method
@ -307,7 +333,7 @@ if __name__ == '__main__':
args.visible_gpu = '0'
for seed in args.random_seed_list:
args.random_seed = seed
args.agent = AgentPPO()
args.agent = AgentPPO('data/llm_actions.json')
agent_name = f'{args.agent.__class__.__name__}'
args.agent.cri_target = True
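For reference, the select_action hunk above assumes that data/llm_actions.json stores one dict per hour with the keys asoc, ag1, ag2, ag3 and apv used by combine_actions. A minimal sketch of that assumed layout and of the 50/50 averaging (the example values are placeholders, not data from this commit):

import json
import numpy as np

# Assumed schema: one entry per hour, every value already in [-1, 1].
llm_actions = [
    {"asoc": 0.10, "ag1": 0.00, "ag2": 0.20, "ag3": -0.10, "apv": 0.05},
    {"asoc": -0.05, "ag1": 0.15, "ag2": 0.00, "ag3": 0.30, "apv": 0.00},
]
with open("data/llm_actions.json", "w") as f:
    json.dump(llm_actions, f)

# Element-wise average with a PPO action, as combine_actions does:
rl_action = np.zeros(5)  # placeholder network output
keys = ("asoc", "ag1", "ag2", "ag3", "apv")
combined = [(rl_action[i] + llm_actions[0][k]) / 2 for i, k in enumerate(keys)]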

View File

@ -1,393 +0,0 @@
import os
import pickle
from copy import deepcopy
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from environment import ESSEnv
from tools import get_episode_return, test_one_episode, optimization_base_result
os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
class ActorPPO(nn.Module):
def __init__(self, mid_dim, state_dim, action_dim, layer_norm=False):
super().__init__()
self.layer_norm = layer_norm
self.net = nn.Sequential(
nn.Linear(state_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
nn.Linear(mid_dim, action_dim)
)
self.a_logstd = nn.Parameter(torch.zeros((1, action_dim)) - 0.5, requires_grad=True)
self.sqrt_2pi_log = np.log(np.sqrt(2 * np.pi))
if self.layer_norm:
self.apply_layer_norm()
def apply_layer_norm(self):
def init_weights(layer):
if isinstance(layer, nn.Linear):
nn.init.orthogonal_(layer.weight, 1.0)
nn.init.constant_(layer.bias, 0.0)
self.net.apply(init_weights)
def forward(self, state):
return self.net(state).tanh()
def get_action(self, state):
a_avg = self.forward(state)
a_std = self.a_logstd.exp()
noise = torch.randn_like(a_avg)
action = a_avg + noise * a_std
return action, noise
def get_logprob_entropy(self, state, action):
a_avg = self.forward(state)
a_std = self.a_logstd.exp()
delta = ((a_avg - action) / a_std).pow(2) * 0.5
logprob = -(self.a_logstd + self.sqrt_2pi_log + delta).sum(1)
dist_entropy = (logprob.exp() * logprob).mean()
return logprob, dist_entropy
def get_old_logprob(self, _action, noise):
delta = noise.pow(2) * 0.5
return -(self.a_logstd + self.sqrt_2pi_log + delta).sum(1)
class CriticAdv(nn.Module):
def __init__(self, mid_dim, state_dim, _action_dim, layer_norm=False):
super().__init__()
self.layer_norm = layer_norm
self.net = nn.Sequential(
nn.Linear(state_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
nn.Linear(mid_dim, 1)
)
if self.layer_norm:
self.apply_layer_norm()
def apply_layer_norm(self):
def init_weights(layer):
if isinstance(layer, nn.Linear):
nn.init.orthogonal_(layer.weight, 1.0)
nn.init.constant_(layer.bias, 0.0)
self.net.apply(init_weights)
def forward(self, state):
return self.net(state)
class AgentPrimalDualPPO:
def __init__(self):
self.state = None
self.device = None
self.action_dim = None
self.get_obj_critic = None
self.criterion = torch.nn.SmoothL1Loss()
self.cri = self.cri_target = self.if_use_cri_target = self.cri_optim = self.ClassCri = None
self.act = self.act_target = self.if_use_act_target = self.act_optim = self.ClassAct = None
self.ClassCri = CriticAdv
self.ClassAct = ActorPPO
self.ratio_clip = 0.2
self.lambda_entropy = 0.02
self.lambda_gae_adv = 0.98
self.get_reward_sum = None
self.trajectory_list = None
self.lambda_cost = 1.0 # initial dual variable
self.constraint_value = 1.0 # constraint threshold, e.g. a safety-cost limit
def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, if_use_gae=False, gpu_id=0, layer_norm=False):
self.device = torch.device(f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu")
self.trajectory_list = list()
self.get_reward_sum = self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
self.cri = self.ClassCri(net_dim, state_dim, action_dim, layer_norm).to(self.device)
self.act = self.ClassAct(net_dim, state_dim, action_dim, layer_norm).to(
self.device) if self.ClassAct else self.cri
self.cri_target = deepcopy(self.cri) if self.if_use_cri_target else self.cri
self.act_target = deepcopy(self.act) if self.if_use_act_target else self.act
self.cri_optim = torch.optim.Adam(self.cri.parameters(), learning_rate)
self.act_optim = torch.optim.Adam(self.act.parameters(), learning_rate) if self.ClassAct else self.cri
def select_action(self, state):
states = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
actions, noises = self.act.get_action(states)
return actions[0].detach().cpu().numpy(), noises[0].detach().cpu().numpy()
def explore_env(self, env, target_step):
state = self.state
trajectory_temp = list()
last_done = 0
for i in range(target_step):
action, noise = self.select_action(state)
state, next_state, reward, done, cost = env.step(np.tanh(action))
trajectory_temp.append((state, reward, done, action, noise, cost))
if done:
state = env.reset()
last_done = i
else:
state = next_state
self.state = state
trajectory_list = self.trajectory_list + trajectory_temp[:last_done + 1]
self.trajectory_list = trajectory_temp[last_done:]
return trajectory_list
def update_net(self, buffer, batch_size, repeat_times, soft_update_tau):
with torch.no_grad():
buf_len = buffer[0].shape[0]
buf_state, buf_action, buf_noise, buf_reward, buf_mask, buf_cost = [ten.to(self.device) for ten in buffer]
buf_value = torch.cat([self.cri_target(buf_state[i:i + 4096]) for i in range(0, buf_len, 4096)], dim=0)
buf_logprob = self.act.get_old_logprob(buf_action, buf_noise)
buf_r_sum, buf_advantage = self.get_reward_sum(buf_len, buf_reward, buf_mask, buf_value)
buf_advantage = (buf_advantage - buf_advantage.mean()) / (buf_advantage.std() + 1e-5)
cost_sum = buf_cost.sum().item()
if cost_sum > self.constraint_value:
self.lambda_cost += 0.01 * (cost_sum - self.constraint_value)
else:
self.lambda_cost -= 0.01 * (self.constraint_value - cost_sum)
self.lambda_cost = max(self.lambda_cost, 0)
obj_critic = obj_actor = None
for _ in range(int(buf_len / batch_size * repeat_times)):
indices = torch.randint(buf_len, size=(batch_size,), requires_grad=False, device=self.device)
state = buf_state[indices]
action = buf_action[indices]
r_sum = buf_r_sum[indices]
logprob = buf_logprob[indices]
advantage = buf_advantage[indices]
new_logprob, obj_entropy = self.act.get_logprob_entropy(state, action)
ratio = (new_logprob - logprob.detach()).exp()
surrogate1 = advantage * ratio
surrogate2 = advantage * ratio.clamp(1 - self.ratio_clip, 1 + self.ratio_clip)
obj_surrogate = -torch.min(surrogate1, surrogate2).mean()
obj_actor = obj_surrogate + obj_entropy * self.lambda_entropy - self.lambda_cost * buf_cost[indices].mean()
self.optim_update(self.act_optim, obj_actor)
value = self.cri(state).squeeze(1)
obj_critic = self.criterion(value, r_sum)
self.optim_update(self.cri_optim, obj_critic)
if self.cri_target is not self.cri:
self.soft_update(self.cri_target, self.cri, soft_update_tau)
a_std_log = getattr(self.act, 'a_logstd', torch.zeros(1))
return obj_critic.item(), obj_actor.item(), a_std_log.mean().item()
def get_reward_sum_raw(self, buf_len, buf_reward, buf_mask, buf_value) -> (torch.Tensor, torch.Tensor):
buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device)
pre_r_sum = 0
for i in range(buf_len - 1, -1, -1):
buf_r_sum[i] = buf_reward[i] + buf_mask[i] * pre_r_sum
pre_r_sum = buf_r_sum[i]
buf_advantage = buf_r_sum - (buf_mask * buf_value[:, 0])
return buf_r_sum, buf_advantage
def get_reward_sum_gae(self, buf_len, ten_reward, ten_mask, ten_value) -> (torch.Tensor, torch.Tensor):
buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device)
buf_advantage = torch.empty(buf_len, dtype=torch.float32, device=self.device)
pre_r_sum = 0
pre_advantage = 0
for i in range(buf_len - 1, -1, -1):
buf_r_sum[i] = ten_reward[i] + ten_mask[i] * pre_r_sum
pre_r_sum = buf_r_sum[i]
buf_advantage[i] = ten_reward[i] + ten_mask[i] * (pre_advantage - ten_value[i])
pre_advantage = ten_value[i] + buf_advantage[i] * self.lambda_gae_adv
return buf_r_sum, buf_advantage
@staticmethod
def optim_update(optimizer, objective):
optimizer.zero_grad()
objective.backward()
optimizer.step()
@staticmethod
def soft_update(target_net, current_net, tau):
for tar, cur in zip(target_net.parameters(), current_net.parameters()):
tar.data.copy_(cur.data * tau + tar.data * (1.0 - tau))
class Arguments:
def __init__(self, agent=None, env=None):
self.agent = agent # Deep Reinforcement Learning algorithm
self.env = env # the environment for training
self.cwd = None # current work directory. None means set automatically
self.if_remove = False # remove the cwd folder? (True, False, None:ask me)
self.visible_gpu = '0' # for example: os.environ['CUDA_VISIBLE_DEVICES'] = '0, 2,'
# self.worker_num = 2 # number of rollout workers per GPU (adjust it to get high GPU usage)
self.num_threads = 32 # cpu_num for evaluate model, torch.set_num_threads(self.num_threads)
'''Arguments for training'''
self.num_episode = 1000 # to control the train episodes for PPO
self.gamma = 0.995 # discount factor of future rewards
self.learning_rate = 2 ** -14 # about 6e-5
self.soft_update_tau = 2 ** -8 # about 4e-3
self.net_dim = 256 # the network width
self.batch_size = 4096 # num of transitions sampled from replay buffer.
self.repeat_times = 2 ** 3 # collect target_step, then update network
self.target_step = 4096 # repeatedly update network to keep critic's loss small
self.max_memo = self.target_step # capacity of replay buffer
self.if_per_or_gae = False # GAE for on-policy sparse reward: Generalized Advantage Estimation.
'''Arguments for evaluate'''
self.random_seed = 0 # initialize random seed in self.init_before_training()
# self.random_seed_list = [1234, 2234, 3234, 4234, 5234]
self.random_seed_list = [1234]
self.train = True
self.save_network = True
self.test_network = True
self.save_test_data = True
self.compare_with_gurobi = True
self.plot_on = True
def init_before_training(self, if_main):
if self.cwd is None:
agent_name = self.agent.__class__.__name__
self.cwd = f'./{agent_name}'
if if_main:
import shutil # remove history according to bool(if_remove)
if self.if_remove is None:
self.if_remove = bool(input(f"| PRESS 'y' to REMOVE: {self.cwd}? ") == 'y')
elif self.if_remove:
shutil.rmtree(self.cwd, ignore_errors=True)
print(f"| Remove cwd: {self.cwd}")
os.makedirs(self.cwd, exist_ok=True)
np.random.seed(self.random_seed)
torch.manual_seed(self.random_seed)
torch.set_num_threads(self.num_threads)
torch.set_default_dtype(torch.float32)
os.environ['CUDA_VISIBLE_DEVICES'] = str(self.visible_gpu)
def update_buffer(_trajectory):
_trajectory = list(map(list, zip(*_trajectory))) # 2D-list transpose, here cut the trajectory into 5 parts
ten_state = torch.as_tensor(_trajectory[0]) # tensor state here
ten_reward = torch.as_tensor(_trajectory[1], dtype=torch.float32)
# _trajectory[2] = done, replace done by mask, save memory
ten_mask = (1.0 - torch.as_tensor(_trajectory[2], dtype=torch.float32)) * gamma
ten_action = torch.as_tensor(_trajectory[3])
ten_noise = torch.as_tensor(_trajectory[4], dtype=torch.float32)
buffer[:] = (ten_state, ten_action, ten_noise, ten_reward, ten_mask) # list store tensors
_steps = ten_reward.shape[0] # how many steps are collected in all trajectories
_r_exp = ten_reward.mean() # the mean reward
return _steps, _r_exp
if __name__ == '__main__':
args = Arguments()
reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
loss_record = {'episode': [], 'steps': [], 'critic_loss': [], 'actor_loss': [], 'entropy_loss': []}
args.visible_gpu = '0'
for seed in args.random_seed_list:
args.random_seed = seed
args.agent = AgentPrimalDualPPO()
agent_name = f'{args.agent.__class__.__name__}'
args.agent.cri_target = True
args.env = ESSEnv()
args.init_before_training(if_main=True)
agent = args.agent
env = args.env
agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
args.if_per_or_gae, layer_norm=True)
cwd = args.cwd
gamma = args.gamma
batch_size = args.batch_size # how much data should be used to update net
target_step = args.target_step # how many steps of one episode should stop
repeat_times = args.repeat_times # how many times should update for one batch size data
soft_update_tau = args.soft_update_tau
agent.state = env.reset()
buffer = list()
num_episode = args.num_episode
if args.train:
for i_episode in range(num_episode):
with torch.no_grad():
trajectory_list = []
for _ in range(target_step):
current_obs = agent.state
action, noise = agent.select_action(current_obs)
current_obs, next_obs, reward, done, cost = env.step(np.tanh(action)) # same five return values and tanh squashing as in explore_env
trajectory_list.append((current_obs, reward, done, action, noise, cost))
agent.state = next_obs
if done:
break
steps, r_exp = update_buffer(trajectory_list)
critic_loss, actor_loss, entropy_loss = agent.update_net(buffer, batch_size, repeat_times, soft_update_tau)
loss_record['critic_loss'].append(critic_loss)
loss_record['actor_loss'].append(actor_loss)
loss_record['entropy_loss'].append(entropy_loss)
with torch.no_grad():
episode_reward, episode_unbalance = get_episode_return(env, agent.act, agent.device)
reward_record['mean_episode_reward'].append(episode_reward)
reward_record['unbalance'].append(episode_unbalance)
print(f'current episode is {i_episode}, reward: {episode_reward}, unbalance: {episode_unbalance}')
act_save_path = f'{args.cwd}/actor.pth'
loss_record_path = f'{args.cwd}/loss_data.pkl'
reward_record_path = f'{args.cwd}/reward_data.pkl'
with open(loss_record_path, 'wb') as tf:
pickle.dump(loss_record, tf)
with open(reward_record_path, 'wb') as tf:
pickle.dump(reward_record, tf)
if args.save_network:
torch.save(agent.act.state_dict(), act_save_path)
print('actor parameters have been saved')
if args.test_network:
args.cwd = agent_name
agent.act.load_state_dict(torch.load(act_save_path))
print('parameters have been reloaded for testing')
record = test_one_episode(env, agent.act, agent.device)
eval_data = pd.DataFrame(record['system_info'])
eval_data.columns = ['time_step', 'price', 'netload', 'action', 'real_action', 'soc', 'battery', 'gen1', 'gen2',
'gen3', 'temperature', 'irradiance', 'unbalance', 'operation_cost']
if args.save_test_data:
test_data_save_path = f'{args.cwd}/test_data.pkl'
with open(test_data_save_path, 'wb') as tf:
pickle.dump(record, tf)
'''compare with gurobi data and results'''
if args.compare_with_gurobi:
month = record['init_info'][0][0]
day = record['init_info'][0][1]
initial_soc = record['init_info'][0][3]
base_result = optimization_base_result(env, month, day, initial_soc)
if args.plot_on:
from plotDRL import PlotArgs, make_dir, plot_evaluation_information, plot_optimization_result
plot_args = PlotArgs()
plot_args.feature_change = 'primal_dual'
args.cwd = agent_name
plot_dir = make_dir(args.cwd, plot_args.feature_change)
plot_optimization_result(base_result, plot_dir)
plot_evaluation_information(args.cwd + '/' + 'test_data.pkl', plot_dir)
'''compare the costs obtained from gurobi and PPO'''
ratio = sum(eval_data['operation_cost']) / sum(base_result['step_cost'])
print('operation_cost_sum:', sum(eval_data['operation_cost']))
print('step_cost_sum:', sum(base_result['step_cost']))
print('ratio:', ratio)
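The dual-variable update in update_net above is a projected ascent step: lambda is pushed up when the accumulated cost exceeds the constraint value and pulled down otherwise, never dropping below zero. A standalone sketch of that rule with the same 0.01 step size (cost_sum here is a placeholder value):

lambda_cost = 1.0
constraint_value = 1.0
cost_sum = 1.8  # placeholder episode cost
# equivalent to the if/else update in the code followed by the clamp at zero
lambda_cost = max(lambda_cost + 0.01 * (cost_sum - constraint_value), 0.0)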

View File

@ -1,56 +0,0 @@
from openai import OpenAI
import os
def get_response():
client = OpenAI(
api_key="sk-f01744b2801344b1a72f89ec7e290cad",
base_url="https://dashscope.aliyuncs.com/compatible-mode/v1", # base_url of the DashScope service
)
completion = client.chat.completions.create(
model="qwen-turbo",
messages=[
{'role': 'system',
'content':
'You are an expert at debugging and controlling a smart residential energy system, and you must give the optimal action decision based on the current environment state. '
'The energy system consists of a solar plant, a wind plant, generators, a battery module, and a connection to the residential grid.'},
{'role': 'user',
'content':
'1. The current state contains 9 values: '
'Sc (battery state of charge) starts at 0.4 and ranges from 0.2 to 0.8; Sg1, Sg2 and Sg3 (the power states of generators 1-3) all start at 0; '
'Sp (electricity price), Sl (residential load), St (temperature), Si (solar irradiance) and Sw (wind speed) are all provided by the data. '
'Sc, Sg1, Sg2 and Sg3 evolve from their values at the previous time step and never fall below 0; Sg1, Sg2 or Sg3 equal to 0 means that generator is switched off. '
'2. The current action contains 5 values: '
'x denotes the value taken at the current time and ranges from -1 to 1; the total battery capacity is 500, so the initial stored energy is 500*0.4=200; Ag1, Ag2 and Ag3 are changes relative to the maximum instantaneous power of each generator. '
'Ac (battery charge/discharge action): the battery energy changes by 100*x; Ag1 (generator 1 power action): power changes by 100*x; Ag2 (generator 2 power action): power changes by 100*x; Ag3 (generator 3 power action): power changes by 200*x; Av (solar voltage action): the solar voltage becomes (1+x) times its value. '
'3. Power formulas: '
'1. Pc (battery charge/discharge power) = max(0.2, min(0.8, Sc+0.18*Ac)); 2. Pg1 (generator 1 power) = max(0, min(150, 100*Ag1+Sg1)); '
'3. Pg2 (generator 2 power) = max(0, min(375, 100*Ag2+Sg2)); 4. Pg3 (generator 3 power) = max(0, min(500, 200*Ag3+Sg3)); '
'5. Pso (solar power) = max(0, (0.45*Si+0.015*St-60.3)*(1+Av)); '
'6. Pw (wind power): if 3<=Sw<8, Pw=Sw^3*172.2625/1000; elif 8<=Sw<12, Pw=64*172.2625/125; else Pw=0. '
'4. Cost and energy-penalty formulas: '
'P (total power) = Pc+Pg1+Pg2+Pg3+Pso+Pw; '
'if P>=Sl, Ee (surplus energy) = P-Sl, else Es (shortage energy) = Sl-P; '
'Cb (battery cost) = 0.01*Ac+0.1*Sc; Cg1 (generator 1 cost) = 0.0034*Pg1^2+3*Pg1+30; '
'Cg2 (generator 2 cost) = 0.001*Pg2^2+10*Pg2+40; Cg3 (generator 3 cost) = 0.001*Pg3^2+15*Pg3+70; '
'Cs (solar cost) = 0.01*Pso; Cw (wind cost) = 0.01*Pw; '
'Rs (sales revenue) = 0.5*Sp*Ee; Cp (purchase cost) = Sp*Es; '
'Pe (surplus penalty) = (Ee-100)*50; Ps (shortage penalty) = (Es-100)*50; '
'Rs, Pe and Cp, Ps correspond to the energy-surplus and energy-shortage cases respectively and cannot occur at the same time. '
'5. Objective function: Goal = -(Cb+Cg1+Cg2+Cg3+Cs+Cw+Pe+Ps-Rs+Cp)/1000. '
'6. Control scheme and objectives: '
'(1) Prefer solar and wind generation; when they cannot meet the load, supply power from the generators and the battery; '
'(2) when supplying power, weigh the cost of each generation module; normally a later generator is only switched on when the maximum power of the previous one cannot meet the load; '
'(3) reduce trades that exceed the public-grid exchange limit, so that fluctuations of the public grid do not affect the stability of the residential supply; '
'(4) meet the residential load at the next time step, maximize the reward function and minimize the supply-demand gap (keep Ee and Es as small as possible) to improve training results and policy quality. '
'Please read the hourly Sp, Sl, St, Si and Sw data, compute the state at every time step, and then use the most suitable optimization method to give an action decision for every hour; '
'each decision has length 5, representing Ac, Ag1, Ag2, Ag3 and Av, computed once per hour. '
'The result should be in JSON format: [{{x1, x2, x3, x4, x5}}, {{x1, x2, x3, x4, x5}}, ...]; if the computation is too large, you may answer in batches.'}],
temperature=0.8,
top_p=0.8
)
print(completion.model_dump_json())
if __name__ == '__main__':
get_response()
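The prompt asks the model to reply with a JSON array of 5-value actions. A minimal post-processing sketch, assuming the reply body is plain JSON that should be written to data/llm_action.json for the agents above (the helper save_llm_actions is hypothetical, not part of this commit):

import json

def save_llm_actions(completion, path='data/llm_action.json'):
    # Assumes the assistant message is a bare JSON array such as
    # [[x1, x2, x3, x4, x5], ...]; strip any surrounding prose first if needed.
    content = completion.choices[0].message.content
    actions = json.loads(content)
    with open(path, 'w') as f:
        json.dump(actions, f)
    return actions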

8
SAC.py
View File

@ -51,8 +51,8 @@ if __name__ == '__main__':
'''collect data and train and update network'''
num_episode = args.num_episode
'''here record real unbalance'''
# args.train = False
# args.save_network = False
args.train = False
args.save_network = False
# args.test_network = False
# args.save_test_data = False
# args.compare_with_gurobi = False
@ -127,6 +127,6 @@ if __name__ == '__main__':
plot_evaluation_information(args.cwd + '/' + 'test_data.pkl', plot_dir)
'''compare the costs obtained from gurobi and SAC'''
ratio = sum(eval_data['operation_cost']) / sum(base_result['step_cost'])
print('operation_cost_sum:', sum(eval_data['operation_cost']))
print('step_cost_sum:', sum(base_result['step_cost']))
print('rl_cost:', sum(eval_data['operation_cost']))
print('gurobi_cost:', sum(base_result['step_cost']))
print('ratio:', ratio)

8
TD3.py
View File

@ -50,8 +50,8 @@ if __name__ == '__main__':
agent.state = env.reset()
'''collect data and train and update network'''
num_episode = args.num_episode
# args.train=False
# args.save_network=False
args.train = False
args.save_network = False
# args.test_network=False
# args.save_test_data=False
# args.compare_with_gurobi=False
@ -126,6 +126,6 @@ if __name__ == '__main__':
plot_evaluation_information(args.cwd + '/' + 'test_data.pkl', plot_dir)
'''compare the costs obtained from gurobi and TD3'''
ratio = sum(eval_data['operation_cost']) / sum(base_result['step_cost'])
print('operation_cost_sum:', sum(eval_data['operation_cost']))
print('step_cost_sum:', sum(base_result['step_cost']))
print('rl_cost:', sum(eval_data['operation_cost']))
print('gurobi_cost:', sum(base_result['step_cost']))
print('ratio:', ratio)

View File

@ -2,6 +2,23 @@ class Constant:
MONTHS_LEN = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
class TimeCounter:
def __init__(self):
self.month = 1
self.day = 1
def increment_day(self):
self.day += 1
if self.day > Constant.MONTHS_LEN[self.month - 1]:
self.day = 1
self.month += 1
if self.month > 12:
self.month = 1
def get_date(self):
return self.month, self.day
class DataManager:
def __init__(self) -> None:
self.Prices = []
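A small usage sketch of the TimeCounter added above (MONTHS_LEN gives February 28 days, so leap years are ignored):

tc = TimeCounter()
for _ in range(31):  # 31 increments starting from January 1
    tc.increment_day()
print(tc.get_date())  # (2, 1): the counter wraps from January 31 into February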

View File

@ -19,6 +19,7 @@ class ESSEnv(gym.Env):
self.current_output = None
self.final_step_outputs = None
self.data_manager = DataManager()
self.time_counter = TimeCounter()
self._load_year_data()
self.month = None
self.day = None
@ -44,11 +45,9 @@ class ESSEnv(gym.Env):
self.state_space = gym.spaces.Box(low=0, high=1, shape=(15,), dtype=np.float32) # shape adjusted for the LLM features
def reset(self, *args):
self.month = np.random.randint(1, 13) # choose 12 month
if self.TRAIN:
self.day = np.random.randint(1, 20)
else:
self.day = np.random.randint(20, Constant.MONTHS_LEN[self.month] - 1)
# changed to sequential training: walk through the calendar day by day
self.time_counter.increment_day()
self.month, self.day = self.time_counter.get_date()
self.current_time = 0
self.battery.reset()
self.dg1.reset()
@ -155,7 +154,7 @@ class ESSEnv(gym.Env):
irradiance_df = pd.read_csv('data/irradiance.csv', sep=',')
temperature_df = pd.read_csv('data/temper.csv', sep=',')
wind_df = pd.read_csv('data/wind.csv', sep=',')
llm_data = json.load(open('data/llm_action.json', 'r'))
# llm_data = json.load(open('data/llm_action.json', 'r'))
price = price_df['price'].to_numpy(dtype=float)
load = load_df['houseload'].to_numpy(dtype=float)
@ -174,4 +173,4 @@ class ESSEnv(gym.Env):
process_elements(irradiance, lambda x: x, self.data_manager.add_irradiance_element)
process_elements(temperature, lambda x: x - 273.15, self.data_manager.add_temperature_element)
process_elements(wind, lambda x: x, self.data_manager.add_wind_element)
process_elements(llm_data, lambda x: x, self.data_manager.add_llm_element)
# process_elements(llm_data, lambda x: x, self.data_manager.add_llm_element)

View File

@ -1,184 +0,0 @@
import gym
import numpy as np
import pandas as pd
from module import *
from parameters import *
from data_manager import *
class ESSEnv(gym.Env):
def __init__(self, **kwargs):
super(ESSEnv, self).__init__()
self.excess = None
self.shedding = None
self.unbalance = None
self.real_unbalance = None
self.operation_cost = None
self.current_output = None
self.final_step_outputs = None
self.data_manager = DataManager()
self._load_year_data()
self.month = None
self.day = None
self.TRAIN = True
self.current_time = None
self.episode_length = kwargs.get('episode_length', 24)
self.battery_parameters = kwargs.get('battery_parameters', battery_parameters)
self.dg_parameters = kwargs.get('dg_parameters', dg_parameters)
self.solar_parameters = kwargs.get('solar_parameters', solar_parameters)
self.wind_parameters = kwargs.get('wind_parameters', wind_parameters)
self.penalty_coefficient = 50 # penalty coefficient for constraint violations
self.sell_coefficient = 0.5 # profit coefficient when selling to the grid
self.grid = Grid()
self.battery = Battery(self.battery_parameters)
self.dg1 = DG(self.dg_parameters['gen_1'])
self.dg2 = DG(self.dg_parameters['gen_2'])
self.dg3 = DG(self.dg_parameters['gen_3'])
self.solar = Solar(self.solar_parameters)
self.wind = Wind(self.wind_parameters)
self.action_space = gym.spaces.Box(low=-1, high=1, shape=(5,), dtype=np.float32) # a voltage-adjustment action has been added
self.state_space = gym.spaces.Box(low=0, high=1, shape=(10,), dtype=np.float32)
def reset(self, *args):
self.month = np.random.randint(1, 13) # choose 12 month
if self.TRAIN:
self.day = np.random.randint(1, 20)
else:
self.day = np.random.randint(20, Constant.MONTHS_LEN[self.month] - 1)
self.current_time = 0
self.battery.reset()
self.dg1.reset()
self.dg2.reset()
self.dg3.reset()
self.solar.reset()
self.wind.reset()
return self._build_state()
def _build_state(self):
soc = self.battery.SOC()
dg1_output = self.dg1.current_output
dg2_output = self.dg2.current_output
dg3_output = self.dg3.current_output
time_step = self.current_time
price = self.data_manager.get_price_data(self.month, self.day, self.current_time)
houseload = self.data_manager.get_load_cons_data(self.month, self.day, self.current_time)
temperature = self.data_manager.get_temperature_data(self.month, self.day, self.current_time)
irradiance = self.data_manager.get_irradiance_data(self.month, self.day, self.current_time)
wind_speed = self.data_manager.get_wind_data(self.month, self.day, self.current_time)
obs = np.concatenate((np.float32(time_step), np.float32(price), np.float32(soc), np.float32(houseload),
np.float32(dg1_output), np.float32(dg2_output), np.float32(dg3_output),
np.float32(temperature), np.float32(irradiance), np.float32(wind_speed)), axis=None)
return obs
def step(self, action): # state transition: current_obs->take_action->get_reward->get_finish->next_obs
# apply the action to each component
current_obs = self._build_state()
temperature = current_obs[7]
irradiance = current_obs[8]
wind_speed = current_obs[9]
self.battery.step(action[0]) # perform the state transition; the battery capacity changes as well
self.dg1.step(action[1])
self.dg2.step(action[2])
self.dg3.step(action[3])
self.solar.step(temperature, irradiance, action[4])
self.wind.step(wind_speed)
self.current_output = np.array((self.dg1.current_output, self.dg2.current_output, self.dg3.current_output,
-self.battery.energy_change, self.solar.current_power, self.wind.current_power))
actual_production = sum(self.current_output)
price = current_obs[1]
houseload = current_obs[3]
unbalance = actual_production - houseload
reward = 0
excess_penalty = 0 # surplus
deficient_penalty = 0 # shortage
sell_benefit = 0
buy_cost = 0
self.excess = 0
self.shedding = 0
if unbalance >= 0: # there is currently an energy surplus
if unbalance <= self.grid.exchange_ability:
# revenue from selling to the grid is small [0.029, 0.1]
sell_benefit = self.grid.get_cost(price, unbalance) * self.sell_coefficient
else:
sell_benefit = self.grid.get_cost(price, self.grid.exchange_ability) * self.sell_coefficient
# real unbalance: the grid cannot absorb this excess either
self.excess = unbalance - self.grid.exchange_ability
excess_penalty = self.excess * self.penalty_coefficient
else: # unbalance < 0: apply the shortage penalty
if abs(unbalance) <= self.grid.exchange_ability:
buy_cost = self.grid.get_cost(price, abs(unbalance))
else:
buy_cost = self.grid.get_cost(price, self.grid.exchange_ability)
self.shedding = abs(unbalance) - self.grid.exchange_ability
deficient_penalty = self.shedding * self.penalty_coefficient
battery_cost = self.battery.get_cost(self.battery.energy_change, self.battery.current_capacity)
dg1_cost = self.dg1.get_cost(self.dg1.current_output)
dg2_cost = self.dg2.get_cost(self.dg2.current_output)
dg3_cost = self.dg3.get_cost(self.dg3.current_output)
solar_cost = self.solar.get_cost(self.solar.current_power)
wind_cost = self.wind.gen_cost(self.wind.current_power)
self.operation_cost = (battery_cost + dg1_cost + dg2_cost + dg3_cost + solar_cost + wind_cost + excess_penalty +
deficient_penalty - sell_benefit + buy_cost)
reward -= self.operation_cost / 1e3
self.unbalance = unbalance
self.real_unbalance = self.shedding + self.excess
final_step_outputs = [self.dg1.current_output, self.dg2.current_output, self.dg3.current_output,
self.battery.current_capacity, self.solar.current_power, self.wind.current_power]
self.current_time += 1
finish = (self.current_time == self.episode_length)
if finish:
self.final_step_outputs = final_step_outputs
self.current_time = 0
next_obs = self.reset()
else:
next_obs = self._build_state()
return current_obs, next_obs, float(reward), finish
# def render(self, current_obs, next_obs, reward, finish):
# print('day={},hour={:2d}, state={}, next_state={}, reward={:.4f}, terminal={}\n'.
# format(self.day, self.current_time, current_obs, next_obs, reward, finish))
def _load_year_data(self):
price_df = pd.read_csv('data/prices.csv', sep=',')
load_df = pd.read_csv('data/houseload.csv', sep=',')
irradiance_df = pd.read_csv('data/irradiance.csv', sep=',')
temperature_df = pd.read_csv('data/temper.csv', sep=',')
wind_df = pd.read_csv('data/wind.csv', sep=',')
price = price_df['price'].to_numpy(dtype=float)
load = load_df['houseload'].to_numpy(dtype=float)
irradiance = irradiance_df['irradiance'].to_numpy(dtype=float)
temperature = temperature_df['t2m'].to_numpy(dtype=float)
wind = wind_df['wind_speed'].to_numpy(dtype=float)
'''rescale the magnitudes of price, generation and demand'''
def process_elements(elements, transform_function, add_function):
for element in elements:
transformed_element = transform_function(element)
add_function(transformed_element)
process_elements(price, lambda x: max(x / 10, 0.5), self.data_manager.add_price_element)
process_elements(load, lambda x: x * 3, self.data_manager.add_load_element)
process_elements(irradiance, lambda x: x, self.data_manager.add_irradiance_element)
process_elements(temperature, lambda x: x - 273.15, self.data_manager.add_temperature_element)
process_elements(wind, lambda x: x, self.data_manager.add_wind_element)
# if __name__ == '__main__':
# env = ESSEnv()
# env.TRAIN = False
# rewards = []
# env.reset()
# tem_action = [0.1, 0.1, 0.1, 0.1, 0.1]
# for _ in range(144):
# print(f'current month is {env.month}, current day is {env.day}, current time is {env.current_time}')
# current_obs, next_obs, reward, finish = env.step(tem_action)
# env.render(current_obs, next_obs, reward, finish)
# current_obs = next_obs
# rewards.append(reward)

View File

@ -1,190 +0,0 @@
import numpy as np
import pandas as pd
import json
from scipy.optimize import minimize
from pyswarm import pso
# load the data
file_path = './data.csv'
data = pd.read_csv(file_path)
# initialize the state
initial_soc = 0.4 # initial battery state of charge
initial_sg1 = 0
initial_sg2 = 0
initial_sg3 = 0
battery_capacity = 500 * initial_soc
# parameter settings
battery_efficiency = 0.9
battery_degradation = 0.01
battery_holding = 0.1
solar_coefficient = 0.01
voltage_change_percent = 0.1
wind_coefficient = 0.01
# generator parameters
DG_parameters = {
'gen_1': {'a': 0.0034, 'b': 3, 'c': 30, 'power_output_max': 150, 'power_output_min': 10, 'ramping_up': 100,
'ramping_down': 100},
'gen_2': {'a': 0.001, 'b': 10, 'c': 40, 'power_output_max': 375, 'power_output_min': 50, 'ramping_up': 100,
'ramping_down': 100},
'gen_3': {'a': 0.001, 'b': 15, 'c': 70, 'power_output_max': 500, 'power_output_min': 100, 'ramping_up': 200,
'ramping_down': 200}
}
NUM_GEN = len(DG_parameters.keys())
period = 100 # process the first 100 data points
def get_dg_info(parameters):
p_max = []
p_min = []
ramping_up = []
ramping_down = []
a_para = []
b_para = []
c_para = []
for name, gen_info in parameters.items():
p_max.append(gen_info['power_output_max'])
p_min.append(gen_info['power_output_min'])
ramping_up.append(gen_info['ramping_up'])
ramping_down.append(gen_info['ramping_down'])
a_para.append(gen_info['a'])
b_para.append(gen_info['b'])
c_para.append(gen_info['c'])
return p_max, p_min, ramping_up, ramping_down, a_para, b_para, c_para
p_max, p_min, ramping_up, ramping_down, a_para, b_para, c_para = get_dg_info(parameters=DG_parameters)
# objective function
def objective(x, period_batch, start, initial_states):
total_cost = 0
soc, sg1, sg2, sg3 = initial_states
for t in range(period_batch):
Ac, Ag1, Ag2, Ag3, Av = x[t * 5:(t + 1) * 5]
Pc = max(0.2, min(0.8, soc + 0.2 * Ac * 0.9))
Pg1 = max(0, min(150, 100 * Ag1 + sg1))
Pg2 = max(0, min(375, 100 * Ag2 + sg2))
Pg3 = max(0, min(500, 200 * Ag3 + sg3))
Pso = max(0, (0.2 * data['irradiance'][start + t] + 0.05 * data['temperature'][start + t] - 9.25) * (1 + Av))
if 3 <= data['wind_speed'][start + t] < 8:
Pw = data['wind_speed'][start + t] ** 3 * 172.2625 / 1000
elif 8 <= data['wind_speed'][start + t] < 12:
Pw = 64 * 172.2625 / 125
else:
Pw = 0
P = Pc + Pg1 + Pg2 + Pg3 + Pso + Pw
if P >= data['load'][start + t]:
Ee = P - data['load'][start + t]
Es = 0
else:
Es = data['load'][start + t] - P
Ee = 0
Cb = 0.01 * battery_capacity + 0.1 * Pc
Cg1 = 0.0034 * Pg1 ** 2 + 3 * Pg1 + 30
Cg2 = 0.001 * Pg2 ** 2 + 10 * Pg2 + 40
Cg3 = 0.001 * Pg3 ** 2 + 15 * Pg3 + 70
Cs = 0.01 * Pso
Cw = 0.01 * Pw
Rs = 0.5 * data['price'][start + t] * Ee
Cp = data['price'][start + t] * Es
Pe = (Ee - 100) * 50 if Ee > 100 else 0
Ps = (Es - 100) * 50 if Es > 100 else 0
total_cost += (Cb + Cg1 + Cg2 + Cg3 + Cs + Cw + Pe + Ps - Rs + Cp) / 1000
soc = Pc
sg1 = Pg1
sg2 = Pg2
sg3 = Pg3
return total_cost
# Constraints
def constraint(x, period_batch, start, initial_states):
constraints = []
soc, sg1, sg2, sg3 = initial_states
for t in range(period_batch):
Ac, Ag1, Ag2, Ag3, Av = x[t * 5:(t + 1) * 5]
Pc = max(0.2, min(0.8, soc + 0.2 * Ac * 0.9))
Pg1 = max(10, min(150, 100 * Ag1 + sg1))
Pg2 = max(50, min(375, 100 * Ag2 + sg2))
Pg3 = max(100, min(500, 200 * Ag3 + sg3))
Pso = max(0, (0.2 * data['irradiance'][start + t] + 0.05 * data['temperature'][start + t] - 9.25) * (1 + Av))
if 3 <= data['wind_speed'][start + t] < 8:
Pw = data['wind_speed'][start + t] ** 3 * 172.2625 / 1000
elif 8 <= data['wind_speed'][start + t] < 12:
Pw = 64 * 172.2625 / 125
else:
Pw = 0
P = Pc + Pg1 + Pg2 + Pg3 + Pso + Pw
        # make sure the updated values do not fall below their minimums
constraints.append(Pc - 0.2)
constraints.append(Pg1 - 10)
constraints.append(Pg2 - 50)
constraints.append(Pg3 - 100)
constraints.append(P - data['load'][start + t])
soc = Pc
sg1 = Pg1
sg2 = Pg2
sg3 = Pg3
return constraints
actions = []
batch_size = 10  # process 10 data points per batch
total_batches = 10  # 100 data points in total
start = 0
for batch_num in range(total_batches):
end = min(start + batch_size, period)
period_batch = end - start
    # initialize the starting state for this batch
initial_states = (initial_soc, initial_sg1, initial_sg2, initial_sg3)
    # # Random initialization would be closer to the real situation
# x0 = np.random.uniform(0, 1, period_batch * 5)
# bounds = [(-1, 1)] * (period_batch * 5)
# cons = {'type': 'ineq', 'fun': lambda x: constraint(x, period_batch, start, initial_states)}
# result = minimize(lambda x: objective(x, period_batch, start, initial_states), x0, method='SLSQP', bounds=bounds,
# constraints=cons)
#
# actions.extend(result.x.reshape((period_batch, 5)).tolist())
    # Particle swarm optimization
lb = [-1] * (period_batch * 5)
ub = [1] * (period_batch * 5)
    # pyswarm expects a single f_ieqcons callable returning an array whose entries must all be >= 0
    xopt, fopt = pso(lambda x: objective(x, period_batch, start, initial_states), lb, ub,
                     f_ieqcons=lambda x: np.array(constraint(x, period_batch, start, initial_states)),
                     maxiter=100, swarmsize=50)
actions.extend(xopt.reshape((period_batch, 5)).tolist())
    # carry the last state of this batch forward as the initial state of the next batch
last_action = actions[-1]
initial_soc = max(0.2, min(0.8, initial_soc + 0.2 * last_action[0] * 0.9))
initial_sg1 = max(10, min(150, 100 * last_action[1] + initial_sg1))
initial_sg2 = max(50, min(375, 100 * last_action[2] + initial_sg2))
initial_sg3 = max(100, min(500, 200 * last_action[3] + initial_sg3))
start += batch_size
actions_json_path = './actions_batch_p.json'
try:
with open(actions_json_path, 'w') as f:
json.dump(actions, f)
print(f"文件保存成功:{actions_json_path}")
except Exception as e:
print(f"文件保存失败:{e}")

View File

@ -1,183 +0,0 @@
import json
import os
import time
import numpy as np
import pandas as pd
import torch
from concurrent.futures import ThreadPoolExecutor
from deap import base, creator, tools, algorithms
def fitness_torch(individuals, price, load, temperature, irradiance, wind_speed, device):
individuals_torch = torch.tensor(individuals, device=device)
num_individuals = individuals_torch.shape[0]
Ac, Ag1, Ag2, Ag3, Av = [individuals_torch[:, i * period:(i + 1) * period] for i in range(5)]
soc = torch.zeros((num_individuals, period), device=device)
Pg1, Pg2, Pg3 = [torch.zeros((num_individuals, period), device=device) for _ in range(3)]
Cb, Cg1, Cg2, Cg3, Cs, Cw, Rs, Cp, Pe, Ps, Ee, Es = [torch.zeros((num_individuals, period), device=device) for _ in
range(12)]
price_torch = torch.tensor(price, device=device)
load_torch = torch.tensor(load, device=device)
temperature_torch = torch.tensor(temperature, device=device)
irradiance_torch = torch.tensor(irradiance, device=device)
wind_speed_torch = torch.tensor(wind_speed, device=device)
for t in range(period):
if t > 0:
soc[:, t] = torch.clamp(soc[:, t - 1] + 0.2 * Ac[:, t] * 0.9, 0.2, 0.8)
Pg1[:, t] = torch.clamp(Pg1[:, t - 1] + 100 * Ag1[:, t], min=0, max=150)
Pg2[:, t] = torch.clamp(Pg2[:, t - 1] + 100 * Ag2[:, t], min=0, max=375)
Pg3[:, t] = torch.clamp(Pg3[:, t - 1] + 200 * Ag3[:, t], min=0, max=500)
else:
soc[:, t] = 0.4
Pg1[:, t] = torch.clamp(100 * Ag1[:, t], min=0, max=150)
Pg2[:, t] = torch.clamp(100 * Ag2[:, t], min=0, max=375)
Pg3[:, t] = torch.clamp(200 * Ag3[:, t], min=0, max=500)
Pso = torch.clamp((0.2 * irradiance_torch[t] + 0.05 * temperature_torch[t] - 9.25) * (1 + Av[:, t]), min=0)
Pw = torch.where(
(wind_speed_torch[t] >= 3) & (wind_speed_torch[t] < 8),
wind_speed_torch[t] ** 3 * 172.2625 / 1000,
torch.where(
(wind_speed_torch[t] >= 8) & (wind_speed_torch[t] < 12),
64 * 172.2625 / 125,
torch.zeros_like(wind_speed_torch[t])
)
)
P = Ac[:, t] + Pg1[:, t] + Pg2[:, t] + Pg3[:, t] + Pso + Pw
Ee[:, t] = torch.where(P >= load_torch[t], P - load_torch[t], torch.zeros_like(P))
Es[:, t] = torch.where(P < load_torch[t], load_torch[t] - P, torch.zeros_like(P))
Cb[:, t] = 0.01 * Ac[:, t] + 0.1 * soc[:, t]
Cg1[:, t] = 0.0034 * Pg1[:, t] ** 2 + 3 * Pg1[:, t] + 30
Cg2[:, t] = 0.001 * Pg2[:, t] ** 2 + 10 * Pg2[:, t] + 40
Cg3[:, t] = 0.001 * Pg3[:, t] ** 2 + 15 * Pg3[:, t] + 70
Cs[:, t] = 0.01 * Pso
Cw[:, t] = 0.01 * Pw
Rs[:, t] = 0.5 * price_torch[t] * Ee[:, t]
Cp[:, t] = price_torch[t] * Es[:, t]
Pe[:, t] = torch.where(Ee[:, t] > 100, (Ee[:, t] - 100) * 50, torch.zeros_like(Ee[:, t]))
Ps[:, t] = torch.where(Es[:, t] > 100, (Es[:, t] - 100) * 50, torch.zeros_like(Es[:, t]))
total_cost = torch.sum(Cb + Cg1 + Cg2 + Cg3 + Cs + Cw + Pe + Ps - Rs + Cp, dim=1)
reward = -total_cost / 1000
return reward.cpu().numpy()
def check_bounds(func):
def wrapper(*args, **kwargs):
offspring = func(*args, **kwargs)
for individual in offspring:
for i in range(len(individual)):
individual[i] = np.clip(individual[i], -1, 1)
return offspring
return wrapper
def save_progress(population, gen, filename='ga_progress.json'):
data = {
'population': [[list(ind), ind.fitness.values[0]] for ind in population],
'generation': gen
}
with open(filename, 'w') as f:
json.dump(data, f)
print(f"进度已保存到第 {gen} 代到 '{filename}'")
def load_progress(filename='ga_progress.json'):
if os.path.exists(filename):
with open(filename, 'r') as f:
data = json.load(f)
population = []
for ind_data in data['population']:
ind = creator.Individual(ind_data[0])
ind.fitness.values = (ind_data[1],)
population.append(ind)
gen = data['generation']
print(f"从第 {gen} 代加载进度。")
return population, gen
else:
return None, 0
def save_decision_values(best_ind, period, file_suffix):
decision_values = [
{"x1": best_ind[i], "x2": best_ind[i + period], "x3": best_ind[i + 2 * period], "x4": best_ind[i + 3 * period],
"x5": best_ind[i + 4 * period]} for i in range(period)]
filename = f'./decision_values_{file_suffix}.json'
with open(filename, 'w') as f:
json.dump(decision_values, f)
print(f"周期 {file_suffix}:决策值已保存到 '{filename}'")
def main():
data = pd.read_csv('./data.csv')
    # period = len(data)
    global period  # fitness_torch reads `period` at module scope, so promote it there
    period = 2
price, load, temperature, irradiance, wind_speed = [data[col].values for col in
['price', 'load', 'temperature', 'irradiance', 'wind_speed']]
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)
toolbox = base.Toolbox()
toolbox.register("attr_float", np.random.uniform, -1, 1)
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_float, n=5 * period)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
toolbox.register("mate", check_bounds(tools.cxBlend), alpha=0.5)
toolbox.register("mutate", check_bounds(tools.mutGaussian), mu=0, sigma=0.1, indpb=0.2)
toolbox.register("select", tools.selTournament, tournsize=3)
toolbox.register("evaluate", fitness_torch)
population, start_gen = load_progress()
if population is None:
population = toolbox.population(n=500)
start_gen = 0
NGEN, CXPB, MUTPB = 500, 0.7, 0.2
batch_size = 500
for gen in range(start_gen, NGEN):
start_time = time.time()
offspring = algorithms.varAnd(population, toolbox, cxpb=CXPB, mutpb=MUTPB)
num_individuals = len(offspring)
with ThreadPoolExecutor() as executor:
futures = []
for i in range(0, num_individuals, batch_size):
batch = offspring[i:i + batch_size]
individuals = [ind[:] for ind in batch]
futures.append(
                    executor.submit(toolbox.evaluate, individuals, price, load, temperature, irradiance, wind_speed,
                                    "cuda" if torch.cuda.is_available() else "cpu"))  # fall back to CPU when no GPU is available
for future in futures:
fitnesses = future.result()
for ind, fitness in zip(offspring, fitnesses):
ind.fitness.values = (fitness,)
population = toolbox.select(offspring, k=len(population))
end_time = time.time()
elapsed_time = end_time - start_time
print(f"{gen + 1} 代完成于 {elapsed_time:.2f}")
if (gen + 1) % 100 == 0:
best_ind = tools.selBest(population, 1)[0]
save_decision_values(best_ind, period, gen + 1)
save_progress(population, gen + 1)
best_ind = tools.selBest(population, 1)[0]
    print('Best individual:', best_ind)
    print('Fitness:', best_ind.fitness.values)
save_decision_values(best_ind, period, NGEN)
save_progress(population, NGEN)
if __name__ == "__main__":
main()
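As a quick illustration of the batched-evaluation idea behind fitness_torch above, here is a small self-contained sketch (our simplification: a single hour, solar and wind omitted, made-up price and load) that scores a whole population of action vectors in one vectorized torch pass:

import torch

def batched_reward(actions, price, load, prev_soc=0.4):
    # actions: (N, 5) tensor of [Ac, Ag1, Ag2, Ag3, Av] in [-1, 1] for one hour
    Ac, Ag1, Ag2, Ag3, Av = actions.unbind(dim=1)
    soc = torch.clamp(prev_soc + 0.2 * Ac * 0.9, 0.2, 0.8)
    Pg1 = torch.clamp(100 * Ag1, 0, 150)
    Pg2 = torch.clamp(100 * Ag2, 0, 375)
    Pg3 = torch.clamp(200 * Ag3, 0, 500)
    P = Ac + Pg1 + Pg2 + Pg3
    shortage = torch.clamp(load - P, min=0)
    cost = (0.0034 * Pg1 ** 2 + 3 * Pg1 + 30) + (0.001 * Pg2 ** 2 + 10 * Pg2 + 40) \
         + (0.001 * Pg3 ** 2 + 15 * Pg3 + 70) + 0.01 * Ac + 0.1 * soc + price * shortage
    return -cost / 1000

device = "cuda" if torch.cuda.is_available() else "cpu"
pop = torch.rand(500, 5, device=device) * 2 - 1        # 500 candidates in [-1, 1]
rewards = batched_reward(pop, price=50.0, load=300.0)  # one fused evaluation, no Python loop
print(rewards.shape)                                   # torch.Size([500])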

View File

@ -1,183 +0,0 @@
import json
import os
import time
import numpy as np
import pandas as pd
import torch
from concurrent.futures import ThreadPoolExecutor
from deap import base, creator, tools, algorithms
def fitness_torch(individuals, price, load, temperature, irradiance, wind_speed, prev_soc, prev_Pg1, prev_Pg2, prev_Pg3,
device):
individuals_torch = torch.tensor(individuals, device=device)
num = individuals_torch.shape[0]
Ac, Ag1, Ag2, Ag3, Av = [individuals_torch[:, i * period:(i + 1) * period] for i in range(5)]
# soc = torch.zeros((num, period), device=device)
# Pg1, Pg2, Pg3 = [torch.zeros((num, period), device=device) for _ in range(3)]
# Cb, Cg1, Cg2, Cg3, Cs, Cw, Rs, Cp, Pe, Ps, Ee, Es = [torch.zeros((num, period), device=device) for _ in range(12)]
soc = torch.clamp(prev_soc + 0.2 * Ac * 0.9, 0.2, 0.8)
Pg1 = torch.clamp(prev_Pg1 + 100 * Ag1, min=0, max=150)
Pg2 = torch.clamp(prev_Pg2 + 100 * Ag2, min=0, max=375)
Pg3 = torch.clamp(prev_Pg3 + 200 * Ag3, min=0, max=500)
Pso = torch.clamp((0.2 * irradiance + 0.05 * temperature - 9.25) * (1 + Av), min=0)
Pw = torch.where(
(wind_speed >= 3) & (wind_speed < 8),
wind_speed ** 3 * 172.2625 / 1000,
torch.where(
(wind_speed >= 8) & (wind_speed < 12),
64 * 172.2625 / 125,
torch.zeros_like(wind_speed)
)
)
P = Ac + Pg1 + Pg2 + Pg3 + Pso + Pw
Ee = torch.where(P >= load, P - load, torch.zeros_like(P))
Es = torch.where(P < load, load - P, torch.zeros_like(P))
Cb = 0.01 * Ac + 0.1 * soc
Cg1 = 0.0034 * Pg1 ** 2 + 3 * Pg1 + 30
Cg2 = 0.001 * Pg2 ** 2 + 10 * Pg2 + 40
Cg3 = 0.001 * Pg3 ** 2 + 15 * Pg3 + 70
Cs = 0.01 * Pso
Cw = 0.01 * Pw
Rs = 0.5 * price * Ee
Cp = price * Es
Pe = torch.where(Ee > 100, (Ee - 100) * 50, torch.zeros_like(Ee))
Ps = torch.where(Es > 100, (Es - 100) * 50, torch.zeros_like(Es))
total_cost = torch.sum(Cb + Cg1 + Cg2 + Cg3 + Cs + Cw + Pe + Ps - Rs + Cp, dim=1)
reward = -total_cost / 1000
return reward.cpu().numpy(), soc.cpu().numpy(), Pg1.cpu().numpy(), Pg2.cpu().numpy(), Pg3.cpu().numpy()
def save_decision_values(best_ind, period, index):
decisions = {
'Ac': best_ind[0],
'Ag1': best_ind[1],
'Ag2': best_ind[2],
'Ag3': best_ind[3],
'Av': best_ind[4]
}
with open(f'decision_values_{period}_index_{index}.json', 'w') as f:
json.dump(decisions, f)
def save_progress(population, period):
    population_data = [list(ind) for ind in population]  # Individual is a list subclass, so it has no .tolist()
with open(f'population_{period}.json', 'w') as f:
json.dump({'period': period, 'population': population_data}, f)
def load_progress():
if os.path.exists('population_gen_499.json'):
with open('population_gen_499.json', 'r') as f:
data = json.load(f)
return data['population'], data['period']
return None, 0
def check_bounds(func):
def wrapper(*args, **kwargs):
offspring = func(*args, **kwargs)
        if any(child is None for child in offspring):  # mutation returns a 1-tuple, so do not index offspring[1]
            print("Error: one of the offspring is None", offspring)
            raise ValueError("Offspring cannot be None.")
for child in offspring:
for i in range(len(child)):
if child[i] < -1:
child[i] = -1
elif child[i] > 1:
child[i] = 1
return offspring
return wrapper
def main():
    global period  # fitness_torch reads `period` at module scope
    period = 8760
NGEN = 500
CXPB, MUTPB = 0.7, 0.2
batch_size = 500
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
data = pd.read_csv('./data.csv')
price, load, temperature, irradiance, wind_speed = [data[col].values for col in
['price', 'load', 'temperature', 'irradiance', 'wind_speed']]
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
creator.create("Individual", list, fitness=creator.FitnessMin)
toolbox = base.Toolbox()
toolbox.register("attr_float", np.random.uniform, -1, 1)
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_float, 5)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
toolbox.register("mate", check_bounds(tools.cxBlend), alpha=0.5)
toolbox.register("mutate", check_bounds(tools.mutGaussian), mu=0, sigma=0.1, indpb=0.2)
toolbox.register("select", tools.selTournament, tournsize=3)
toolbox.register("evaluate", fitness_torch)
    population, _ = load_progress()  # load_progress returns a (population, period) tuple
if population is None:
population = toolbox.population(n=NGEN)
print("Initial population:", len(population), "and first individual:", population[0])
prev_soc = torch.tensor([0.4] * NGEN, device=device)
prev_Pg1 = torch.zeros(NGEN, device=device)
prev_Pg2 = torch.zeros(NGEN, device=device)
prev_Pg3 = torch.zeros(NGEN, device=device)
for index in range(period):
for gen in range(NGEN):
start_time = time.time()
offspring = algorithms.varAnd(population, toolbox, cxpb=CXPB, mutpb=MUTPB)
num_individuals = len(offspring)
with ThreadPoolExecutor() as executor:
futures = []
for i in range(0, num_individuals, batch_size):
batch = offspring[i:i + batch_size]
individuals = [ind[:] for ind in batch]
futures.append(
executor.submit(toolbox.evaluate, individuals, price[index], load[index], temperature[index],
irradiance[index], wind_speed[index], prev_soc, prev_Pg1, prev_Pg2, prev_Pg3,
device)
)
for future in futures:
                    fitnesses, socs, Pg1s, Pg2s, Pg3s = future.result()  # the evaluator already returns the five arrays
for ind, fitness in zip(offspring, fitnesses):
ind.fitness.values = (fitness,)
prev_soc[:len(socs)] = torch.tensor(socs, device=device)
prev_Pg1[:len(Pg1s)] = torch.tensor(Pg1s, device=device)
prev_Pg2[:len(Pg2s)] = torch.tensor(Pg2s, device=device)
prev_Pg3[:len(Pg3s)] = torch.tensor(Pg3s, device=device)
population = toolbox.select(offspring, k=len(population))
print("Population after selection:", population[:5])
end_time = time.time()
print(f"{index + 1}小时完成花费 {end_time - start_time:.2f}")
best_ind = tools.selBest(population, 1)[0]
        print('Best individual:', best_ind)
        print('Fitness:', best_ind.fitness.values)
save_decision_values(best_ind, period, index)
save_progress(population, period)
prev_soc.fill_(0.4)
prev_Pg1.fill_(0)
prev_Pg2.fill_(0)
prev_Pg3.fill_(0)
prev_soc[:len(best_ind)] = torch.tensor(best_ind[:period], device=device)
prev_Pg1[:len(best_ind)] = torch.tensor(best_ind[period:2 * period], device=device)
prev_Pg2[:len(best_ind)] = torch.tensor(best_ind[2 * period:3 * period], device=device)
prev_Pg3[:len(best_ind)] = torch.tensor(best_ind[3 * period:4 * period], device=device)
if __name__ == "__main__":
main()
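The hand-off of prev_soc / prev_Pg* between hours is the hardest part of this script to follow. The sketch below (our own simplification, with a random stand-in for the GA's per-hour best individual) shows the intended rolling pattern in isolation:

import numpy as np

def step_state(state, action):
    soc, pg1, pg2, pg3 = state
    ac, ag1, ag2, ag3, _av = action
    soc = np.clip(soc + 0.2 * ac * 0.9, 0.2, 0.8)
    pg1 = np.clip(pg1 + 100 * ag1, 0, 150)
    pg2 = np.clip(pg2 + 100 * ag2, 0, 375)
    pg3 = np.clip(pg3 + 200 * ag3, 0, 500)
    return soc, pg1, pg2, pg3

state = (0.4, 0.0, 0.0, 0.0)                   # initial soc and generator outputs
for hour in range(3):                          # 8760 in the full run
    best_action = np.random.uniform(-1, 1, 5)  # stand-in for the GA's best individual at this hour
    state = step_state(state, best_action)     # becomes prev_soc / prev_Pg* for the next hour
    print(hour, np.round(state, 3))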

View File

@ -42,7 +42,6 @@ class Battery:
self.capacity = parameters['capacity']
self.min_soc = parameters['min_soc']
self.max_soc = parameters['max_soc']
self.initial_capacity = parameters['initial_capacity']
self.degradation = parameters['degradation']
self.holding = parameters['holding']
self.max_charge = parameters['max_charge']

View File

@ -30,7 +30,6 @@ battery_parameters = {
'holding': 0.1,
'max_soc': 0.8,
'min_soc': 0.2,
'initial_capacity': 0.4
}
dg_parameters = {

View File

@ -67,8 +67,8 @@ def plot_optimization_result(datasource, directory): # data source is dataframe
axs[1, 0].plot(T, datasource['netload'], label='Netload', drawstyle='steps-mid', alpha=0.7)
axs[1, 0].legend(loc='upper right', bbox_to_anchor=(1.4, 1), fontsize=12, frameon=False, labelspacing=0.3)
fig.savefig(f"{directory}/rl.svg", format='svg', dpi=600, bbox_inches='tight')
    print('rl figure has been plotted and saved')
fig.savefig(f"{directory}/gurobi.svg", format='svg', dpi=600, bbox_inches='tight')
    print('gurobi figure has been plotted and saved')
def plot_evaluation_information(datasource, directory):
@ -139,8 +139,8 @@ def plot_evaluation_information(datasource, directory):
axs[1, 1].set_xlabel('Time (h)')
axs[1, 1].set_ylabel('Cost')
axs[1, 1].bar(eval_data['time_step'], eval_data['operation_cost'])
fig.savefig(f"{directory}/gurobi.svg", format='svg', dpi=600, bbox_inches='tight')
    print('gurobi figure has been plotted and saved')
fig.savefig(f"{directory}/rl.svg", format='svg', dpi=600, bbox_inches='tight')
    print('rl figure has been plotted and saved')
def make_dir(directory, feature_change):

View File

@ -1,88 +0,0 @@
from pathlib import Path
from openai import OpenAI
import os
client = OpenAI(
    api_key=os.environ.get("DASHSCOPE_API_KEY"),  # read the key from the environment rather than hard-coding it
    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",  # DashScope OpenAI-compatible base_url
)
file_csv = client.files.create(file=Path("./data.csv"), purpose="file-extract")
# A newly uploaded file must first be parsed by the model, so the first response may take noticeably longer
completion = client.chat.completions.create(
model="qwen-long",
messages=[
{
'role': 'system',
'content':
                'You are an expert in large models and reinforcement learning who specializes in tuning and controlling smart residential energy systems, and you can give the optimal action decision for the current environment state. '
                'The energy system consists of a solar plant, a wind plant, generators and a battery module connected to the residence through the power grid. Glossary of symbols: '
                'Ac: battery charging action; Ag1: generator-1 power action; Ag2: generator-2 power action; Ag3: generator-3 power action; Av: solar voltage action; '
                'Sp: electricity price state; Sc: battery state of charge; Sl: residential load state; Sg1: generator-1 power state; Sg2: generator-2 power state; Sg3: generator-3 power state; St: temperature state; Si: solar irradiance state; Sw: wind speed state; '
                'Pc: battery charge/discharge amount; Pg1: generator-1 output; Pg2: generator-2 output; Pg3: generator-3 output; Pso: solar output; Pw: wind output; '
                'Cb: battery cost; Cg1: generator-1 cost; Cg2: generator-2 cost; Cg3: generator-3 cost; Cs: solar cost; Cw: wind cost; '
                'Rs: sales revenue; Cp: purchase cost; Ee: energy surplus; Es: energy shortage; Pe: surplus penalty; Ps: shortage penalty. '
                'Part 1. Control scheme and objectives: '
                '1. Prefer solar and wind generation; when they cannot cover the full load, use the generators and the battery. '
                '2. When supplying power, combine the available sources (solar, wind, generators, battery), weigh the cost of each module, meet the residential load of the next time step, and minimize the supply-demand gap. '
                '3. Limit transactions that exceed the public-grid trading cap, so that grid fluctuations do not affect the stability of the residential supply. '
                '4. Maximize the reward function and minimize the energy imbalance. '
                'Part 2. The continuous state space is a vector of length 9 with the following components: '
                'Sp (from data); Sc (initially 0.4, always between 0.2 and 0.8); Sl (from data); '
                'Sg1 (initial power 0); Sg2 (initial power 0); Sg3 (initial power 0); '
                'St (from data); Si (from data); Sw (from data). '
                'Sc, Sg1, Sg2 and Sg3 are always non-negative. '
                'Part 3. The continuous action space is a vector x of length 5 giving the values at the current time, each ranging from -1 to 1. '
                'Each value changes relative to the previous time step; Ag1, Ag2 and Ag3 are changes relative to the maximum instantaneous powers 100, 100 and 200 respectively. '
                '1. Ac: the total battery capacity is 500, the initial battery capacity is 500 * 0.4 = 200, and the capacity change is 100 * x; '
                '2. Ag1: generation change of 100 * x; '
                '3. Ag2: generation change of 100 * x; '
                '4. Ag3: generation change of 200 * x; '
                '5. Av: the solar voltage becomes (1 + x) times its previous value. '
                'Generators can be switched on or off by adjusting their power as needed: when off, keep Sg = 0; when on, the Pg constraints must be satisfied. '
                'Because of cost, a later generator is normally only switched on when the previous one at maximum power cannot cover the remaining load. '
                'Part 4. Generation formulas: '
                '1. Pc = max(0.2, min(0.8, Sc + 0.2 * action[0] * 0.9)), where 0.8 and 0.2 are the upper and lower bounds of the battery state of charge; '
                '2. Pg1 = max(0, min(150, 100 * action[1] + Sg1)), where 150 and 0 bound the output of generator 1 when it is on; '
                '3. Pg2 = max(0, min(375, 100 * action[2] + Sg2)), where 375 and 0 bound the output of generator 2 when it is on; '
                '4. Pg3 = max(0, min(500, 200 * action[3] + Sg3)), where 500 and 0 bound the output of generator 3 when it is on; '
                '5. Pso = max(0, (0.2 * Si + 0.05 * St - 9.25) * (1 + action[4])), so solar output is never negative; '
                '6. Pw depends on Ws: '
                'if 3 <= Ws < 8, Pw = Ws^3 * 172.2625 / 1000; '
                'elif 8 <= Ws < 12, Pw = 64 * 172.2625 / 125; '
                'else Pw = 0. '
                'Part 5. Cost and energy balance: '
                'P = Pc + Pg1 + Pg2 + Pg3 + Pso + Pw; '
                'if P >= Sl, Ee = P - Sl, else Es = Sl - P. '
                '1. Cb = 0.01 * Ac + 0.1 * Sc; '
                '2. Cg1 = 0.0034 * Pg1^2 + 3 * Pg1 + 30; '
                '3. Cg2 = 0.001 * Pg2^2 + 10 * Pg2 + 40; '
                '4. Cg3 = 0.001 * Pg3^2 + 15 * Pg3 + 70; '
                '5. Cs = 0.01 * Pso; '
                '6. Cw = 0.01 * Pw; '
                '7. Rs = 0.5 * Sp * Ee; '
                '8. Cp = Sp * Es; '
                '9. Pe = (Ee - 100) * 50; '
                '10. Ps = (Es - 100) * 50. '
                'Rs and Pe apply to the energy-surplus case while Cp and Ps apply to the shortage case, so only one of the two groups can be non-zero at a time. '
                'Part 6. Reward function: '
                'Reward = - (Cb + Cg1 + Cg2 + Cg3 + Cs + Cw + Pe + Ps - Rs + Cp) / 1000'
},
{
'role': 'system',
'content': f'fileid://{file_csv.id}'
},
{
'role': 'user',
'content':
                'Read the data and use the formulas above to compute the state at every time step. Use the most suitable optimization method and give the action control scheme for every time step: '
                'compute once per hour and generate an array of length 5 representing the action decisions for Ac, Ag1, Ag2, Ag3 and Av. '
                'Return the data in the following JSON format: [{x1, x2, x3, x4, x5}, {x1, x2, x3, x4, x5}, ..., {x1, x2, x3, x4, x5}]. '
                'Only the JSON data is needed, with no additional analysis.'
}
],
stream=False
)
print(completion.choices[0].message.model_dump())  # non-streaming output
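To sanity-check whatever the model returns, a small sketch like the following (ours, not part of the original script; it assumes the reply really is the pure JSON list of {x1..x5} objects requested above, and uses made-up state values) can parse the actions and score one hour with the prompt's reward formula, applying the 100-unit penalty threshold used in the repo's other scripts:

import json

def hour_reward(a, sp, sl, sc, sg1, sg2, sg3, st, si, sw):
    ac, ag1, ag2, ag3, av = a
    pc = max(0.2, min(0.8, sc + 0.2 * ac * 0.9))
    pg1 = max(0, min(150, 100 * ag1 + sg1))
    pg2 = max(0, min(375, 100 * ag2 + sg2))
    pg3 = max(0, min(500, 200 * ag3 + sg3))
    pso_ = max(0, (0.2 * si + 0.05 * st - 9.25) * (1 + av))
    pw = sw ** 3 * 172.2625 / 1000 if 3 <= sw < 8 else (64 * 172.2625 / 125 if 8 <= sw < 12 else 0)
    p = pc + pg1 + pg2 + pg3 + pso_ + pw
    ee, es = (p - sl, 0) if p >= sl else (0, sl - p)
    cost = (0.01 * ac + 0.1 * sc
            + 0.0034 * pg1 ** 2 + 3 * pg1 + 30
            + 0.001 * pg2 ** 2 + 10 * pg2 + 40
            + 0.001 * pg3 ** 2 + 15 * pg3 + 70
            + 0.01 * pso_ + 0.01 * pw
            - 0.5 * sp * ee + sp * es
            + (ee - 100) * 50 * (ee > 100) + (es - 100) * 50 * (es > 100))
    return -cost / 1000

actions = json.loads(completion.choices[0].message.content)  # assumes the reply is pure JSON
first = actions[0]
a = [first[k] for k in ("x1", "x2", "x3", "x4", "x5")]
print(hour_reward(a, sp=50, sl=300, sc=0.4, sg1=0, sg2=0, sg3=0, st=25, si=500, sw=6))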

View File

@ -85,7 +85,7 @@ def optimization_base_result(env, month, day, initial_soc):
(battery_energy_change[t] * battery_efficiency) for t in range(1, period)), name='soc update')
    # Set up the cost terms
cost_gen = gp.quicksum(
(a_para[g] * gen_output[g, t] * gen_output[g, t] + b_para[g] * gen_output[g, t] + c_para[g] * on_off[g, t]) for
(a_para[g] * gen_output[g, t] * gen_output[g, t] + b_para[g] * gen_output[g, t] + c_para[g]) * on_off[g, t] for
t in range(period) for g in range(NUM_GEN))
cost_battery = gp.quicksum(battery_energy_change[t] * battery_degradation + soc[t] * battery_holding for t in range(period))
cost_import = gp.quicksum(grid_energy_import[t] * price[t] for t in range(period))
@ -94,7 +94,6 @@ def optimization_base_result(env, month, day, initial_soc):
cost_wind = gp.quicksum(wind[t] * wind_cofficient for t in range(period))
m.setObjective((cost_gen + cost_battery + cost_import - cost_export + cost_solar + cost_wind), GRB.MINIMIZE)
m.setParam(GRB.Param.FuncNonlinear, 1)
m.optimize()
    # Record the data for plotting
@ -155,7 +154,7 @@ class Arguments:
self.num_episode = 1000
self.gamma = 0.995 # discount factor of future rewards
        # self.reward_scale = 1  # an approximate target reward, usually close to 256
self.learning_rate = 2 ** -14 # 2 ** -14 ~= 6e-5
        self.learning_rate = 2 ** -14  # 2 ** -14 ~= 6e-5
self.soft_update_tau = 2 ** -8 # 2 ** -8 ~= 5e-3
self.net_dim = 256 # the network width 256
self.batch_size = 4096 # num of transitions sampled from replay buffer.