From 741fba6cd5b77987534dd3ec5e7f4a0625d7ecfc Mon Sep 17 00:00:00 2001 From: chenxiaodong Date: Fri, 5 Jul 2024 15:39:57 +0800 Subject: [PATCH] update logic --- PPO.py | 4 +- PPO_delete.py | 4 - PPO_primal_dual.py | 393 +++++++++++++++++++++++++++++++++++++ SAC.py | 10 +- data_manager.py | 11 +- environment.py | 31 ++- environment_llm.py | 177 +++++++++++++++++ environment_primal_dual.py | 184 +++++++++++++++++ module.py | 39 ++-- parameters.py | 1 + plotDRL.py | 13 +- tools.py | 6 +- 12 files changed, 811 insertions(+), 62 deletions(-) create mode 100644 PPO_primal_dual.py create mode 100644 environment_llm.py create mode 100644 environment_primal_dual.py diff --git a/PPO.py b/PPO.py index 4dc3d17..0e02e36 100644 --- a/PPO.py +++ b/PPO.py @@ -330,8 +330,8 @@ if __name__ == '__main__': buffer = list() '''init training parameters''' num_episode = args.num_episode - args.train = False - args.save_network = False + # args.train = False + # args.save_network = False # args.test_network = False # args.save_test_data = False # args.compare_with_gurobi = False diff --git a/PPO_delete.py b/PPO_delete.py index bab0768..a8a9ad0 100644 --- a/PPO_delete.py +++ b/PPO_delete.py @@ -133,14 +133,10 @@ class AgentPPO: action_rl, noise = self.act.get_action(states[0]) action_rl = action_rl.detach().cpu().numpy().flatten() noises = noise.detach().cpu().numpy().flatten() - # print(f"Action from RL model: {action_rl}") - # print(f"Noise: {noise}") - # print(f"Expected action dimension: {self.action_dim}") index = self.current_step % len(self.llm_actions) self.current_step += 1 action_llm = self.llm_actions[index] action_llm = np.array(action_llm, dtype=np.float32) - # print(f"Action from LLM: {action_llm}") action_combined = 0.5 * action_rl + 0.5 * action_llm if action_combined.shape[0] != self.action_dim: raise ValueError("Combined action dimension mismatch. 
Check the action generation process.") diff --git a/PPO_primal_dual.py b/PPO_primal_dual.py new file mode 100644 index 0000000..618674a --- /dev/null +++ b/PPO_primal_dual.py @@ -0,0 +1,393 @@ +import os +import pickle +from copy import deepcopy +import numpy as np +import pandas as pd +import torch +import torch.nn as nn +from environment import ESSEnv +from tools import get_episode_return, test_one_episode, optimization_base_result + +os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE' + + +class ActorPPO(nn.Module): + def __init__(self, mid_dim, state_dim, action_dim, layer_norm=False): + super().__init__() + self.layer_norm = layer_norm + self.net = nn.Sequential( + nn.Linear(state_dim, mid_dim), nn.ReLU(), + nn.Linear(mid_dim, mid_dim), nn.ReLU(), + nn.Linear(mid_dim, mid_dim), nn.Hardswish(), + nn.Linear(mid_dim, action_dim) + ) + self.a_logstd = nn.Parameter(torch.zeros((1, action_dim)) - 0.5, requires_grad=True) + self.sqrt_2pi_log = np.log(np.sqrt(2 * np.pi)) + + if self.layer_norm: + self.apply_layer_norm() + + def apply_layer_norm(self): + def init_weights(layer): + if isinstance(layer, nn.Linear): + nn.init.orthogonal_(layer.weight, 1.0) + nn.init.constant_(layer.bias, 0.0) + + self.net.apply(init_weights) + + def forward(self, state): + return self.net(state).tanh() + + def get_action(self, state): + a_avg = self.forward(state) + a_std = self.a_logstd.exp() + noise = torch.randn_like(a_avg) + action = a_avg + noise * a_std + return action, noise + + def get_logprob_entropy(self, state, action): + a_avg = self.forward(state) + a_std = self.a_logstd.exp() + delta = ((a_avg - action) / a_std).pow(2) * 0.5 + logprob = -(self.a_logstd + self.sqrt_2pi_log + delta).sum(1) + dist_entropy = (logprob.exp() * logprob).mean() + return logprob, dist_entropy + + def get_old_logprob(self, _action, noise): + delta = noise.pow(2) * 0.5 + return -(self.a_logstd + self.sqrt_2pi_log + delta).sum(1) + + +class CriticAdv(nn.Module): + def __init__(self, mid_dim, state_dim, _action_dim, layer_norm=False): + super().__init__() + self.layer_norm = layer_norm + self.net = nn.Sequential( + nn.Linear(state_dim, mid_dim), nn.ReLU(), + nn.Linear(mid_dim, mid_dim), nn.ReLU(), + nn.Linear(mid_dim, mid_dim), nn.Hardswish(), + nn.Linear(mid_dim, 1) + ) + if self.layer_norm: + self.apply_layer_norm() + + def apply_layer_norm(self): + def init_weights(layer): + if isinstance(layer, nn.Linear): + nn.init.orthogonal_(layer.weight, 1.0) + nn.init.constant_(layer.bias, 0.0) + + self.net.apply(init_weights) + + def forward(self, state): + return self.net(state) + + +class AgentPrimalDualPPO: + def __init__(self): + self.state = None + self.device = None + self.action_dim = None + self.get_obj_critic = None + + self.criterion = torch.nn.SmoothL1Loss() + self.cri = self.cri_target = self.if_use_cri_target = self.cri_optim = self.ClassCri = None + self.act = self.act_target = self.if_use_act_target = self.act_optim = self.ClassAct = None + + self.ClassCri = CriticAdv + self.ClassAct = ActorPPO + + self.ratio_clip = 0.2 + self.lambda_entropy = 0.02 + self.lambda_gae_adv = 0.98 + self.get_reward_sum = None + self.trajectory_list = None + + self.lambda_cost = 1.0 # 初始对偶变量 + self.constraint_value = 1.0 # 约束值,例如安全成本限制 + + def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, if_use_gae=False, gpu_id=0, layer_norm=False): + self.device = torch.device(f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu") + self.trajectory_list = list() + self.get_reward_sum = self.get_reward_sum_gae if if_use_gae 
else self.get_reward_sum_raw + + self.cri = self.ClassCri(net_dim, state_dim, action_dim, layer_norm).to(self.device) + self.act = self.ClassAct(net_dim, state_dim, action_dim, layer_norm).to( + self.device) if self.ClassAct else self.cri + self.cri_target = deepcopy(self.cri) if self.if_use_cri_target else self.cri + self.act_target = deepcopy(self.act) if self.if_use_act_target else self.act + + self.cri_optim = torch.optim.Adam(self.cri.parameters(), learning_rate) + self.act_optim = torch.optim.Adam(self.act.parameters(), learning_rate) if self.ClassAct else self.cri + + def select_action(self, state): + states = torch.as_tensor((state,), dtype=torch.float32, device=self.device) + actions, noises = self.act.get_action(states) + return actions[0].detach().cpu().numpy(), noises[0].detach().cpu().numpy() + + def explore_env(self, env, target_step): + state = self.state + trajectory_temp = list() + last_done = 0 + for i in range(target_step): + action, noise = self.select_action(state) + state, next_state, reward, done, cost = env.step(np.tanh(action)) + trajectory_temp.append((state, reward, done, action, noise, cost)) + if done: + state = env.reset() + last_done = i + else: + state = next_state + self.state = state + + trajectory_list = self.trajectory_list + trajectory_temp[:last_done + 1] + self.trajectory_list = trajectory_temp[last_done:] + return trajectory_list + + def update_net(self, buffer, batch_size, repeat_times, soft_update_tau): + with torch.no_grad(): + buf_len = buffer[0].shape[0] + buf_state, buf_action, buf_noise, buf_reward, buf_mask, buf_cost = [ten.to(self.device) for ten in buffer] + + buf_value = torch.cat([self.cri_target(buf_state[i:i + 4096]) for i in range(0, buf_len, 4096)], dim=0) + buf_logprob = self.act.get_old_logprob(buf_action, buf_noise) + + buf_r_sum, buf_advantage = self.get_reward_sum(buf_len, buf_reward, buf_mask, buf_value) + buf_advantage = (buf_advantage - buf_advantage.mean()) / (buf_advantage.std() + 1e-5) + + cost_sum = buf_cost.sum().item() + if cost_sum > self.constraint_value: + self.lambda_cost += 0.01 * (cost_sum - self.constraint_value) + else: + self.lambda_cost -= 0.01 * (self.constraint_value - cost_sum) + self.lambda_cost = max(self.lambda_cost, 0) + + obj_critic = obj_actor = None + for _ in range(int(buf_len / batch_size * repeat_times)): + indices = torch.randint(buf_len, size=(batch_size,), requires_grad=False, device=self.device) + state = buf_state[indices] + action = buf_action[indices] + r_sum = buf_r_sum[indices] + logprob = buf_logprob[indices] + advantage = buf_advantage[indices] + + new_logprob, obj_entropy = self.act.get_logprob_entropy(state, action) + ratio = (new_logprob - logprob.detach()).exp() + surrogate1 = advantage * ratio + surrogate2 = advantage * ratio.clamp(1 - self.ratio_clip, 1 + self.ratio_clip) + obj_surrogate = -torch.min(surrogate1, surrogate2).mean() + obj_actor = obj_surrogate + obj_entropy * self.lambda_entropy - self.lambda_cost * buf_cost[indices].mean() + self.optim_update(self.act_optim, obj_actor) + + value = self.cri(state).squeeze(1) + obj_critic = self.criterion(value, r_sum) + self.optim_update(self.cri_optim, obj_critic) + if self.cri_target is not self.cri: + self.soft_update(self.cri_target, self.cri, soft_update_tau) + + a_std_log = getattr(self.act, 'a_std_log', torch.zeros(1)) + return obj_critic.item(), obj_actor.item(), a_std_log.mean().item() + + def get_reward_sum_raw(self, buf_len, buf_reward, buf_mask, buf_value) -> (torch.Tensor, torch.Tensor): + buf_r_sum = 
torch.empty(buf_len, dtype=torch.float32, device=self.device) + pre_r_sum = 0 + for i in range(buf_len - 1, -1, -1): + buf_r_sum[i] = buf_reward[i] + buf_mask[i] * pre_r_sum + pre_r_sum = buf_r_sum[i] + buf_advantage = buf_r_sum - (buf_mask * buf_value[:, 0]) + return buf_r_sum, buf_advantage + + def get_reward_sum_gae(self, buf_len, ten_reward, ten_mask, ten_value) -> (torch.Tensor, torch.Tensor): + buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device) + buf_advantage = torch.empty(buf_len, dtype=torch.float32, device=self.device) + pre_r_sum = 0 + pre_advantage = 0 + for i in range(buf_len - 1, -1, -1): + buf_r_sum[i] = ten_reward[i] + ten_mask[i] * pre_r_sum + pre_r_sum = buf_r_sum[i] + buf_advantage[i] = ten_reward[i] + ten_mask[i] * (pre_advantage - ten_value[i]) + pre_advantage = ten_value[i] + buf_advantage[i] * self.lambda_gae_adv + return buf_r_sum, buf_advantage + + @staticmethod + def optim_update(optimizer, objective): + optimizer.zero_grad() + objective.backward() + optimizer.step() + + @staticmethod + def soft_update(target_net, current_net, tau): + for tar, cur in zip(target_net.parameters(), current_net.parameters()): + tar.data.copy_(cur.data * tau + tar.data * (1.0 - tau)) + + +class Arguments: + def __init__(self, agent=None, env=None): + self.agent = agent # Deep Reinforcement Learning algorithm + self.env = env # the environment for training + self.cwd = None # current work directory. None means set automatically + self.if_remove = False # remove the cwd folder? (True, False, None:ask me) + self.visible_gpu = '0' # for example: os.environ['CUDA_VISIBLE_DEVICES'] = '0, 2,' + # self.worker_num = 2 # rollout workers number pre GPU (adjust it to get high GPU usage) + self.num_threads = 32 # cpu_num for evaluate model, torch.set_num_threads(self.num_threads) + + '''Arguments for training''' + self.num_episode = 1000 # to control the train episodes for PPO + self.gamma = 0.995 # discount factor of future rewards + self.learning_rate = 2 ** -14 # 2e-4 + self.soft_update_tau = 2 ** -8 # 2 ** -8 ~= 5e-3 + + self.net_dim = 256 # the network width + self.batch_size = 4096 # num of transitions sampled from replay buffer. + self.repeat_times = 2 ** 3 # collect target_step, then update network + self.target_step = 4096 # repeatedly update network to keep critic's loss small + self.max_memo = self.target_step # capacity of replay buffer + self.if_per_or_gae = False # GAE for on-policy sparse reward: Generalized Advantage Estimation. + + '''Arguments for evaluate''' + self.random_seed = 0 # initialize random seed in self.init_before_training() + # self.random_seed_list = [1234, 2234, 3234, 4234, 5234] + self.random_seed_list = [1234] + self.train = True + self.save_network = True + self.test_network = True + self.save_test_data = True + self.compare_with_gurobi = True + self.plot_on = True + + def init_before_training(self, if_main): + if self.cwd is None: + agent_name = self.agent.__class__.__name__ + self.cwd = f'./{agent_name}' + + if if_main: + import shutil # remove history according to bool(if_remove) + if self.if_remove is None: + self.if_remove = bool(input(f"| PRESS 'y' to REMOVE: {self.cwd}? 
") == 'y') + elif self.if_remove: + shutil.rmtree(self.cwd, ignore_errors=True) + print(f"| Remove cwd: {self.cwd}") + os.makedirs(self.cwd, exist_ok=True) + + np.random.seed(self.random_seed) + torch.manual_seed(self.random_seed) + torch.set_num_threads(self.num_threads) + torch.set_default_dtype(torch.float32) + + os.environ['CUDA_VISIBLE_DEVICES'] = str(self.visible_gpu) + + +def update_buffer(_trajectory): + _trajectory = list(map(list, zip(*_trajectory))) # 2D-list transpose, here cut the trajectory into 5 parts + ten_state = torch.as_tensor(_trajectory[0]) # tensor state here + ten_reward = torch.as_tensor(_trajectory[1], dtype=torch.float32) + # _trajectory[2] = done, replace done by mask, save memory + ten_mask = (1.0 - torch.as_tensor(_trajectory[2], dtype=torch.float32)) * gamma + ten_action = torch.as_tensor(_trajectory[3]) + ten_noise = torch.as_tensor(_trajectory[4], dtype=torch.float32) + + buffer[:] = (ten_state, ten_action, ten_noise, ten_reward, ten_mask) # list store tensors + + _steps = ten_reward.shape[0] # how many steps are collected in all trajectories + _r_exp = ten_reward.mean() # the mean reward + return _steps, _r_exp + + +if __name__ == '__main__': + args = Arguments() + reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []} + loss_record = {'episode': [], 'steps': [], 'critic_loss': [], 'actor_loss': [], 'entropy_loss': []} + args.visible_gpu = '0' + for seed in args.random_seed_list: + args.random_seed = seed + args.agent = AgentPrimalDualPPO() + + agent_name = f'{args.agent.__class__.__name__}' + args.agent.cri_target = True + args.env = ESSEnv() + args.init_before_training(if_main=True) + agent = args.agent + env = args.env + agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate, + args.if_per_or_gae, layer_norm=True) + + cwd = args.cwd + gamma = args.gamma + batch_size = args.batch_size # how much data should be used to update net + target_step = args.target_step # how many steps of one episode should stop + repeat_times = args.repeat_times # how many times should update for one batch size data + soft_update_tau = args.soft_update_tau + agent.state = env.reset() + buffer = list() + num_episode = args.num_episode + + if args.train: + for i_episode in range(num_episode): + with torch.no_grad(): + trajectory_list = [] + for _ in range(target_step): + current_obs = agent.state + action, noise = agent.select_action(current_obs) + next_obs, reward, done, info, cost = env.step(action) + trajectory_list.append((current_obs, reward, done, action, noise, cost)) + agent.state = next_obs + if done: + break + steps, r_exp = update_buffer(trajectory_list) + critic_loss, actor_loss, entropy_loss = agent.update_net(buffer, batch_size, repeat_times, soft_update_tau) + loss_record['critic_loss'].append(critic_loss) + loss_record['actor_loss'].append(actor_loss) + loss_record['entropy_loss'].append(entropy_loss) + + with torch.no_grad(): + episode_reward, episode_unbalance = get_episode_return(env, agent.act, agent.device) + reward_record['mean_episode_reward'].append(episode_reward) + reward_record['unbalance'].append(episode_unbalance) + print(f'current episode is {i_episode}, reward: {episode_reward}, unbalance: {episode_unbalance}') + + act_save_path = f'{args.cwd}/actor.pth' + loss_record_path = f'{args.cwd}/loss_data.pkl' + reward_record_path = f'{args.cwd}/reward_data.pkl' + + with open(loss_record_path, 'wb') as tf: + pickle.dump(loss_record, tf) + with open(reward_record_path, 'wb') as tf: + 
pickle.dump(reward_record, tf) + + if args.save_network: + torch.save(agent.act.state_dict(), act_save_path) + print('actor parameters have been saved') + + if args.test_network: + args.cwd = agent_name + agent.act.load_stateDict(torch.load(act_save_path)) + print('parameters have been reloaded and test') + record = test_one_episode(env, agent.act, agent.device) + eval_data = pd.DataFrame(record['system_info']) + eval_data.columns = ['time_step', 'price', 'netload', 'action', 'real_action', 'soc', 'battery', 'gen1', 'gen2', + 'gen3', 'temperature', 'irradiance', 'unbalance', 'operation_cost'] + if args.save_test_data: + test_data_save_path = f'{args.cwd}/test_data.pkl' + with open(test_data_save_path, 'wb') as tf: + pickle.dump(record, tf) + + '''compare with gurobi data and results''' + if args.compare_with_gurobi: + month = record['init_info'][0][0] + day = record['init_info'][0][1] + initial_soc = record['init_info'][0][3] + base_result = optimization_base_result(env, month, day, initial_soc) + if args.plot_on: + from plotDRL import PlotArgs, make_dir, plot_evaluation_information, plot_optimization_result + + plot_args = PlotArgs() + plot_args.feature_change = 'primal_dual' + args.cwd = agent_name + plot_dir = make_dir(args.cwd, plot_args.feature_change) + plot_optimization_result(base_result, plot_dir) + plot_evaluation_information(args.cwd + '/' + 'test_data.pkl', plot_dir) + '''compare the different cost get from gurobi and PPO''' + ration = sum(eval_data['operation_cost']) / sum(base_result['step_cost']) + print('operation_cost_sum:', sum(eval_data['operation_cost'])) + print('step_cost_sum:', sum(base_result['step_cost'])) + print('ration:', ration) diff --git a/SAC.py b/SAC.py index 080f330..950953e 100644 --- a/SAC.py +++ b/SAC.py @@ -55,11 +55,11 @@ if __name__ == '__main__': '''here record real unbalance''' ## - # args.train=False - # args.save_network=False - # args.test_network=False - # args.save_test_data=False - # args.compare_with_gurobi=False + args.train = False + args.save_network = False + # args.test_network = False + # args.save_test_data = False + # args.compare_with_gurobi = False # if args.train: collect_data = True diff --git a/data_manager.py b/data_manager.py index 38ee3fd..972c177 100644 --- a/data_manager.py +++ b/data_manager.py @@ -1,6 +1,5 @@ class Constant: MONTHS_LEN = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] - MAX_STEP_HOURS = 24 * 30 class DataManager: @@ -10,6 +9,7 @@ class DataManager: self.Temperature = [] self.Irradiance = [] self.Wind = [] + self.LLM = [] def add_price_element(self, element): self.Prices.append(element) @@ -21,6 +21,8 @@ class DataManager: def add_wind_element(self, element): self.Wind.append(element) + def add_llm_element(self, element): self.LLM.append(element) + # get current time data based on given month day, and day_time def get_price_data(self, month, day, day_time): return self.Prices[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time] @@ -37,6 +39,9 @@ class DataManager: def get_wind_data(self, month, day, day_time): return self.Wind[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time] + def get_llm_data(self, month, day, day_time): + return self.LLM[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time] + # get series data for one episode def get_series_price_data(self, month, day): return self.Prices[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24: @@ -57,3 +62,7 @@ class DataManager: def get_series_wind_data(self, month, day): return 
self.Wind[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24: (sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + 24] + + # def get_series_llm_data(self, month, day): + # return self.LLM[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24: + # (sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + 24] diff --git a/environment.py b/environment.py index c9dc0a8..ec80116 100644 --- a/environment.py +++ b/environment.py @@ -23,13 +23,13 @@ class ESSEnv(gym.Env): self.day = None self.TRAIN = True self.current_time = None - self.episode_length = kwargs.get('episode_length', 24) + self.episode_length = 24 + self.penalty_coefficient = 50 # 约束惩罚系数 + self.sell_coefficient = 0.5 # 售出利润系数 self.battery_parameters = kwargs.get('battery_parameters', battery_parameters) self.dg_parameters = kwargs.get('dg_parameters', dg_parameters) self.solar_parameters = kwargs.get('solar_parameters', solar_parameters) self.wind_parameters = kwargs.get('wind_parameters', wind_parameters) - self.penalty_coefficient = 50 # 约束惩罚系数 - self.sell_coefficient = 0.5 # 售出利润系数 self.grid = Grid() self.battery = Battery(self.battery_parameters) @@ -65,19 +65,19 @@ class ESSEnv(gym.Env): time_step = self.current_time price = self.data_manager.get_price_data(self.month, self.day, self.current_time) - house_load = self.data_manager.get_load_cons_data(self.month, self.day, self.current_time) + houseload = self.data_manager.get_load_cons_data(self.month, self.day, self.current_time) temperature = self.data_manager.get_temperature_data(self.month, self.day, self.current_time) irradiance = self.data_manager.get_irradiance_data(self.month, self.day, self.current_time) - wind_speed = self.data_manager.get_wind_data(self.month, self.day, self.current_time) + windspeed = self.data_manager.get_wind_data(self.month, self.day, self.current_time) pv_generation = self.solar.step(temperature, irradiance) - wd_generation = self.wind.step(wind_speed) + wd_generation = self.wind.step(windspeed) generation = pv_generation + wd_generation - net_load = house_load - generation + netload = houseload - generation - obs = np.concatenate((np.float32(time_step), np.float32(price), np.float32(soc), np.float32(net_load), + obs = np.concatenate((np.float32(time_step), np.float32(price), np.float32(soc), np.float32(netload), np.float32(dg1_output), np.float32(dg2_output), np.float32(dg3_output), - np.float32(temperature), np.float32(irradiance), np.float32(wind_speed)), axis=None) + np.float32(temperature), np.float32(irradiance), np.float32(windspeed)), axis=None) return obs def step(self, action): # state transition: current_obs->take_action->get_reward->get_finish->next_obs @@ -93,19 +93,17 @@ class ESSEnv(gym.Env): self.solar.step(action[4], temperature, irradiance) self.wind.step(wind_speed) self.current_output = np.array((self.dg1.current_output, self.dg2.current_output, self.dg3.current_output, - -self.battery.energy_change, self.solar.current_power, self.wind.current_power)) + -self.battery.energy_change)) actual_production = sum(self.current_output) price = current_obs[1] netload = current_obs[3] unbalance = actual_production - netload - reward = 0 + reward = 0.0 excess_penalty = 0 # 过多 deficient_penalty = 0 # 过少 - sell_benefit = 0 - buy_cost = 0 - self.excess = 0 - self.shedding = 0 + sell_benefit, buy_cost = 0, 0 + self.excess, self.shedding = 0, 0 if unbalance >= 0: # 现在过剩 if unbalance <= self.grid.exchange_ability: # sell money to grid is little [0.029,0.1] @@ -122,7 +120,7 @@ class ESSEnv(gym.Env): buy_cost = self.grid.get_cost(price, 
self.grid.exchange_ability) self.shedding = abs(unbalance) - self.grid.exchange_ability deficient_penalty = self.shedding * self.penalty_coefficient - battery_cost = self.battery.get_cost(self.battery.energy_change) + battery_cost = self.battery.get_cost(self.battery.energy_change, self.battery.current_capacity) dg1_cost = self.dg1.get_cost(self.dg1.current_output) dg2_cost = self.dg2.get_cost(self.dg2.current_output) dg3_cost = self.dg3.get_cost(self.dg3.current_output) @@ -164,6 +162,7 @@ class ESSEnv(gym.Env): wind = wind_df['wind_speed'].to_numpy(dtype=float) '''重新设计价格和发电量以及需求的大小''' + def process_elements(elements, transform_function, add_function): for element in elements: transformed_element = transform_function(element) diff --git a/environment_llm.py b/environment_llm.py new file mode 100644 index 0000000..1e1d5a8 --- /dev/null +++ b/environment_llm.py @@ -0,0 +1,177 @@ +import gym +import json +import numpy as np +import pandas as pd + +from module import * +from parameters import * +from data_manager import * + + +class ESSEnv(gym.Env): + def __init__(self, **kwargs): + super(ESSEnv, self).__init__() + self.excess = None + self.shedding = None + self.unbalance = None + self.real_unbalance = None + self.operation_cost = None + self.current_output = None + self.final_step_outputs = None + self.data_manager = DataManager() + self._load_year_data() + self.month = None + self.day = None + self.TRAIN = True + self.current_time = None + self.episode_length = kwargs.get('episode_length', 24) + self.battery_parameters = kwargs.get('battery_parameters', battery_parameters) + self.dg_parameters = kwargs.get('dg_parameters', dg_parameters) + self.solar_parameters = kwargs.get('solar_parameters', solar_parameters) + self.wind_parameters = kwargs.get('wind_parameters', wind_parameters) + self.penalty_coefficient = 50 # 约束惩罚系数 + self.sell_coefficient = 0.5 # 售出利润系数 + + self.grid = Grid() + self.battery = Battery(self.battery_parameters) + self.dg1 = DG(self.dg_parameters['gen_1']) + self.dg2 = DG(self.dg_parameters['gen_2']) + self.dg3 = DG(self.dg_parameters['gen_3']) + self.solar = Solar(self.solar_parameters) + self.wind = Wind(self.wind_parameters) + + self.action_space = gym.spaces.Box(low=-1, high=1, shape=(5,), dtype=np.float32) # 已增加调节电压动作 + self.state_space = gym.spaces.Box(low=0, high=1, shape=(15,), dtype=np.float32) # 为llm调整shape + + def reset(self, *args): + self.month = np.random.randint(1, 13) # choose 12 month + if self.TRAIN: + self.day = np.random.randint(1, 20) + else: + self.day = np.random.randint(20, Constant.MONTHS_LEN[self.month] - 1) + self.current_time = 0 + self.battery.reset() + self.dg1.reset() + self.dg2.reset() + self.dg3.reset() + self.solar.reset() + self.wind.reset() + return self._build_state() + + def _build_state(self): + soc = self.battery.SOC() + dg1_output = self.dg1.current_output + dg2_output = self.dg2.current_output + dg3_output = self.dg3.current_output + time_step = self.current_time + + price = self.data_manager.get_price_data(self.month, self.day, self.current_time) + houseload = self.data_manager.get_load_cons_data(self.month, self.day, self.current_time) + temperature = self.data_manager.get_temperature_data(self.month, self.day, self.current_time) + irradiance = self.data_manager.get_irradiance_data(self.month, self.day, self.current_time) + wind_speed = self.data_manager.get_wind_data(self.month, self.day, self.current_time) + llm_data = self.data_manager.get_llm_data(self.month, self.day, self.current_time) + + pv_generation = 
self.solar.step(temperature, irradiance) + wd_generation = self.wind.step(wind_speed) + generation = pv_generation + wd_generation + netload = houseload - generation + + obs = np.concatenate((np.float32(time_step), np.float32(price), np.float32(soc), np.float32(netload), + np.float32(dg1_output), np.float32(dg2_output), np.float32(dg3_output), + np.float32(temperature), np.float32(irradiance), np.float32(wind_speed), + np.float32(llm_data)), axis=None) + return obs + + def step(self, action): # state transition: current_obs->take_action->get_reward->get_finish->next_obs + # 在每个组件中添加动作 + current_obs = self._build_state() + temperature = current_obs[7] + irradiance = current_obs[8] + wind_speed = current_obs[9] + self.battery.step(action[0]) # 执行状态转换,电池当前容量也改变 + self.dg1.step(action[1]) + self.dg2.step(action[2]) + self.dg3.step(action[3]) + self.solar.step(action[4], temperature, irradiance) + self.wind.step(wind_speed) + self.current_output = np.array((self.dg1.current_output, self.dg2.current_output, self.dg3.current_output, + -self.battery.energy_change)) + actual_production = sum(self.current_output) + price = current_obs[1] + netload = current_obs[3] + unbalance = actual_production - netload + + reward = 0 + excess_penalty = 0 # 过多 + deficient_penalty = 0 # 过少 + sell_benefit = 0 + buy_cost = 0 + self.excess = 0 + self.shedding = 0 + if unbalance >= 0: # 现在过剩 + if unbalance <= self.grid.exchange_ability: + # sell money to grid is little [0.029,0.1] + sell_benefit = self.grid.get_cost(price, unbalance) * self.sell_coefficient + else: + sell_benefit = self.grid.get_cost(price, self.grid.exchange_ability) * self.sell_coefficient + # real unbalance:电网也无法满足 + self.excess = unbalance - self.grid.exchange_ability + excess_penalty = self.excess * self.penalty_coefficient + else: # unbalance <0, 采用缺少惩罚 + if abs(unbalance) <= self.grid.exchange_ability: + buy_cost = self.grid.get_cost(price, abs(unbalance)) + else: + buy_cost = self.grid.get_cost(price, self.grid.exchange_ability) + self.shedding = abs(unbalance) - self.grid.exchange_ability + deficient_penalty = self.shedding * self.penalty_coefficient + battery_cost = self.battery.get_cost(self.battery.energy_change, self.battery.current_capacity) + dg1_cost = self.dg1.get_cost(self.dg1.current_output) + dg2_cost = self.dg2.get_cost(self.dg2.current_output) + dg3_cost = self.dg3.get_cost(self.dg3.current_output) + solar_cost = self.solar.get_cost(self.solar.current_power) + wind_cost = self.wind.gen_cost(self.wind.current_power) + + self.operation_cost = (battery_cost + dg1_cost + dg2_cost + dg3_cost + solar_cost + wind_cost + excess_penalty + + deficient_penalty - sell_benefit + buy_cost) + reward -= self.operation_cost / 1e3 + self.unbalance = unbalance + self.real_unbalance = self.shedding + self.excess + final_step_outputs = [self.dg1.current_output, self.dg2.current_output, self.dg3.current_output, + self.battery.current_capacity, self.solar.current_power, self.wind.current_power] + self.current_time += 1 + finish = (self.current_time == self.episode_length) + if finish: + self.final_step_outputs = final_step_outputs + self.current_time = 0 + next_obs = self.reset() + else: + next_obs = self._build_state() + return current_obs, next_obs, float(reward), finish + + def _load_year_data(self): + price_df = pd.read_csv('data/prices.csv', sep=',') + load_df = pd.read_csv('data/houseload.csv', sep=',') + irradiance_df = pd.read_csv('data/irradiance.csv', sep=',') + temperature_df = pd.read_csv('data/temper.csv', sep=',') + wind_df = 
pd.read_csv('data/wind.csv', sep=',') + llm_data = json.load(open('data/llm_action.json', 'r')) + + price = price_df['price'].to_numpy(dtype=float) + load = load_df['houseload'].to_numpy(dtype=float) + irradiance = irradiance_df['irradiance'].to_numpy(dtype=float) + temperature = temperature_df['t2m'].to_numpy(dtype=float) + wind = wind_df['wind_speed'].to_numpy(dtype=float) + + '''重新设计价格和发电量及需求的大小''' + def process_elements(elements, transform_function, add_function): + for element in elements: + transformed_element = transform_function(element) + add_function(transformed_element) + + process_elements(price, lambda x: max(x / 10, 0.5), self.data_manager.add_price_element) + process_elements(load, lambda x: x * 3, self.data_manager.add_load_element) + process_elements(irradiance, lambda x: x, self.data_manager.add_irradiance_element) + process_elements(temperature, lambda x: x - 273.15, self.data_manager.add_temperature_element) + process_elements(wind, lambda x: x, self.data_manager.add_wind_element) + process_elements(llm_data, lambda x: x, self.data_manager.add_llm_element) diff --git a/environment_primal_dual.py b/environment_primal_dual.py new file mode 100644 index 0000000..4ca18ba --- /dev/null +++ b/environment_primal_dual.py @@ -0,0 +1,184 @@ +import gym +import numpy as np +import pandas as pd + +from module import * +from parameters import * +from data_manager import * + + +class ESSEnv(gym.Env): + def __init__(self, **kwargs): + super(ESSEnv, self).__init__() + self.excess = None + self.shedding = None + self.unbalance = None + self.real_unbalance = None + self.operation_cost = None + self.current_output = None + self.final_step_outputs = None + self.data_manager = DataManager() + self._load_year_data() + self.month = None + self.day = None + self.TRAIN = True + self.current_time = None + self.episode_length = kwargs.get('episode_length', 24) + self.battery_parameters = kwargs.get('battery_parameters', battery_parameters) + self.dg_parameters = kwargs.get('dg_parameters', dg_parameters) + self.solar_parameters = kwargs.get('solar_parameters', solar_parameters) + self.wind_parameters = kwargs.get('wind_parameters', wind_parameters) + self.penalty_coefficient = 50 # 约束惩罚系数 + self.sell_coefficient = 0.5 # 售出利润系数 + + self.grid = Grid() + self.battery = Battery(self.battery_parameters) + self.dg1 = DG(self.dg_parameters['gen_1']) + self.dg2 = DG(self.dg_parameters['gen_2']) + self.dg3 = DG(self.dg_parameters['gen_3']) + self.solar = Solar(self.solar_parameters) + self.wind = Wind(self.wind_parameters) + + self.action_space = gym.spaces.Box(low=-1, high=1, shape=(5,), dtype=np.float32) # 已增加调节电压动作 + self.state_space = gym.spaces.Box(low=0, high=1, shape=(10,), dtype=np.float32) + + def reset(self, *args): + self.month = np.random.randint(1, 13) # choose 12 month + if self.TRAIN: + self.day = np.random.randint(1, 20) + else: + self.day = np.random.randint(20, Constant.MONTHS_LEN[self.month] - 1) + self.current_time = 0 + self.battery.reset() + self.dg1.reset() + self.dg2.reset() + self.dg3.reset() + self.solar.reset() + self.wind.reset() + return self._build_state() + + def _build_state(self): + soc = self.battery.SOC() + dg1_output = self.dg1.current_output + dg2_output = self.dg2.current_output + dg3_output = self.dg3.current_output + time_step = self.current_time + + price = self.data_manager.get_price_data(self.month, self.day, self.current_time) + houseload = self.data_manager.get_load_cons_data(self.month, self.day, self.current_time) + temperature = 
self.data_manager.get_temperature_data(self.month, self.day, self.current_time) + irradiance = self.data_manager.get_irradiance_data(self.month, self.day, self.current_time) + wind_speed = self.data_manager.get_wind_data(self.month, self.day, self.current_time) + + obs = np.concatenate((np.float32(time_step), np.float32(price), np.float32(soc), np.float32(houseload), + np.float32(dg1_output), np.float32(dg2_output), np.float32(dg3_output), + np.float32(temperature), np.float32(irradiance), np.float32(wind_speed)), axis=None) + return obs + + def step(self, action): # state transition: current_obs->take_action->get_reward->get_finish->next_obs + # 在每个组件中添加动作 + current_obs = self._build_state() + temperature = current_obs[7] + irradiance = current_obs[8] + wind_speed = current_obs[9] + self.battery.step(action[0]) # 执行状态转换,电池当前容量也改变 + self.dg1.step(action[1]) + self.dg2.step(action[2]) + self.dg3.step(action[3]) + self.solar.step(action[4], temperature, irradiance) + self.wind.step(wind_speed) + self.current_output = np.array((self.dg1.current_output, self.dg2.current_output, self.dg3.current_output, + -self.battery.energy_change, self.solar.current_power, self.wind.current_power)) + actual_production = sum(self.current_output) + price = current_obs[1] + houseload = current_obs[3] + unbalance = actual_production - houseload + + reward = 0 + excess_penalty = 0 # 过多 + deficient_penalty = 0 # 过少 + sell_benefit = 0 + buy_cost = 0 + self.excess = 0 + self.shedding = 0 + if unbalance >= 0: # 现在过剩 + if unbalance <= self.grid.exchange_ability: + # sell money to grid is little [0.029,0.1] + sell_benefit = self.grid.get_cost(price, unbalance) * self.sell_coefficient + else: + sell_benefit = self.grid.get_cost(price, self.grid.exchange_ability) * self.sell_coefficient + # real unbalance:电网也无法满足 + self.excess = unbalance - self.grid.exchange_ability + excess_penalty = self.excess * self.penalty_coefficient + else: # unbalance <0, 采用缺少惩罚 + if abs(unbalance) <= self.grid.exchange_ability: + buy_cost = self.grid.get_cost(price, abs(unbalance)) + else: + buy_cost = self.grid.get_cost(price, self.grid.exchange_ability) + self.shedding = abs(unbalance) - self.grid.exchange_ability + deficient_penalty = self.shedding * self.penalty_coefficient + battery_cost = self.battery.get_cost(self.battery.energy_change, self.battery.current_capacity) + dg1_cost = self.dg1.get_cost(self.dg1.current_output) + dg2_cost = self.dg2.get_cost(self.dg2.current_output) + dg3_cost = self.dg3.get_cost(self.dg3.current_output) + solar_cost = self.solar.get_cost(self.solar.current_power) + wind_cost = self.wind.gen_cost(self.wind.current_power) + + self.operation_cost = (battery_cost + dg1_cost + dg2_cost + dg3_cost + solar_cost + wind_cost + excess_penalty + + deficient_penalty - sell_benefit + buy_cost) + reward -= self.operation_cost / 1e3 + self.unbalance = unbalance + self.real_unbalance = self.shedding + self.excess + final_step_outputs = [self.dg1.current_output, self.dg2.current_output, self.dg3.current_output, + self.battery.current_capacity, self.solar.current_power, self.wind.current_power] + self.current_time += 1 + finish = (self.current_time == self.episode_length) + if finish: + self.final_step_outputs = final_step_outputs + self.current_time = 0 + next_obs = self.reset() + else: + next_obs = self._build_state() + return current_obs, next_obs, float(reward), finish + + # def render(self, current_obs, next_obs, reward, finish): + # print('day={},hour={:2d}, state={}, next_state={}, reward={:.4f}, terminal={}\n'. 
+ # format(self.day, self.current_time, current_obs, next_obs, reward, finish)) + + def _load_year_data(self): + price_df = pd.read_csv('data/prices.csv', sep=',') + load_df = pd.read_csv('data/houseload.csv', sep=',') + irradiance_df = pd.read_csv('data/irradiance.csv', sep=',') + temperature_df = pd.read_csv('data/temper.csv', sep=',') + wind_df = pd.read_csv('data/wind.csv', sep=',') + + price = price_df['price'].to_numpy(dtype=float) + load = load_df['houseload'].to_numpy(dtype=float) + irradiance = irradiance_df['irradiance'].to_numpy(dtype=float) + temperature = temperature_df['t2m'].to_numpy(dtype=float) + wind = wind_df['wind_speed'].to_numpy(dtype=float) + + '''重新设计价格和发电量以及需求的大小''' + def process_elements(elements, transform_function, add_function): + for element in elements: + transformed_element = transform_function(element) + add_function(transformed_element) + + process_elements(price, lambda x: max(x / 10, 0.5), self.data_manager.add_price_element) + process_elements(load, lambda x: x * 3, self.data_manager.add_load_element) + process_elements(irradiance, lambda x: x, self.data_manager.add_irradiance_element) + process_elements(temperature, lambda x: x - 273.15, self.data_manager.add_temperature_element) + process_elements(wind, lambda x: x, self.data_manager.add_wind_element) + +# if __name__ == '__main__': +# env = ESSEnv() +# env.TRAIN = False +# rewards = [] +# env.reset() +# tem_action = [0.1, 0.1, 0.1, 0.1, 0.1] +# for _ in range(144): +# print(f'current month is {env.month}, current day is {env.day}, current time is {env.current_time}') +# current_obs, next_obs, reward, finish = env.step(tem_action) +# env.render(current_obs, next_obs, reward, finish) +# current_obs = next_obs +# rewards.append(reward) diff --git a/module.py b/module.py index 0dbb70c..f47be9f 100644 --- a/module.py +++ b/module.py @@ -14,7 +14,6 @@ class DG: self.power_output_min = parameters['power_output_min'] self.ramping_up = parameters['ramping_up'] self.ramping_down = parameters['ramping_down'] - self.last_step_output = None def step(self, action_gen): output_change = action_gen * self.ramping_up # constrain the output_change with ramping up boundary @@ -26,10 +25,7 @@ class DG: self.current_output = output def get_cost(self, output): - if output <= 0: - cost = 0 - else: - cost = (self.a_factor * pow(output, 2) + self.b_factor * output + self.c_factor) + cost = (self.a_factor * pow(output, 2) + self.b_factor * output + self.c_factor) return cost def reset(self): @@ -43,25 +39,25 @@ class Battery: self.current_capacity = None self.energy_change = None self.capacity = parameters['capacity'] + self.min_soc = parameters['min_soc'] self.max_soc = parameters['max_soc'] self.initial_capacity = parameters['initial_capacity'] - self.min_soc = parameters['min_soc'] - self.degradation = parameters['degradation'] # degradation cost 1.2 - self.max_charge = parameters['max_charge'] # max charge ability - self.max_discharge = parameters['max_discharge'] + self.degradation = parameters['degradation'] + self.holding = parameters['holding'] + self.max_charge = parameters['max_charge'] + # self.max_discharge = parameters['max_discharge'] self.efficiency = parameters['efficiency'] def step(self, action_battery): energy = action_battery * self.max_charge - updated_capacity = np.maximum(self.min_soc, - np.minimum(self.max_soc, - (self.current_capacity * self.capacity + energy) / self.capacity)) + current_energy = self.current_capacity * self.capacity + updated_capacity = np.maximum(self.min_soc, 
np.minimum(self.max_soc, (current_energy + energy) / self.capacity)) # if charge, positive, if discharge, negative self.energy_change = (updated_capacity - self.current_capacity) * self.capacity - self.current_capacity = updated_capacity # update capacity to current codition + self.current_capacity = updated_capacity # update capacity to current state - def get_cost(self, energy): # cost depends on the energy change - cost = energy ** 2 * self.degradation + def get_cost(self, energy_change, energy_hold): # cost depends on the energy change + cost = energy_change * self.degradation + energy_hold * self.holding return cost def SOC(self): @@ -91,12 +87,6 @@ class Solar: V_oc = self.oc_voltage + self.temper_coefficient * (temperature - self.refer_temperature) current = I_sc - (V_oc / self.sh_resistance) - # current = I_sc - # for _ in range(10): # 迭代次数 - # new_current = I_sc - (V_oc + current * self.s_resistance) / self.sh_resistance - # if abs(new_current - current) < 1e-6: # 收敛条件 - # break - # current = new_current self.current_power = max((1 + action_voltage) * self.base_voltage * current, 0) return self.current_power @@ -123,12 +113,11 @@ class Wind: self.opex_cofficient = parameters['opex_cofficient'] def step(self, wind_speed): + constant = 0.5 * self.air_density * self.rotor_radius ** 2 * self.power_coefficient * self.generator_efficiency if self.cutin_speed <= wind_speed < self.rated_speed: - self.current_power = (0.5 * self.air_density * self.rotor_radius ** 2 * wind_speed ** 3 * - self.power_coefficient * self.generator_efficiency) / 1e3 + self.current_power = constant * wind_speed ** 3 / 1e3 elif self.rated_speed <= wind_speed < self.cutout_speed: - self.current_power = (0.5 * self.air_density * self.rotor_radius ** 2 * self.rated_speed ** 3 * - self.power_coefficient * self.generator_efficiency) / 1e3 + self.current_power = constant * self.rated_speed ** 3 / 1e3 else: self.current_power = 0 return self.current_power diff --git a/parameters.py b/parameters.py index 4f404df..dc04fb4 100644 --- a/parameters.py +++ b/parameters.py @@ -27,6 +27,7 @@ battery_parameters = { 'max_discharge': 100, # kw 'efficiency': 0.9, 'degradation': 0.01, # euro/kw + 'holding': 0.05, 'max_soc': 0.8, 'min_soc': 0.2, 'initial_capacity': 0.4 diff --git a/plotDRL.py b/plotDRL.py index 547db65..d057abc 100644 --- a/plotDRL.py +++ b/plotDRL.py @@ -21,7 +21,7 @@ def plot_optimization_result(datasource, directory): # data source is dataframe # 绘制步长成本 in ax[0] axs[0, 0].cla() axs[0, 0].set_ylabel('Costs') - axs[0, 0].set_xlabel('Time(h)') + axs[0, 0].set_xlabel('Time (h)') axs[0, 0].bar(T, datasource['step_cost']) # axs[0,0].set_xticks([i for i in range(24)],[i for i in range(1,25)]) @@ -29,7 +29,7 @@ def plot_optimization_result(datasource, directory): # data source is dataframe axs[0, 1].cla() # 设置第一个 y 轴 axs[0, 1].set_ylabel('Price') - axs[0, 1].set_xlabel('Time(h)') + axs[0, 1].set_xlabel('Time (h)') line1, = axs[0, 1].plot(T, datasource['price'], drawstyle='steps-mid', label='Price', color='pink') # 创建第二个 y 轴 ax2 = axs[0, 1].twinx() @@ -43,8 +43,8 @@ def plot_optimization_result(datasource, directory): # data source is dataframe # 绘制累计发电量和消耗量 in ax[2] axs[1, 0].cla() - axs[1, 0].set_ylabel('Outputs of DGs and Battery') - axs[1, 0].set_xlabel('Time(h)') + axs[1, 0].set_ylabel('Outputs of Units and Netload (kWh)') + axs[1, 0].set_xlabel('Time (h)') # 处理电池充放电数据 battery_positive = np.array(datasource['battery_energy_change']) battery_negative = np.array(datasource['battery_energy_change']) @@ -90,6 +90,7 @@ def 
plot_evaluation_information(datasource, directory): # 绘制不平衡度 in axs[0] axs[0, 0].cla() + axs[0, 0].set_xlabel('Time (h)') axs[0, 0].set_ylabel('Unbalance of Generation and Load') axs[0, 0].bar(eval_data['time_step'], eval_data['unbalance'], label='Exchange with Grid', width=0.4) axs[0, 0].bar(eval_data['time_step'] + 0.4, eval_data['netload'], label='Netload', width=0.4) @@ -98,8 +99,8 @@ def plot_evaluation_information(datasource, directory): # 绘制能源充/放电与价格关系图 in ax[1] axs[0, 1].cla() + axs[0, 1].set_xlabel('Time (h)') axs[0, 1].set_ylabel('Price') - axs[0, 1].set_xlabel('Time Steps') line1, = axs[0, 1].plot(eval_data['time_step'], eval_data['price'], drawstyle='steps-mid', label='Price', color='pink') ax2 = axs[0, 1].twinx() @@ -112,6 +113,7 @@ def plot_evaluation_information(datasource, directory): # 绘制发电量和负载量 in ax[2] axs[1, 0].cla() + axs[1, 0].set_xlabel('Time (h)') axs[1, 0].set_ylabel('Outputs of Units and Netload (kWh)') # axs[1,0].set_xticks([i for i in range(24)], [i for i in range(1, 25)]) battery_positive = np.array(eval_data['battery']) @@ -137,6 +139,7 @@ def plot_evaluation_information(datasource, directory): # 绘制奖励 in axs[3] axs[1, 1].cla() + axs[1, 1].set_xlabel('Time (h)') axs[1, 1].set_ylabel('Costs') axs[1, 1].bar(eval_data['time_step'], eval_data['operation_cost']) fig.savefig(f"{directory}/evaluation_information.svg", format='svg', dpi=600, bbox_inches='tight') diff --git a/tools.py b/tools.py index fe36c26..fc57326 100644 --- a/tools.py +++ b/tools.py @@ -78,7 +78,6 @@ def optimization_base_result(env, month, day, initial_soc): m.addConstrs((battery_capacity * soc[t] == battery_capacity * soc[t - 1] + (battery_energy_change[t] * battery_efficiency) for t in range(1, period)), name='soc update') # 设置成本函数 - # 发电机成本 cost_gen = gp.quicksum( (a_para[g] * gen_output[g, t] * gen_output[g, t] + b_para[g] * gen_output[g, t] + c_para[g] * on_off[g, t]) for t in range(period) for g in range(NUM_GEN)) @@ -114,6 +113,7 @@ def optimization_base_result(env, month, day, initial_soc): output_record['gen2'].append(gen_output[1, t].x) output_record['gen3'].append(gen_output[2, t].x) output_record['step_cost'].append(gen_cost + grid_import_cost - grid_export_cost + solar_cost + wind_cost) + output_record_df = pd.DataFrame.from_dict(output_record) return output_record_df @@ -137,14 +137,12 @@ class Arguments: self.soft_update_tau = 2 ** -8 # 2 ** -8 ~= 5e-3 self.net_dim = 256 # the network width 256 self.batch_size = 4096 # num of transitions sampled from replay buffer. - self.repeat_times = 2 ** 5 # repeatedly update network to keep critic's loss small + self.repeat_times = 2 ** 3 # repeatedly update network to keep critic's loss small self.target_step = 4096 # collect target_step experiences, then update network, 1024 self.max_memo = 500000 # capacity of replay buffer self.if_per_or_gae = False # PER for off-policy sparse reward: Prioritized Experience Replay. '''Arguments for evaluate''' - # self.eval_gap = 2 ** 6 # evaluate the agent per eval_gap seconds - # self.eval_times = 2 # number of times that get episode return in first self.random_seed = 0 # initialize random seed in self.init_before_training() # self.random_seed_list = [1234, 2234, 3234, 4234, 5234] self.random_seed_list = [1234]
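The constrained part of AgentPrimalDualPPO in PPO_primal_dual.py comes down to a projected gradient-ascent step on the dual variable, followed by a cost penalty in the actor objective. Below is a minimal, self-contained sketch of that dual step, using the 0.01 step size and the clip-at-zero projection from update_net; the helper name dual_update and the toy numbers are illustrative, not part of the patch, and the two if/else branches in update_net are collapsed into one algebraically equivalent line.

def dual_update(lambda_cost, episode_cost, constraint_value, dual_step=0.01):
    """Projected gradient-ascent step on the Lagrange multiplier.

    Mirrors AgentPrimalDualPPO.update_net: the multiplier is pushed up when the
    collected cost exceeds the constraint, pushed down otherwise, and clipped
    at zero so the cost penalty can never turn into a bonus.
    """
    lambda_cost += dual_step * (episode_cost - constraint_value)
    return max(lambda_cost, 0.0)


# Toy usage: repeated violations steadily raise the multiplier.
lam = 1.0
for cost in (3.0, 2.5, 0.2):
    lam = dual_update(lam, cost, constraint_value=1.0)
    # lam becomes roughly 1.02, then 1.035, then 1.027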
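update_buffer stores (1 - done) * gamma in place of the raw done flag, and get_reward_sum_raw folds that mask into a single backward pass over the trajectory. A minimal sketch of that pass, with a hand-checked toy input (torch is the only dependency; the tensor values are made up for illustration):

import torch


def discounted_reward_to_go(rewards, dones, gamma=0.995):
    """Backward accumulation as in get_reward_sum_raw.

    The done flag is converted to mask = (1 - done) * gamma, so the running
    return is discounted within an episode and reset to zero across episode
    boundaries without any explicit branching.
    """
    masks = (1.0 - dones) * gamma
    r_sum = torch.empty_like(rewards)
    running = 0.0
    for i in range(rewards.shape[0] - 1, -1, -1):
        r_sum[i] = rewards[i] + masks[i] * running
        running = r_sum[i]
    return r_sum


# Three unit rewards with an episode end at index 1:
# discounted_reward_to_go(torch.tensor([1., 1., 1.]), torch.tensor([0., 1., 0.]))
# -> tensor([1.9950, 1.0000, 1.0000])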
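The refactored Wind.step in module.py hoists the constant 0.5 * air_density * rotor_radius ** 2 * power_coefficient * generator_efficiency out of the piecewise power curve. A standalone sketch of that curve follows; the keyword defaults are placeholder numbers for illustration only, the project's actual values live in parameters.py.

def wind_power_kw(wind_speed, air_density=1.225, rotor_radius=25.0,
                  power_coefficient=0.45, generator_efficiency=0.95,
                  cutin_speed=3.0, rated_speed=12.0, cutout_speed=25.0):
    """Piecewise power curve matching Wind.step: cubic in wind speed between
    cut-in and rated speed, flat at rated power up to cut-out, zero otherwise."""
    constant = (0.5 * air_density * rotor_radius ** 2
                * power_coefficient * generator_efficiency)
    if cutin_speed <= wind_speed < rated_speed:
        return constant * wind_speed ** 3 / 1e3   # kW
    if rated_speed <= wind_speed < cutout_speed:
        return constant * rated_speed ** 3 / 1e3  # kW
    return 0.0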
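Battery.get_cost now takes two arguments, and the environments call it as get_cost(battery.energy_change, battery.current_capacity); the second term is priced with the new 'holding' parameter added in parameters.py. A minimal sketch of the revised cost model, reusing the 0.01 degradation and 0.05 holding values from parameters.py (the function and argument names here are illustrative):

def battery_step_cost(energy_change, held_capacity, degradation=0.01, holding=0.05):
    """Cost model matching the revised Battery.get_cost: a linear degradation
    charge on the energy moved this step plus a holding charge on the capacity
    carried into the next step."""
    return energy_change * degradation + held_capacity * holding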
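environment_llm.py widens the observation space from 10 to 15 entries and appends the per-hour value returned by get_llm_data. The patch does not state the shape of an entry in data/llm_action.json; the 10-to-15 growth is consistent with a 5-element suggestion per hour, but that is an inference, not something the diff confirms. A hypothetical sketch of how such an entry would be folded into the observation, mirroring the np.concatenate call in _build_state:

import numpy as np


def build_llm_observation(base_obs, llm_entry):
    """Append the per-hour LLM data to the base observation, as _build_state
    does via np.concatenate((..., np.float32(llm_data)), axis=None).

    llm_entry is assumed here to be a 5-element sequence; that shape is an
    inference from the 10 -> 15 change in state_space, not stated in the patch.
    """
    return np.concatenate((np.float32(base_obs), np.float32(llm_entry)), axis=None)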