chenxiaodong 2024-06-18 10:49:43 +08:00
commit 5fd1ac5e14
100 changed files with 422120 additions and 0 deletions

8
.idea/.gitignore vendored Normal file

@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml

12
.idea/DRL-for-Energy-Systems.iml Normal file

@ -0,0 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="rl-microgrid" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyDocumentationSettings">
<option name="format" value="PLAIN" />
<option name="myDocStringFormat" value="Plain" />
</component>
</module>

44
.idea/csv-editor.xml Normal file

@ -0,0 +1,44 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CsvFileAttributes">
<option name="attributeMap">
<map>
<entry key="\data\houseload.csv">
<value>
<Attribute>
<option name="separator" value="," />
</Attribute>
</value>
</entry>
<entry key="\data\irradiance.csv">
<value>
<Attribute>
<option name="separator" value="," />
</Attribute>
</value>
</entry>
<entry key="\data\prices.csv">
<value>
<Attribute>
<option name="separator" value=";" />
</Attribute>
</value>
</entry>
<entry key="\data\temper.csv">
<value>
<Attribute>
<option name="separator" value="," />
</Attribute>
</value>
</entry>
<entry key="\data\wind.csv">
<value>
<Attribute>
<option name="separator" value="," />
</Attribute>
</value>
</entry>
</map>
</option>
</component>
</project>
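
Note: the separators recorded above matter when the data files are loaded outside the IDE, since prices.csv is semicolon-separated while the other files use commas. A minimal loading sketch (an illustration, not part of the commit; the paths assume the data/ directory added later in this commit, and header handling may need adjusting for the actual first rows):

import pandas as pd

prices = pd.read_csv('data/prices.csv', sep=';')    # semicolon-separated per the config above
houseload = pd.read_csv('data/houseload.csv')       # the remaining files are comma-separated
irradiance = pd.read_csv('data/irradiance.csv')
temperature = pd.read_csv('data/temper.csv')
wind = pd.read_csv('data/wind.csv')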


@ -0,0 +1,151 @@
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="DuplicatedCode" enabled="false" level="WEAK WARNING" enabled_by_default="false">
<Languages>
<language minSize="266" name="Python" />
</Languages>
</inspection_tool>
<inspection_tool class="PyArgumentListInspection" enabled="false" level="WARNING" enabled_by_default="false" />
<inspection_tool class="PyIncorrectDocstringInspection" enabled="false" level="WEAK WARNING" enabled_by_default="false" />
<inspection_tool class="PyPackageRequirementsInspection" enabled="false" level="WARNING" enabled_by_default="false">
<option name="ignoredPackages">
<value>
<list size="97">
<item index="0" class="java.lang.String" itemvalue="absl-py" />
<item index="1" class="java.lang.String" itemvalue="certifi" />
<item index="2" class="java.lang.String" itemvalue="astunparse" />
<item index="3" class="java.lang.String" itemvalue="cachetools" />
<item index="4" class="java.lang.String" itemvalue="charset-normalizer" />
<item index="5" class="java.lang.String" itemvalue="cloudpickle" />
<item index="6" class="java.lang.String" itemvalue="contourpy" />
<item index="7" class="java.lang.String" itemvalue="google-pasta" />
<item index="8" class="java.lang.String" itemvalue="protobuf" />
<item index="9" class="java.lang.String" itemvalue="tensorflow-estimator" />
<item index="10" class="java.lang.String" itemvalue="joblib" />
<item index="11" class="java.lang.String" itemvalue="threadpoolctl" />
<item index="12" class="java.lang.String" itemvalue="opt-einsum" />
<item index="13" class="java.lang.String" itemvalue="scikit-learn" />
<item index="14" class="java.lang.String" itemvalue="python-dateutil" />
<item index="15" class="java.lang.String" itemvalue="setuptools" />
<item index="16" class="java.lang.String" itemvalue="cycler" />
<item index="17" class="java.lang.String" itemvalue="gast" />
<item index="18" class="java.lang.String" itemvalue="markupsafe" />
<item index="19" class="java.lang.String" itemvalue="mkl" />
<item index="20" class="java.lang.String" itemvalue="fsspec" />
<item index="21" class="java.lang.String" itemvalue="pyasn1-modules" />
<item index="22" class="java.lang.String" itemvalue="filelock" />
<item index="23" class="java.lang.String" itemvalue="oauthlib" />
<item index="24" class="java.lang.String" itemvalue="keras" />
<item index="25" class="java.lang.String" itemvalue="pyparsing" />
<item index="26" class="java.lang.String" itemvalue="sympy" />
<item index="27" class="java.lang.String" itemvalue="libclang" />
<item index="28" class="java.lang.String" itemvalue="python" />
<item index="29" class="java.lang.String" itemvalue="werkzeug" />
<item index="30" class="java.lang.String" itemvalue="gymnasium" />
<item index="31" class="java.lang.String" itemvalue="sqlite" />
<item index="32" class="java.lang.String" itemvalue="h5py" />
<item index="33" class="java.lang.String" itemvalue="stable-baselines3" />
<item index="34" class="java.lang.String" itemvalue="tensorboard-data-server" />
<item index="35" class="java.lang.String" itemvalue="wrapt" />
<item index="36" class="java.lang.String" itemvalue="kiwisolver" />
<item index="37" class="java.lang.String" itemvalue="typing-extensions" />
<item index="38" class="java.lang.String" itemvalue="vc" />
<item index="39" class="java.lang.String" itemvalue="tensorflow-intel" />
<item index="40" class="java.lang.String" itemvalue="jinja2" />
<item index="41" class="java.lang.String" itemvalue="fonttools" />
<item index="42" class="java.lang.String" itemvalue="flatbuffers" />
<item index="43" class="java.lang.String" itemvalue="tensorboard" />
<item index="44" class="java.lang.String" itemvalue="tbb" />
<item index="45" class="java.lang.String" itemvalue="matplotlib" />
<item index="46" class="java.lang.String" itemvalue="openssl" />
<item index="47" class="java.lang.String" itemvalue="ca-certificates" />
<item index="48" class="java.lang.String" itemvalue="idna" />
<item index="49" class="java.lang.String" itemvalue="mkl-service" />
<item index="50" class="java.lang.String" itemvalue="mkl_fft" />
<item index="51" class="java.lang.String" itemvalue="rsa" />
<item index="52" class="java.lang.String" itemvalue="networkx" />
<item index="53" class="java.lang.String" itemvalue="numpy" />
<item index="54" class="java.lang.String" itemvalue="pyasn1" />
<item index="55" class="java.lang.String" itemvalue="importlib-metadata" />
<item index="56" class="java.lang.String" itemvalue="requests-oauthlib" />
<item index="57" class="java.lang.String" itemvalue="seaborn" />
<item index="58" class="java.lang.String" itemvalue="zipp" />
<item index="59" class="java.lang.String" itemvalue="markdown" />
<item index="60" class="java.lang.String" itemvalue="ml-dtypes" />
<item index="61" class="java.lang.String" itemvalue="urllib3" />
<item index="62" class="java.lang.String" itemvalue="vs2015_runtime" />
<item index="63" class="java.lang.String" itemvalue="numpy-base" />
<item index="64" class="java.lang.String" itemvalue="scipy" />
<item index="65" class="java.lang.String" itemvalue="google-auth-oauthlib" />
<item index="66" class="java.lang.String" itemvalue="intel-openmp" />
<item index="67" class="java.lang.String" itemvalue="tzdata" />
<item index="68" class="java.lang.String" itemvalue="farama-notifications" />
<item index="69" class="java.lang.String" itemvalue="packaging" />
<item index="70" class="java.lang.String" itemvalue="torch" />
<item index="71" class="java.lang.String" itemvalue="et-xmlfile" />
<item index="72" class="java.lang.String" itemvalue="tensorflow-io-gcs-filesystem" />
<item index="73" class="java.lang.String" itemvalue="pandas" />
<item index="74" class="java.lang.String" itemvalue="termcolor" />
<item index="75" class="java.lang.String" itemvalue="importlib-resources" />
<item index="76" class="java.lang.String" itemvalue="mkl_random" />
<item index="77" class="java.lang.String" itemvalue="icc_rt" />
<item index="78" class="java.lang.String" itemvalue="mpmath" />
<item index="79" class="java.lang.String" itemvalue="pillow" />
<item index="80" class="java.lang.String" itemvalue="grpcio" />
<item index="81" class="java.lang.String" itemvalue="pytz" />
<item index="82" class="java.lang.String" itemvalue="google-auth" />
<item index="83" class="java.lang.String" itemvalue="openpyxl" />
<item index="84" class="java.lang.String" itemvalue="blas" />
<item index="85" class="java.lang.String" itemvalue="gym" />
<item index="86" class="java.lang.String" itemvalue="nltk" />
<item index="87" class="java.lang.String" itemvalue="pygame" />
<item index="88" class="java.lang.String" itemvalue="mo-gymnasium" />
<item index="89" class="java.lang.String" itemvalue="rich" />
<item index="90" class="java.lang.String" itemvalue="pynvml" />
<item index="91" class="java.lang.String" itemvalue="mujoco" />
<item index="92" class="java.lang.String" itemvalue="pybullet-gym" />
<item index="93" class="java.lang.String" itemvalue="mpi4py" />
<item index="94" class="java.lang.String" itemvalue="tqdm" />
<item index="95" class="java.lang.String" itemvalue="ipython" />
<item index="96" class="java.lang.String" itemvalue="tensorboardX" />
</list>
</value>
</option>
</inspection_tool>
<inspection_tool class="PyPep8Inspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
<option name="ignoredErrors">
<list>
<option value="E731" />
</list>
</option>
</inspection_tool>
<inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
<option name="ignoredErrors">
<list>
<option value="N812" />
<option value="N806" />
<option value="N802" />
<option value="N803" />
<option value="N801" />
<option value="N813" />
</list>
</option>
</inspection_tool>
<inspection_tool class="PyShadowingNamesInspection" enabled="false" level="WEAK WARNING" enabled_by_default="false" />
<inspection_tool class="PyUnresolvedReferencesInspection" enabled="false" level="WARNING" enabled_by_default="false">
<option name="ignoredIdentifiers">
<list>
<option value="filename" />
<option value="int.*" />
<option value="object.*" />
</list>
</option>
</inspection_tool>
<inspection_tool class="SpellCheckingInspection" enabled="false" level="TYPO" enabled_by_default="false">
<option name="processCode" value="true" />
<option name="processLiterals" value="true" />
<option name="processComments" value="true" />
</inspection_tool>
</profile>
</component>


@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

7
.idea/misc.xml Normal file

@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Black">
<option name="sdkName" value="rl-microgrid" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="rl-microgrid" project-jdk-type="Python SDK" />
</project>

8
.idea/modules.xml Normal file

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/DRL-for-Energy-Systems.iml" filepath="$PROJECT_DIR$/.idea/DRL-for-Energy-Systems.iml" />
</modules>
</component>
</project>

6
.idea/vcs.xml Normal file

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

Image added (143 KiB); file diff suppressed because it is too large.

Image added (126 KiB); file diff suppressed because it is too large.

BIN
AgentDDPG/actor.pth Normal file

Binary file not shown.

BIN
AgentDDPG/loss_data.pkl Normal file

Binary file not shown.

BIN
AgentDDPG/reward_data.pkl Normal file

Binary file not shown.

BIN
AgentDDPG/test_data.pkl Normal file

Binary file not shown.

Image added (142 KiB); file diff suppressed because it is too large.

Image added (127 KiB); file diff suppressed because it is too large.

BIN
AgentPPO/actor.pth Normal file

Binary file not shown.

BIN
AgentPPO/loss_data.pkl Normal file

Binary file not shown.

BIN
AgentPPO/reward_data.pkl Normal file

Binary file not shown.

BIN
AgentPPO/test_data.pkl Normal file

Binary file not shown.

Image added (145 KiB); file diff suppressed because it is too large.

Image added (126 KiB); file diff suppressed because it is too large.

BIN
AgentSAC/actor.pth Normal file

Binary file not shown.

BIN
AgentSAC/loss_data.pkl Normal file

Binary file not shown.

BIN
AgentSAC/reward_data.pkl Normal file

Binary file not shown.

BIN
AgentSAC/test_data.pkl Normal file

Binary file not shown.

Image added (143 KiB); file diff suppressed because it is too large.

Image added (126 KiB); file diff suppressed because it is too large.

BIN
AgentTD3/actor.pth Normal file

Binary file not shown.

BIN
AgentTD3/loss_data.pkl Normal file

Binary file not shown.

BIN
AgentTD3/reward_data.pkl Normal file

Binary file not shown.

BIN
AgentTD3/test_data.pkl Normal file

Binary file not shown.

133
DDPG.py Normal file

@ -0,0 +1,133 @@
import pickle
import pandas as pd
import torch
from agent import AgentDDPG
from environment import ESSEnv
from tools import *
def update_buffer(_trajectory):
ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
ary_other = torch.as_tensor([item[1] for item in _trajectory])
ary_other[:, 0] = ary_other[:, 0] # ten_reward
ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma # ten_mask = (1.0 - ary_done) * gamma
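# added note: folding gamma * (1 - done) into column 1 lets the critic target later be
# formed as q_label = reward + mask * next_q (see AgentDDPG.get_obj_critic in agent.py),
# which both discounts the bootstrap value and zeroes it on terminal transitions.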
buffer.extend_buffer(ten_state, ary_other)
_steps = ten_state.shape[0]
_r_exp = ary_other[:, 0].mean() # other = (reward, mask, action)
return _steps, _r_exp
if __name__ == '__main__':
args = Arguments()
'''here record real unbalance'''
reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
loss_record = {'episode': [], 'steps': [], 'critic_loss': [], 'actor_loss': [], 'entropy_loss': []}
args.visible_gpu = '0'
for seed in args.random_seed_list:
args.random_seed = seed
args.agent = AgentDDPG()
agent_name = f'{args.agent.__class__.__name__}'
args.agent.cri_target = True
args.env = ESSEnv()
# create lists of lists, or create one long list?
args.init_before_training(if_main=True)
'''init agent and environment'''
agent = args.agent
env = args.env
agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
args.if_per_or_gae)
'''init replay buffer'''
buffer = ReplayBuffer(max_len=args.max_memo, state_dim=env.state_space.shape[0],
action_dim=env.action_space.shape[0])
'''start training'''
cwd = args.cwd
gamma = args.gamma
batch_size = args.batch_size
target_step = args.target_step # how many environment steps to collect before one exploration episode is cut off
repeat_times = args.repeat_times # how many update passes over each batch of collected data
soft_update_tau = args.soft_update_tau
# get the first state from the environment
agent.state = env.reset()
'''collect data and train and update network'''
num_episode = args.num_episode
# args.train=False
# args.save_network=False
# args.test_network=False
# args.save_test_data=False
# args.compare_with_pyomo=False
if args.train:
collect_data = True
while collect_data:
print(f'buffer:{buffer.now_len}')
with torch.no_grad():
trajectory = agent.explore_env(env, target_step)
steps, r_exp = update_buffer(trajectory)
buffer.update_now_len()
if buffer.now_len >= 10000:
collect_data = False
for i_episode in range(num_episode):
critic_loss, actor_loss = agent.update_net(buffer, batch_size, repeat_times, soft_update_tau)
loss_record['critic_loss'].append(critic_loss)
loss_record['actor_loss'].append(actor_loss)
with torch.no_grad():
episode_reward, episode_unbalance = get_episode_return(env, agent.act, agent.device)
reward_record['mean_episode_reward'].append(episode_reward)
reward_record['unbalance'].append(episode_unbalance)
print(f'current episode is {i_episode}, reward: {episode_reward}, '
f'unbalance: {episode_unbalance}, buffer_length: {buffer.now_len}')
if i_episode % 10 == 0:
# target_step
with torch.no_grad():
trajectory = agent.explore_env(env, target_step)
steps, r_exp = update_buffer(trajectory)
loss_record_path = f'{args.cwd}/loss_data.pkl'
reward_record_path = f'{args.cwd}/reward_data.pkl'
# currently only the actor trained with the last random seed is stored
act_save_path = f'{args.cwd}/actor.pth'
with open(loss_record_path, 'wb') as tf:
pickle.dump(loss_record, tf)
with open(reward_record_path, 'wb') as tf:
pickle.dump(reward_record, tf)
if args.save_network:
torch.save(agent.act.state_dict(), act_save_path)
print('actor parameters have been saved')
if args.test_network:
args.cwd = agent_name
agent.act.load_state_dict(torch.load(act_save_path))
print('parameters have been reloaded for testing')
record = test_one_episode(env, agent.act, agent.device)
eval_data = pd.DataFrame(record['system_info'])
eval_data.columns = ['time_step', 'price', 'netload', 'action', 'real_action', 'soc', 'battery', 'gen1', 'gen2',
'gen3', 'unbalance', 'operation_cost']
if args.save_test_data:
test_data_save_path = f'{args.cwd}/test_data.pkl'
with open(test_data_save_path, 'wb') as tf:
pickle.dump(record, tf)
'''compare with pyomo data and results'''
if args.compare_with_pyomo:
month = record['init_info'][0][0]
day = record['init_info'][0][1]
initial_soc = record['init_info'][0][3]
base_result = optimization_base_result(env, month, day, initial_soc)
if args.plot_on:
from plotDRL import PlotArgs, make_dir, plot_evaluation_information, plot_optimization_result
plot_args = PlotArgs()
plot_args.feature_change = ''
args.cwd = agent_name
plot_dir = make_dir(args.cwd, plot_args.feature_change)
plot_optimization_result(base_result, plot_dir)
plot_evaluation_information(args.cwd + '/' + 'test_data.pkl', plot_dir)
'''compare the costs obtained from Pyomo and DDPG'''
ratio = sum(eval_data['operation_cost']) / sum(base_result['step_cost'])
print('operation_cost_sum:', sum(eval_data['operation_cost']))
print('step_cost_sum:', sum(base_result['step_cost']))
print('ratio:', ratio)
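
Note: the binary artifacts committed under AgentDDPG/ (actor.pth, loss_data.pkl, reward_data.pkl, test_data.pkl) are exactly what this script writes. A minimal sketch for inspecting them offline (an illustration, not part of the commit; it assumes the snippet is run from the repository root):

import pickle
import torch

with open('AgentDDPG/reward_data.pkl', 'rb') as f:
    reward_record = pickle.load(f)          # dict with 'mean_episode_reward', 'unbalance', ...
print(len(reward_record['mean_episode_reward']), 'episode rewards logged')

actor_weights = torch.load('AgentDDPG/actor.pth', map_location='cpu')  # state_dict saved by torch.save
print(list(actor_weights.keys())[:5])       # first few parameter names of the actor network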

399
PPO.py Normal file

@ -0,0 +1,399 @@
import os
import pickle
from copy import deepcopy
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from environment import ESSEnv
from tools import get_episode_return, test_one_episode, optimization_base_result
os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
script_name = os.path.basename(__file__)
# note: adding layer normalization did not help, so it is disabled by default (layer_norm=False)
class ActorPPO(nn.Module):
def __init__(self, mid_dim, state_dim, action_dim, layer_norm=False):
super().__init__()
self.net = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
nn.Linear(mid_dim, action_dim), )
# the logarithm (log) of the standard deviation (std) of the action; it is a trainable parameter
self.a_logstd = nn.Parameter(torch.zeros((1, action_dim)) - 0.5, requires_grad=True)
self.sqrt_2pi_log = np.log(np.sqrt(2 * np.pi))
if layer_norm:
self.layer_norm(self.net)
@staticmethod
def layer_norm(layer, std=1.0, bias_const=0.0):
for i in layer:
if hasattr(i, 'weight'):
torch.nn.init.orthogonal_(i.weight, std)
torch.nn.init.constant_(i.bias, bias_const)
def forward(self, state):
return self.net(state).tanh() # action.tanh() # in this way limit the data output of action
def get_action(self, state):
a_avg = self.net(state) # unbounded mean of the action distribution (tanh is applied before the env step)
a_std = self.a_logstd.exp()
noise = torch.randn_like(a_avg)
action = a_avg + noise * a_std
return action, noise
def get_logprob_entropy(self, state, action):
a_avg = self.net(state)
a_std = self.a_logstd.exp()
delta = ((a_avg - action) / a_std).pow(2) * 0.5
logprob = -(self.a_logstd + self.sqrt_2pi_log + delta).sum(1) # new_logprob
dist_entropy = (logprob.exp() * logprob).mean() # policy entropy
return logprob, dist_entropy
def get_old_logprob(self, _action, noise): # noise = action - a_noise
delta = noise.pow(2) * 0.5
return -(self.a_logstd + self.sqrt_2pi_log + delta).sum(1) # old_logprob
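# added note: for this diagonal Gaussian policy both methods evaluate
# log pi(a|s) = -sum_i [ log sigma_i + 0.5 * log(2 * pi) + (a_i - mu_i)^2 / (2 * sigma_i^2) ].
# get_logprob_entropy recomputes it with the current parameters, while get_old_logprob
# reuses the stored exploration noise, because noise = (action - a_avg) / a_std.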
class CriticAdv(nn.Module):
def __init__(self, mid_dim, state_dim, _action_dim, layer_norm=False):
super().__init__()
self.net = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
nn.Linear(mid_dim, 1))
if layer_norm:
self.layer_norm(self.net, std=1.0)
@staticmethod
def layer_norm(layer, std=1.0, bias_const=0.0):
for i in layer:
if hasattr(i, 'weight'):
torch.nn.init.orthogonal_(i.weight, std)
torch.nn.init.constant_(i.bias, bias_const)
def forward(self, state):
return self.net(state) # Advantage value
class AgentPPO:
def __init__(self):
super().__init__()
self.state = None
self.device = None
self.action_dim = None
self.get_obj_critic = None
self.criterion = torch.nn.SmoothL1Loss()
self.cri = self.cri_target = self.if_use_cri_target = self.cri_optim = self.ClassCri = None
self.act = self.act_target = self.if_use_act_target = self.act_optim = self.ClassAct = None
'''init modify'''
self.ClassCri = CriticAdv
self.ClassAct = ActorPPO
self.ratio_clip = 0.2 # ratio.clamp(1 - clip, 1 + clip)
self.lambda_entropy = 0.02 # could be 0.01~0.05
self.lambda_gae_adv = 0.98 # could be 0.95~0.99, GAE (Generalized Advantage Estimation. ICLR.2016.)
self.get_reward_sum = None # self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
self.trajectory_list = None
def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, if_use_gae=False, gpu_id=0):
self.device = torch.device(f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu")
self.trajectory_list = list()
# choose whether to use gae or not
self.get_reward_sum = self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
self.cri = self.ClassCri(net_dim, state_dim, action_dim).to(self.device)
self.act = self.ClassAct(net_dim, state_dim, action_dim).to(self.device) if self.ClassAct else self.cri
self.cri_target = deepcopy(self.cri) if self.if_use_cri_target else self.cri
self.act_target = deepcopy(self.act) if self.if_use_act_target else self.act
self.cri_optim = torch.optim.Adam(self.cri.parameters(), learning_rate)
self.act_optim = torch.optim.Adam(self.act.parameters(), learning_rate) if self.ClassAct else self.cri
del self.ClassCri, self.ClassAct # why del self.ClassCri and self.ClassAct here, to save memory?
def select_action(self, state):
states = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
actions, noises = self.act.get_action(states)
return actions[0].detach().cpu().numpy(), noises[0].detach().cpu().numpy()
def explore_env(self, env, target_step):
state = self.state # resume exploration from the agent's stored current state
trajectory_temp = list()
last_done = 0
for i in range(target_step):
action, noise = self.select_action(state)
state, next_state, reward, done, = env.step(
np.tanh(action)) # the tanh-squashed action is what is actually passed to the environment
trajectory_temp.append((state, reward, done, action, noise))
if done:
state = env.reset()
last_done = i
else:
state = next_state
self.state = state
'''splice list'''
trajectory_list = self.trajectory_list + trajectory_temp[
:last_done + 1] # keep transitions up to and including the last finished episode
self.trajectory_list = trajectory_temp[last_done:]
return trajectory_list
def update_net(self, buffer, batch_size, repeat_times, soft_update_tau):
"""put data extract and update network together"""
with torch.no_grad():
buf_len = buffer[0].shape[0]
buf_state, buf_action, buf_noise, buf_reward, buf_mask = [ten.to(self.device) for ten in
buffer] # decompose buffer data
'''get buf_r_sum, buf_logprob'''
bs = 4096 # set a smaller 'BatchSize' when out of GPU memory: 1024, could change to 4096
buf_value = [self.cri_target(buf_state[i:i + bs]) for i in range(0, buf_len, bs)] #
buf_value = torch.cat(buf_value, dim=0)
buf_logprob = self.act.get_old_logprob(buf_action, buf_noise)
buf_r_sum, buf_advantage = self.get_reward_sum(buf_len, buf_reward, buf_mask, buf_value) # detach()
# normalize advantage
buf_advantage = (buf_advantage - buf_advantage.mean()) / (buf_advantage.std() + 1e-5)
del buf_noise, buffer[:]
'''PPO: Surrogate objective of Trust Region'''
obj_critic = obj_actor = None
for _ in range(int(buf_len / batch_size * repeat_times)):
indices = torch.randint(buf_len, size=(batch_size,), requires_grad=False, device=self.device)
state = buf_state[indices]
action = buf_action[indices]
r_sum = buf_r_sum[indices]
logprob = buf_logprob[indices]
advantage = buf_advantage[indices]
new_logprob, obj_entropy = self.act.get_logprob_entropy(state, action) # it is obj_actor
ratio = (new_logprob - logprob.detach()).exp()
surrogate1 = advantage * ratio
surrogate2 = advantage * ratio.clamp(1 - self.ratio_clip, 1 + self.ratio_clip)
obj_surrogate = -torch.min(surrogate1, surrogate2).mean()
obj_actor = obj_surrogate + obj_entropy * self.lambda_entropy
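# added note: ratio = exp(log pi_new - log pi_old); -min(ratio * A, clip(ratio, 1 - eps, 1 + eps) * A)
# is the standard PPO clipped surrogate with eps = self.ratio_clip, and lambda_entropy weights
# a small entropy regularisation term intended to keep exploration alive.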
self.optim_update(self.act_optim, obj_actor) # update actor
value = self.cri(state).squeeze(1) # critic network predicts the reward_sum (Q value) of state
# use Smooth L1 loss to evaluate the value loss
# obj_critic = self.criterion(value, r_sum) / (r_sum.std() + 1e-6)
obj_critic = self.criterion(value, r_sum)
self.optim_update(self.cri_optim, obj_critic) # backpropagate the value loss and update the critic
# choose whether to use soft update
self.soft_update(self.cri_target, self.cri, soft_update_tau) if self.cri_target is not self.cri else None
a_std_log = getattr(self.act, 'a_std_log', torch.zeros(1))
return obj_critic.item(), obj_actor.item(), a_std_log.mean().item() # logging_tuple
def get_reward_sum_raw(self, buf_len, buf_reward, buf_mask, buf_value) -> (torch.Tensor, torch.Tensor):
buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device) # reward sum
pre_r_sum = 0
for i in range(buf_len - 1, -1, -1):
buf_r_sum[i] = buf_reward[i] + buf_mask[i] * pre_r_sum
pre_r_sum = buf_r_sum[i]
buf_advantage = buf_r_sum - (buf_mask * buf_value[:, 0])
return buf_r_sum, buf_advantage
def get_reward_sum_gae(self, buf_len, ten_reward, ten_mask, ten_value) -> (torch.Tensor, torch.Tensor):
buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device) # old policy value
buf_advantage = torch.empty(buf_len, dtype=torch.float32, device=self.device) # advantage value
pre_r_sum = 0
pre_advantage = 0 # advantage value of previous step
for i in range(buf_len - 1, -1, -1):
buf_r_sum[i] = ten_reward[i] + ten_mask[i] * pre_r_sum
pre_r_sum = buf_r_sum[i]
buf_advantage[i] = ten_reward[i] + ten_mask[i] * (pre_advantage - ten_value[i]) # fix a bug here
pre_advantage = ten_value[i] + buf_advantage[i] * self.lambda_gae_adv
return buf_r_sum, buf_advantage
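# added note: the backward loop folds mask = gamma * (1 - done) into both the discounted
# return buf_r_sum and the advantage estimate; lambda_gae_adv plays the role of the GAE
# lambda that trades bias against variance in the advantage.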
@staticmethod
def optim_update(optimizer, objective):
optimizer.zero_grad()
objective.backward()
optimizer.step()
@staticmethod
def soft_update(target_net, current_net, tau):
for tar, cur in zip(target_net.parameters(), current_net.parameters()):
tar.data.copy_(cur.data.__mul__(tau) + tar.data.__mul__(1.0 - tau))
class Arguments:
def __init__(self, agent=None, env=None):
self.agent = agent # Deep Reinforcement Learning algorithm
self.env = env # the environment for training
self.cwd = None # current work directory. None means set automatically
self.if_remove = False # remove the cwd folder? (True, False, None:ask me)
self.visible_gpu = '0' # for example: os.environ['CUDA_VISIBLE_DEVICES'] = '0, 2,'
# self.worker_num = 2 # number of rollout workers per GPU (adjust it to get high GPU usage)
self.num_threads = 32 # number of CPU threads for evaluating the model; passed to torch.set_num_threads(self.num_threads)
'''Arguments for training'''
self.num_episode = 1000 # to control the train episodes for PPO
self.gamma = 0.995 # discount factor of future rewards
self.learning_rate = 2 ** -14 # about 6e-5
self.soft_update_tau = 2 ** -8 # 2 ** -8 is about 4e-3
self.net_dim = 256 # the network width
self.batch_size = 4096 # num of transitions sampled from replay buffer.
self.repeat_times = 2 ** 3 # how many update passes over each batch of collected data
self.target_step = 4096 # collect this many environment steps, then update the network
self.max_memo = self.target_step # capacity of replay buffer
self.if_per_or_gae = False # GAE for on-policy sparse reward: Generalized Advantage Estimation.
'''Arguments for evaluate'''
self.random_seed = 0 # initialize random seed in self.init_before_training()
# self.random_seed_list = [1234, 2234, 3234, 4234, 5234]
self.random_seed_list = [1234]
self.train = True
self.save_network = True
self.test_network = True
self.save_test_data = True
self.compare_with_pyomo = True
self.plot_on = True
def init_before_training(self, if_main):
if self.cwd is None:
agent_name = self.agent.__class__.__name__
self.cwd = f'./{agent_name}'
if if_main:
import shutil # remove history according to bool(if_remove)
if self.if_remove is None:
self.if_remove = bool(input(f"| PRESS 'y' to REMOVE: {self.cwd}? ") == 'y')
elif self.if_remove:
shutil.rmtree(self.cwd, ignore_errors=True)
print(f"| Remove cwd: {self.cwd}")
os.makedirs(self.cwd, exist_ok=True)
np.random.seed(self.random_seed)
torch.manual_seed(self.random_seed)
torch.set_num_threads(self.num_threads)
torch.set_default_dtype(torch.float32)
os.environ['CUDA_VISIBLE_DEVICES'] = str(self.visible_gpu)
def update_buffer(_trajectory):
_trajectory = list(map(list, zip(*_trajectory))) # 2D-list transpose, here cut the trajectory into 5 parts
ten_state = torch.as_tensor(_trajectory[0]) # tensor state here
ten_reward = torch.as_tensor(_trajectory[1], dtype=torch.float32)
# _trajectory[2] = done, replace done by mask, save memory
ten_mask = (1.0 - torch.as_tensor(_trajectory[2], dtype=torch.float32)) * gamma
ten_action = torch.as_tensor(_trajectory[3])
ten_noise = torch.as_tensor(_trajectory[4], dtype=torch.float32)
buffer[:] = (ten_state, ten_action, ten_noise, ten_reward, ten_mask) # list store tensors
_steps = ten_reward.shape[0] # how many steps are collected in all trajectories
_r_exp = ten_reward.mean() # the mean reward
return _steps, _r_exp
if __name__ == '__main__':
args = Arguments()
reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
loss_record = {'episode': [], 'steps': [], 'critic_loss': [], 'actor_loss': [], 'entropy_loss': []}
args.visible_gpu = '0'
for seed in args.random_seed_list:
args.random_seed = seed
args.agent = AgentPPO()
agent_name = f'{args.agent.__class__.__name__}'
args.agent.cri_target = True
args.env = ESSEnv()
args.init_before_training(if_main=True)
'''init agent and environment'''
agent = args.agent
env = args.env
agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
args.if_per_or_gae)
cwd = args.cwd
gamma = args.gamma
batch_size = args.batch_size # how much data should be used to update net
target_step = args.target_step # how many environment steps to collect before one exploration episode is cut off
repeat_times = args.repeat_times # how many update passes over each batch of collected data
soft_update_tau = args.soft_update_tau
agent.state = env.reset()
'''init buffer'''
buffer = list()
'''init training parameters'''
num_episode = args.num_episode
# args.train=False
# args.save_network=False
# args.test_network=False
# args.save_test_data=False
# args.compare_with_pyomo=False
if args.train:
for i_episode in range(num_episode):
with torch.no_grad():
trajectory_list = agent.explore_env(env, target_step)
steps, r_exp = update_buffer(trajectory_list)
critic_loss, actor_loss, entropy_loss = agent.update_net(buffer, batch_size, repeat_times,
soft_update_tau)
loss_record['critic_loss'].append(critic_loss)
loss_record['actor_loss'].append(actor_loss)
loss_record['entropy_loss'].append(entropy_loss)
with torch.no_grad():
episode_reward, episode_unbalance = get_episode_return(env, agent.act, agent.device)
reward_record['mean_episode_reward'].append(episode_reward)
reward_record['unbalance'].append(episode_unbalance)
print(f'current episode is {i_episode}, reward: {episode_reward}, unbalance: {episode_unbalance}')
act_save_path = f'{args.cwd}/actor.pth'
loss_record_path = f'{args.cwd}/loss_data.pkl'
reward_record_path = f'{args.cwd}/reward_data.pkl'
with open(loss_record_path, 'wb') as tf:
pickle.dump(loss_record, tf)
with open(reward_record_path, 'wb') as tf:
pickle.dump(reward_record, tf)
if args.save_network:
torch.save(agent.act.state_dict(), act_save_path)
print('actor parameters have been saved')
if args.test_network:
args.cwd = agent_name + '0618'
agent.act.load_state_dict(torch.load(act_save_path))
print('parameters have been reloaded for testing')
record = test_one_episode(env, agent.act, agent.device)
eval_data = pd.DataFrame(record['information'])
eval_data.columns = ['time_step', 'price', 'netload', 'action', 'real_action', 'soc', 'battery', 'gen1', 'gen2',
'gen3', 'unbalance', 'operation_cost']
if args.save_test_data:
test_data_save_path = f'{args.cwd}/test_data.pkl'
with open(test_data_save_path, 'wb') as tf:
pickle.dump(record, tf)
'''compare with pyomo data and results'''
if args.compare_with_pyomo:
month = record['init_info'][0][0]
day = record['init_info'][0][1]
initial_soc = record['init_info'][0][3]
base_result = optimization_base_result(env, month, day, initial_soc)
if args.plot_on:
from plotDRL import PlotArgs, make_dir, plot_evaluation_information, plot_optimization_result
plot_args = PlotArgs()
plot_args.feature_change = ''
args.cwd = agent_name
plot_dir = make_dir(args.cwd, plot_args.feature_change)
plot_optimization_result(base_result, plot_dir)
plot_evaluation_information(args.cwd + '/' + 'test_data.pkl', plot_dir)
'''compare the costs obtained from Pyomo and PPO'''
ratio = sum(eval_data['operation_cost']) / sum(base_result['step_cost'])
print('operation_cost_sum:', sum(eval_data['operation_cost']))
print('step_cost_sum:', sum(base_result['step_cost']))
print('ratio:', ratio)

137
SAC.py Normal file

@ -0,0 +1,137 @@
import pickle
import pandas as pd
import torch
from agent import AgentSAC
from environment import ESSEnv
from tools import *
def update_buffer(_trajectory):
ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
ary_other = torch.as_tensor([item[1] for item in _trajectory])
ary_other[:, 0] = ary_other[:, 0] # ten_reward
ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma # ten_mask = (1.0 - ary_done) * gamma
buffer.extend_buffer(ten_state, ary_other)
_steps = ten_state.shape[0]
_r_exp = ary_other[:, 0].mean() # other = (reward, mask, action)
return _steps, _r_exp
if __name__ == '__main__':
args = Arguments()
reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
loss_record = {'episode': [], 'steps': [], 'critic_loss': [], 'actor_loss': [], 'entropy_loss': []}
args.visible_gpu = '0'
for seed in args.random_seed_list:
args.random_seed = seed
args.agent = AgentSAC()
agent_name = f'{args.agent.__class__.__name__}'
args.agent.cri_target = True
args.env = ESSEnv()
args.init_before_training(if_main=True)
'''init agent and environment'''
agent = args.agent
env = args.env
agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
args.if_per_or_gae)
'''init replay buffer'''
buffer = ReplayBuffer(max_len=args.max_memo, state_dim=env.state_space.shape[0],
action_dim=env.action_space.shape[0])
'''start training'''
cwd = args.cwd
gamma = args.gamma
batch_size = args.batch_size # how much data should be used to update net
target_step = args.target_step # how many environment steps to collect before one exploration episode is cut off
repeat_times = args.repeat_times # how many update passes over each batch of collected data
soft_update_tau = args.soft_update_tau
agent.state = env.reset()
'''collect data and train and update network'''
num_episode = args.num_episode
'''here record real unbalance'''
##
# args.train=False
# args.save_network=False
# args.test_network=False
# args.save_test_data=False
# args.compare_with_pyomo=False
#
if args.train:
collect_data = True
while collect_data:
print(f'buffer:{buffer.now_len}')
with torch.no_grad():
trajectory = agent.explore_env(env, target_step)
steps, r_exp = update_buffer(trajectory)
buffer.update_now_len()
if buffer.now_len >= 10000:
collect_data = False
for i_episode in range(num_episode):
critic_loss, actor_loss, entropy_loss = agent.update_net(buffer, batch_size, repeat_times,
soft_update_tau)
loss_record['critic_loss'].append(critic_loss)
loss_record['actor_loss'].append(actor_loss)
loss_record['entropy_loss'].append(entropy_loss)
with torch.no_grad():
episode_reward, episode_unbalance = get_episode_return(env, agent.act, agent.device)
reward_record['mean_episode_reward'].append(episode_reward)
reward_record['unbalance'].append(episode_unbalance)
print(
f'current episode is {i_episode}, reward: {episode_reward}, '
f'unbalance: {episode_unbalance}, buffer_length: {buffer.now_len}')
if i_episode % 10 == 0:
# target_step
with torch.no_grad():
trajectory = agent.explore_env(env, target_step)
steps, r_exp = update_buffer(trajectory)
act_save_path = f'{args.cwd}/actor.pth'
loss_record_path = f'{args.cwd}/loss_data.pkl'
reward_record_path = f'{args.cwd}/reward_data.pkl'
with open(loss_record_path, 'wb') as tf:
pickle.dump(loss_record, tf)
with open(reward_record_path, 'wb') as tf:
pickle.dump(reward_record, tf)
if args.save_network:
torch.save(agent.act.state_dict(), act_save_path)
print('actor parameters have been saved')
if args.test_network:
args.cwd = agent_name
agent.act.load_state_dict(torch.load(act_save_path))
print('parameters have been reloaded for testing')
record = test_one_episode(env, agent.act, agent.device)
eval_data = pd.DataFrame(record['information'])
eval_data.columns = ['time_step', 'price', 'netload', 'action', 'real_action', 'soc', 'battery', 'gen1', 'gen2',
'gen3', 'unbalance', 'operation_cost']
if args.save_test_data:
test_data_save_path = f'{args.cwd}/test_data.pkl'
with open(test_data_save_path, 'wb') as tf:
pickle.dump(record, tf)
'''compare with pyomo data and results'''
if args.compare_with_pyomo:
month = record['init_info'][0][0]
day = record['init_info'][0][1]
initial_soc = record['init_info'][0][3]
base_result = optimization_base_result(env, month, day, initial_soc)
if args.plot_on:
from plotDRL import PlotArgs, make_dir, plot_evaluation_information, plot_optimization_result
plot_args = PlotArgs()
plot_args.feature_change = ''
args.cwd = agent_name
plot_dir = make_dir(args.cwd, plot_args.feature_change)
plot_optimization_result(base_result, plot_dir)
plot_evaluation_information(args.cwd + '/' + 'test_data.pkl', plot_dir)
'''compare the costs obtained from Pyomo and SAC'''
ratio = sum(eval_data['operation_cost']) / sum(base_result['step_cost'])
print('operation_cost_sum:', sum(eval_data['operation_cost']))
print('step_cost_sum:', sum(base_result['step_cost']))
print('ratio:', ratio)

133
TD3.py Normal file

@ -0,0 +1,133 @@
import pickle
import pandas as pd
import torch
from agent import AgentTD3
from environment import ESSEnv
from tools import *
def update_buffer(_trajectory):
ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
ary_other = torch.as_tensor([item[1] for item in _trajectory])
ary_other[:, 0] = ary_other[:, 0] # ten_reward
ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma # ten_mask = (1.0 - ary_done) * gamma
buffer.extend_buffer(ten_state, ary_other)
_steps = ten_state.shape[0]
_r_exp = ary_other[:, 0].mean() # other = (reward, mask, action)
return _steps, _r_exp
if __name__ == '__main__':
args = Arguments()
reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
loss_record = {'episode': [], 'steps': [], 'critic_loss': [], 'actor_loss': [], 'entropy_loss': []}
args.visible_gpu = '0'
for seed in args.random_seed_list:
args.random_seed = seed
args.agent = AgentTD3()
agent_name = f'{args.agent.__class__.__name__}'
args.agent.cri_target = True
args.env = ESSEnv()
args.init_before_training(if_main=True)
'''init agent and environment'''
agent = args.agent
env = args.env
agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
args.if_per_or_gae)
'''init replay buffer'''
buffer = ReplayBuffer(max_len=args.max_memo, state_dim=env.state_space.shape[0],
action_dim=env.action_space.shape[0])
'''start training'''
cwd = args.cwd
gamma = args.gamma
batch_size = args.batch_size # how much data should be used to update net
target_step = args.target_step # how many environment steps to collect before one exploration episode is cut off
repeat_times = args.repeat_times # how many update passes over each batch of collected data
soft_update_tau = args.soft_update_tau
agent.state = env.reset()
'''collect data and train and update network'''
num_episode = args.num_episode
# args.train=False
# args.save_network=False
# args.test_network=False
# args.save_test_data=False
# args.compare_with_pyomo=False
if args.train:
collect_data = True
while collect_data:
print(f'buffer:{buffer.now_len}')
with torch.no_grad():
trajectory = agent.explore_env(env, target_step)
steps, r_exp = update_buffer(trajectory)
buffer.update_now_len()
if buffer.now_len >= 10000:
collect_data = False
for i_episode in range(num_episode):
critic_loss, actor_loss = agent.update_net(buffer, batch_size, repeat_times, soft_update_tau)
loss_record['critic_loss'].append(critic_loss)
loss_record['actor_loss'].append(actor_loss)
with torch.no_grad():
episode_reward, episode_unbalance = get_episode_return(env, agent.act, agent.device)
reward_record['mean_episode_reward'].append(episode_reward)
reward_record['unbalance'].append(episode_unbalance)
print(
f'current episode is {i_episode}, reward: {episode_reward}, '
f'unbalance: {episode_unbalance}, buffer_length: {buffer.now_len}')
if i_episode % 10 == 0:
# target_step
with torch.no_grad():
trajectory = agent.explore_env(env, target_step)
steps, r_exp = update_buffer(trajectory)
loss_record_path = f'{args.cwd}/loss_data.pkl'
reward_record_path = f'{args.cwd}/reward_data.pkl'
# currently only the actor trained with the last random seed is stored
act_save_path = f'{args.cwd}/actor.pth'
with open(loss_record_path, 'wb') as tf:
pickle.dump(loss_record, tf)
with open(reward_record_path, 'wb') as tf:
pickle.dump(reward_record, tf)
if args.save_network:
torch.save(agent.act.state_dict(), act_save_path)
print('actor parameters have been saved')
if args.test_network:
args.cwd = agent_name
agent.act.load_state_dict(torch.load(act_save_path))
print('parameters have been reloaded for testing')
record = test_one_episode(env, agent.act, agent.device)
eval_data = pd.DataFrame(record['information'])
eval_data.columns = ['time_step', 'price', 'netload', 'action', 'real_action', 'soc', 'battery', 'gen1', 'gen2',
'gen3', 'unbalance', 'operation_cost']
if args.save_test_data:
test_data_save_path = f'{args.cwd}/test_data.pkl'
with open(test_data_save_path, 'wb') as tf:
pickle.dump(record, tf)
'''compare with pyomo data and results'''
if args.compare_with_pyomo:
month = record['init_info'][0][0]
day = record['init_info'][0][1]
initial_soc = record['init_info'][0][3]
base_result = optimization_base_result(env, month, day, initial_soc)
if args.plot_on:
from plotDRL import PlotArgs, make_dir, plot_evaluation_information, plot_optimization_result
plot_args = PlotArgs()
plot_args.feature_change = ''
args.cwd = agent_name
plot_dir = make_dir(args.cwd, plot_args.feature_change)
plot_optimization_result(base_result, plot_dir)
plot_evaluation_information(args.cwd + '/' + 'test_data.pkl', plot_dir)
'''compare the costs obtained from Pyomo and TD3'''
ratio = sum(eval_data['operation_cost']) / sum(base_result['step_cost'])
print('operation_cost_sum:', sum(eval_data['operation_cost']))
print('step_cost_sum:', sum(base_result['step_cost']))
print('ratio:', ratio)

Binary files not shown (5 files, names not listed in this view).

328
agent.py Normal file

@ -0,0 +1,328 @@
import numpy.random as rd
import os
from copy import deepcopy
from net import *
class AgentBase:
def __init__(self):
self.state = None
self.device = None
self.action_dim = None
self.if_off_policy = None
self.explore_noise = None
self.trajectory_list = None
self.criterion = torch.nn.SmoothL1Loss()
self.cri = self.cri_target = self.if_use_cri_target = self.cri_optim = self.ClassCri = None
self.act = self.act_target = self.if_use_act_target = self.act_optim = self.ClassAct = None
def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, _if_per_or_gae=False, gpu_id=0):
# explicitly call self.init() for multiprocessing
self.device = torch.device(f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu")
self.action_dim = action_dim
self.cri = self.ClassCri(net_dim, state_dim, action_dim).to(self.device)
self.act = self.ClassAct(net_dim, state_dim, action_dim).to(self.device) if self.ClassAct else self.cri
self.cri_target = deepcopy(self.cri) if self.if_use_cri_target else self.cri
self.act_target = deepcopy(self.act) if self.if_use_act_target else self.act
self.cri_optim = torch.optim.Adam(self.cri.parameters(), learning_rate)
self.act_optim = torch.optim.Adam(self.act.parameters(), learning_rate) if self.ClassAct else self.cri
del self.ClassCri, self.ClassAct
def select_action(self, state) -> np.ndarray:
states = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
action = self.act(states)[0]
action = (action + torch.randn_like(action) * self.explore_noise).clamp(-1, 1)
return action.detach().cpu().numpy()
def explore_env(self, env, target_step):
trajectory = list()
state = self.state
for _ in range(target_step):
action = self.select_action(state)
state, next_state, reward, done, = env.step(action)
trajectory.append((state, (reward, done, *action)))
state = env.reset() if done else next_state
self.state = state
return trajectory
@staticmethod
def optim_update(optimizer, objective):
optimizer.zero_grad()
objective.backward()
optimizer.step()
@staticmethod
def soft_update(target_net, current_net, tau):
for tar, cur in zip(target_net.parameters(), current_net.parameters()):
tar.data.copy_(cur.data * tau + tar.data * (1.0 - tau))
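# added note: this is Polyak averaging, theta_target <- tau * theta + (1 - tau) * theta_target,
# applied parameter by parameter; a small tau makes the target networks track the online
# networks slowly, which stabilises the bootstrapped critic targets.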
def save_or_load_agent(self, cwd, if_save):
def load_torch_file(model_or_optim, _path):
state_dict = torch.load(_path, map_location=lambda storage, loc: storage)
model_or_optim.load_state_dict(state_dict)
name_obj_list = [('actor', self.act), ('act_target', self.act_target), ('act_optim', self.act_optim),
('critic', self.cri), ('cri_target', self.cri_target), ('cri_optim', self.cri_optim), ]
name_obj_list = [(name, obj) for name, obj in name_obj_list if obj is not None]
if if_save:
for name, obj in name_obj_list:
save_path = f"{cwd}/{name}.pth"
torch.save(obj.state_dict(), save_path)
else:
for name, obj in name_obj_list:
save_path = f"{cwd}/{name}.pth"
load_torch_file(obj, save_path) if os.path.isfile(save_path) else None
class AgentDDPG(AgentBase):
def __init__(self):
super().__init__()
self.explore_noise = 0.1 # explore noise of action
self.if_use_cri_target = self.if_use_act_target = True
self.ClassCri = Critic
self.ClassAct = Actor
def update_net(self, buffer, batch_size, repeat_times, soft_update_tau) -> (float, float):
buffer.update_now_len()
obj_critic = obj_actor = None
for _ in range(int(buffer.now_len / batch_size * repeat_times)):
obj_critic, state = self.get_obj_critic(buffer, batch_size) # critic loss
self.optim_update(self.cri_optim, obj_critic)
self.soft_update(self.cri_target, self.cri, soft_update_tau)
action_pg = self.act(state) # policy gradient
obj_actor = -self.cri(state, action_pg).mean() # actor loss: minimizing it maximizes the critic's Q-value
self.optim_update(self.act_optim, obj_actor)
self.soft_update(self.act_target, self.act, soft_update_tau)
return obj_actor.item(), obj_critic.item()
def get_obj_critic(self, buffer, batch_size) -> (torch.Tensor, torch.Tensor):
with torch.no_grad():
reward, mask, action, state, next_s = buffer.sample_batch(batch_size)
next_q = self.cri_target(next_s, self.act_target(next_s))
q_label = reward + mask * next_q
q_value = self.cri(state, action)
obj_critic = self.criterion(q_value, q_label)
return obj_critic, state
class AgentTD3(AgentBase):
def __init__(self):
super().__init__()
self.explore_noise = 0.1 # standard deviation of exploration noise
self.policy_noise = 0.2 # standard deviation of policy noise
self.update_freq = 2 # delay update frequency
self.if_use_cri_target = self.if_use_act_target = True
self.ClassCri = CriticTwin
self.ClassAct = Actor
def update_net(self, buffer, batch_size, repeat_times, soft_update_tau) -> tuple:
buffer.update_now_len()
obj_critic = obj_actor = None
for update_c in range(int(buffer.now_len / batch_size * repeat_times)):
obj_critic, state = self.get_obj_critic(buffer, batch_size)
self.optim_update(self.cri_optim, obj_critic)
action_pg = self.act(state) # policy gradient
obj_actor = -self.cri_target(state, action_pg).mean() # use cri_target instead of cri for stable training
self.optim_update(self.act_optim, obj_actor)
if update_c % self.update_freq == 0: # delay update
self.soft_update(self.cri_target, self.cri, soft_update_tau)
self.soft_update(self.act_target, self.act, soft_update_tau)
return obj_critic.item() / 2, obj_actor.item()
def get_obj_critic(self, buffer, batch_size) -> (torch.Tensor, torch.Tensor):
with torch.no_grad():
reward, mask, action, state, next_s = buffer.sample_batch(batch_size)
next_a = self.act_target.get_action(next_s, self.policy_noise) # policy noise
next_q = torch.min(*self.cri_target.get_q1_q2(next_s, next_a)) # twin critics
q_label = reward + mask * next_q
q1, q2 = self.cri.get_q1_q2(state, action)
obj_critic = self.criterion(q1, q_label) + self.criterion(q2, q_label) # twin critics
return obj_critic, state
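# added note: the TD3 ingredients are visible above: twin critics combined with torch.min
# to curb Q-value overestimation, target-policy smoothing via get_action(next_s, policy_noise),
# and (in update_net) soft target updates applied only every update_freq critic updates.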
class AgentSAC(AgentBase):
def __init__(self):
super().__init__()
self.ClassCri = CriticTwin
self.ClassAct = ActorSAC
self.if_use_cri_target = True
self.if_use_act_target = False
self.alpha_log = None
self.alpha_optim = None
self.target_entropy = None
def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, _if_use_per=False, gpu_id=0, env_num=1):
super().init(net_dim, state_dim, action_dim, learning_rate, _if_use_per, gpu_id)
self.alpha_log = torch.tensor((-np.log(action_dim) * np.e,), dtype=torch.float32,
requires_grad=True, device=self.device) # trainable parameter
self.alpha_optim = torch.optim.Adam((self.alpha_log,), lr=learning_rate)
self.target_entropy = np.log(action_dim)
def select_action(self, state):
states = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
actions = self.act.get_action(states)
return actions.detach().cpu().numpy()[0]
def explore_env(self, env, target_step):
trajectory = list()
state = self.state
for _ in range(target_step):
action = self.select_action(state)
state, next_state, reward, done, = env.step(action)
trajectory.append((state, (reward, done, *action)))
state = env.reset() if done else next_state
self.state = state
return trajectory
def update_net(self, buffer, batch_size, repeat_times, soft_update_tau):
buffer.update_now_len()
alpha = self.alpha_log.exp().detach()
obj_critic = obj_actor = None
for _ in range(int(buffer.now_len * repeat_times / batch_size)):
'''objective of critic (loss function of critic)'''
with torch.no_grad():
reward, mask, action, state, next_s = buffer.sample_batch(batch_size)
next_a, next_log_prob = self.act_target.get_action_logprob(next_s)
next_q = torch.min(*self.cri_target.get_q1_q2(next_s, next_a))
q_label = reward + mask * (next_q + next_log_prob * alpha)
q1, q2 = self.cri.get_q1_q2(state, action)
obj_critic = self.criterion(q1, q_label) + self.criterion(q2, q_label)
self.optim_update(self.cri_optim, obj_critic)
self.soft_update(self.cri_target, self.cri, soft_update_tau)
'''objective of alpha (temperature parameter automatic adjustment)'''
action_pg, log_prob = self.act.get_action_logprob(state) # policy gradient
obj_alpha = (self.alpha_log * (log_prob - self.target_entropy).detach()).mean()
self.optim_update(self.alpha_optim, obj_alpha)
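# added note: the temperature is stored as alpha_log = log(alpha) so alpha stays positive;
# obj_alpha adapts it by comparing the policy's log-probabilities with target_entropy,
# and alpha_log is clamped to [-20, 2] a few lines below to keep alpha in a sane range.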
'''objective of actor'''
alpha = self.alpha_log.exp().detach()
with torch.no_grad():
self.alpha_log[:] = self.alpha_log.clamp(-20, 2)
obj_actor = -(torch.min(*self.cri_target.get_q1_q2(state, action_pg)) + log_prob * alpha).mean()
self.optim_update(self.act_optim, obj_actor)
self.soft_update(self.act_target, self.act, soft_update_tau)
return obj_critic.item(), obj_actor.item(), alpha.item()
class AgentPPO(AgentBase):
def __init__(self):
super().__init__()
self.ClassCri = CriticAdv
self.ClassAct = ActorPPO
self.if_off_policy = False
self.ratio_clip = 0.2 # ratio.clamp(1 - clip, 1 + clip)
self.lambda_entropy = 0.02 # could be 0.01~0.05
self.lambda_gae_adv = 0.98 # could be 0.95~0.99, GAE (Generalized Advantage Estimation. ICLR.2016.)
self.get_reward_sum = None # self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, if_use_gae=False, gpu_id=0, env_num=1):
super().init(net_dim, state_dim, action_dim, learning_rate, if_use_gae, gpu_id)
self.trajectory_list = list()
self.get_reward_sum = self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
def select_action(self, state):
states = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
actions, noises = self.act.get_action(states)
return actions[0].detach().cpu().numpy(), noises[0].detach().cpu().numpy()
def explore_env(self, env, target_step):
state = self.state
trajectory_temp = list()
last_done = 0
for i in range(target_step):
action, noise = self.select_action(state)
next_state, reward, done, _ = env.step(np.tanh(action))
trajectory_temp.append((state, reward, done, action, noise))
if done:
state = env.reset()
last_done = i
else:
state = next_state
self.state = state
'''splice list'''
trajectory_list = self.trajectory_list + trajectory_temp[:last_done + 1]
self.trajectory_list = trajectory_temp[last_done:]
return trajectory_list
def update_net(self, buffer, batch_size, repeat_times, soft_update_tau):
with torch.no_grad():
buf_len = buffer[0].shape[0]
buf_state, buf_action, buf_noise, buf_reward, buf_mask = [ten.to(self.device) for ten in buffer]
# (ten_state, ten_action, ten_noise, ten_reward, ten_mask) = buffer
'''get buf_r_sum, buf_logprob'''
bs = 2 ** 10 # set a smaller 'BatchSize' when out of GPU memory.
buf_value = [self.cri_target(buf_state[i:i + bs]) for i in range(0, buf_len, bs)]
buf_value = torch.cat(buf_value, dim=0)
buf_logprob = self.act.get_old_logprob(buf_action, buf_noise)
buf_r_sum, buf_advantage = self.get_reward_sum(buf_len, buf_reward, buf_mask, buf_value) # detach()
buf_advantage = (buf_advantage - buf_advantage.mean()) / (buf_advantage.std() + 1e-5)
del buf_noise, buffer[:]
'''PPO: Surrogate objective of Trust Region'''
obj_critic = obj_actor = None
for _ in range(int(buf_len / batch_size * repeat_times)):
indices = torch.randint(buf_len, size=(batch_size,), requires_grad=False, device=self.device)
state = buf_state[indices]
action = buf_action[indices]
r_sum = buf_r_sum[indices]
logprob = buf_logprob[indices]
advantage = buf_advantage[indices]
new_logprob, obj_entropy = self.act.get_logprob_entropy(state, action) # it is obj_actor
ratio = (new_logprob - logprob.detach()).exp()
surrogate1 = advantage * ratio
surrogate2 = advantage * ratio.clamp(1 - self.ratio_clip, 1 + self.ratio_clip)
obj_surrogate = -torch.min(surrogate1, surrogate2).mean()
obj_actor = obj_surrogate + obj_entropy * self.lambda_entropy
self.optim_update(self.act_optim, obj_actor)
value = self.cri(state).squeeze(1) # critic network predicts the reward_sum (Q value) of state
obj_critic = self.criterion(value, r_sum) / (r_sum.std() + 1e-6)
self.optim_update(self.cri_optim, obj_critic)
self.soft_update(self.cri_target, self.cri, soft_update_tau) if self.cri_target is not self.cri else None
a_std_log = getattr(self.act, 'a_std_log', torch.zeros(1))
return obj_critic.item(), obj_actor.item(), a_std_log.mean().item() # logging_tuple
def get_reward_sum_raw(self, buf_len, buf_reward, buf_mask, buf_value) -> (torch.Tensor, torch.Tensor):
buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device) # reward sum
pre_r_sum = 0
for i in range(buf_len - 1, -1, -1):
buf_r_sum[i] = buf_reward[i] + buf_mask[i] * pre_r_sum
pre_r_sum = buf_r_sum[i]
buf_advantage = buf_r_sum - (buf_mask * buf_value[:, 0])
return buf_r_sum, buf_advantage
def get_reward_sum_gae(self, buf_len, ten_reward, ten_mask, ten_value):
"""tensor, tensor """
buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device) # old policy value
buf_advantage = torch.empty(buf_len, dtype=torch.float32, device=self.device) # advantage value
pre_r_sum = 0
pre_advantage = 0 # advantage value of previous step
for i in range(buf_len - 1, -1, -1):
buf_r_sum[i] = ten_reward[i] + ten_mask[i] * pre_r_sum
pre_r_sum = buf_r_sum[i]
buf_advantage[i] = ten_reward[i] + ten_mask[i] * (pre_advantage - ten_value[i]) # fix a bug here
pre_advantage = ten_value[i] + buf_advantage[i] * self.lambda_gae_adv
return buf_r_sum, buf_advantage

8761
data/houseload.csv Normal file

File diff suppressed because it is too large.

BIN
data/iowa.nc Normal file

Binary file not shown.

8761
data/irradiance.csv Normal file

File diff suppressed because it is too large Load Diff

8761
data/prices.csv Normal file

File diff suppressed because it is too large Load Diff

2
data/station.csv Normal file
View File

@ -0,0 +1,2 @@
state,lon,lat
iowa,93.5,41.1

8761
data/temper.csv Normal file

File diff suppressed because it is too large Load Diff

8761
data/wind.csv Normal file

File diff suppressed because it is too large Load Diff

69
data_manager.py Normal file
View File

@ -0,0 +1,69 @@
class Constant:
MONTHS_LEN = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
MAX_STEP_HOURS = 24 * 30
class DataManager:
def __init__(self) -> None:
self.Pv = []
self.Prices = []
self.Load_Consumption = []
self.Temperature = []
self.Irradiance = []
self.Wind = []
def add_pv_element(self, element): self.Pv.append(element)
def add_price_element(self, element): self.Prices.append(element)
def add_load_element(self, element): self.Load_Consumption.append(element)
def add_temperature_element(self, element): self.Temperature.append(element)
def add_irradiance_element(self, element): self.Irradiance.append(element)
def add_wind_element(self, element): self.Wind.append(element)
    # get the data for a single hour, given month, day and day_time
def get_pv_data(self, month, day, day_time):
return self.Pv[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time]
def get_price_data(self, month, day, day_time):
return self.Prices[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time]
def get_load_cons_data(self, month, day, day_time):
return self.Load_Consumption[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time]
def get_temperature_data(self, month, day, day_time):
return self.Temperature[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time]
def get_irradiance_data(self, month, day, day_time):
return self.Irradiance[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time]
def get_wind_data(self, month, day, day_time):
return self.Wind[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time]
# get series data for one episode
def get_series_pv_data(self, month, day):
return self.Pv[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24:
(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + 24]
def get_series_price_data(self, month, day):
return self.Prices[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24:
(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + 24]
def get_series_load_cons_data(self, month, day):
return self.Load_Consumption[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24:
(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + 24]
def get_series_temperature_data(self, month, day):
return self.Temperature[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24:
(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + 24]
def get_series_irradiance_data(self, month, day):
return self.Irradiance[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24:
(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + 24]
def get_series_wind_data(self, month, day):
return self.Wind[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24:
(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + 24]
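# Illustrative check (not part of the original file): every getter above maps
# (month, day, day_time) to a flat hourly index
#     (sum(MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time
# e.g. month=3, day=2, day_time=5 -> (31 + 28 + 1) * 24 + 5 = 1445.
if __name__ == '__main__':
    def hour_index(month, day, day_time):
        return (sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time
    assert hour_index(1, 1, 0) == 0        # first hour of the year
    assert hour_index(3, 2, 5) == 1445
    assert hour_index(12, 31, 23) == 8759  # last hour of a non-leap year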

194
environment.py Normal file
View File

@ -0,0 +1,194 @@
import gym
import numpy as np
import pandas as pd
from gym import spaces
from module import *
from parameters import *
from data_manager import *
class ESSEnv(gym.Env):
def __init__(self, **kwargs):
super(ESSEnv, self).__init__()
self.excess = None
self.unbalance = None
self.shedding = None
self.real_unbalance = None
self.operation_cost = None
self.current_output = None
self.final_step_outputs = None
self.data_manager = DataManager()
self._load_year_data()
self.month = None
self.day = None
self.TRAIN = True
self.current_time = None
self.episode_length = kwargs.get('episode_length', 24)
self.battery_parameters = kwargs.get('battery_parameters', battery_parameters)
self.dg_parameters = kwargs.get('dg_parameters', dg_parameters)
self.solar_parameters = kwargs.get('solar_parameters', solar_parameters)
self.wind_parameters = kwargs.get('wind_parameters', wind_parameters)
        self.penalty_coefficient = 50  # controls the soft penalty constraint
self.sell_coefficient = 0.5 # control sell benefits
self.grid = Grid()
self.battery = Battery(self.battery_parameters)
self.dg1 = DG(self.dg_parameters['gen_1'])
self.dg2 = DG(self.dg_parameters['gen_2'])
self.dg3 = DG(self.dg_parameters['gen_3'])
self.solar = Solar(self.solar_parameters)
self.wind = Wind(self.wind_parameters)
        self.action_space = spaces.Box(low=-1, high=1, shape=(5,), dtype=np.float32)  # a voltage-regulation action has been added
self.state_space = spaces.Box(low=-np.inf, high=np.inf, shape=(10,), dtype=np.float32)
def reset(self, *args):
        self.month = np.random.randint(1, 13)  # randomly pick a month (1-12)
if self.TRAIN:
self.day = np.random.randint(1, 20)
else:
            self.day = np.random.randint(20, Constant.MONTHS_LEN[self.month - 1] - 1)  # MONTHS_LEN is 0-indexed
self.current_time = 0
self.battery.reset()
self.dg1.reset()
self.dg2.reset()
self.dg3.reset()
self.solar.reset()
self.wind.reset()
return self._build_state()
def _build_state(self):
soc = self.battery.SOC()
dg1_output = self.dg1.current_output
dg2_output = self.dg2.current_output
dg3_output = self.dg3.current_output
time_step = self.current_time
price = self.data_manager.get_price_data(self.month, self.day, self.current_time)
house_load = self.data_manager.get_load_cons_data(self.month, self.day, self.current_time)
temperature = self.data_manager.get_temperature_data(self.month, self.day, self.current_time)
irradiance = self.data_manager.get_irradiance_data(self.month, self.day, self.current_time)
wind_speed = self.data_manager.get_wind_data(self.month, self.day, self.current_time)
# print('house_load:', house_load)
pv_generation = self.solar.step(temperature, irradiance)
wd_generation = self.wind.step(wind_speed)
generation = pv_generation + wd_generation
net_load = house_load - generation
obs = np.concatenate((np.float32(time_step), np.float32(price), np.float32(soc), np.float32(net_load),
np.float32(dg1_output), np.float32(dg2_output), np.float32(dg3_output),
np.float32(temperature), np.float32(irradiance), np.float32(wind_speed)), axis=None)
return obs
def step(self, action): # state transition: current_obs->take_action->get_reward->get_finish->next_obs
# put action into each component
current_obs = self._build_state()
temperature = current_obs[7]
irradiance = current_obs[8]
wind_speed = current_obs[9]
self.battery.step(action[0]) # execute the state-transition part, battery.current_capacity also changed
self.dg1.step(action[1])
self.dg2.step(action[2])
self.dg3.step(action[3])
        self.solar.step(temperature, irradiance, action[4])  # argument order matches Solar.step(temperature, irradiance, action_voltage)
self.wind.step(wind_speed)
self.current_output = np.array((self.dg1.current_output, self.dg2.current_output, self.dg3.current_output,
-self.battery.energy_change, self.solar.current_power, self.wind.current_power))
actual_production = sum(self.current_output)
price = current_obs[1]
netload = current_obs[3]
# print('actual_production:', actual_production, 'netload:', netload)
unbalance = actual_production - netload
reward = 0
        excess_penalty = 0  # penalty for excess generation
        deficient_penalty = 0  # penalty for unmet load
sell_benefit = 0
buy_cost = 0
self.excess = 0
self.shedding = 0
if unbalance >= 0: # now in excess condition
if unbalance <= self.grid.exchange_ability:
                # revenue from selling to the grid is small [0.029, 0.1]
sell_benefit = self.grid.get_cost(price, unbalance) * self.sell_coefficient
else:
sell_benefit = self.grid.get_cost(price, self.grid.exchange_ability) * self.sell_coefficient
# real unbalance that even grid could not meet
self.excess = unbalance - self.grid.exchange_ability
excess_penalty = self.excess * self.penalty_coefficient
        else:  # unbalance < 0: load-shedding mode, the deficiency penalty applies
if abs(unbalance) <= self.grid.exchange_ability:
buy_cost = self.grid.get_cost(price, abs(unbalance))
else:
buy_cost = self.grid.get_cost(price, self.grid.exchange_ability)
self.shedding = abs(unbalance) - self.grid.exchange_ability
deficient_penalty = self.shedding * self.penalty_coefficient
battery_cost = self.battery.get_cost(self.battery.energy_change)
dg1_cost = self.dg1.get_cost(self.dg1.current_output)
dg2_cost = self.dg2.get_cost(self.dg2.current_output)
dg3_cost = self.dg3.get_cost(self.dg3.current_output)
solar_cost = self.solar.get_cost(self.solar.current_power)
wind_cost = self.wind.gen_cost(self.wind.current_power)
reward -= (battery_cost + dg1_cost + dg2_cost + dg3_cost + solar_cost + wind_cost + excess_penalty +
deficient_penalty - sell_benefit + buy_cost) / 1e3
self.operation_cost = (battery_cost + dg1_cost + dg2_cost + dg3_cost + solar_cost + wind_cost + excess_penalty +
deficient_penalty - sell_benefit + buy_cost)
self.unbalance = unbalance
self.real_unbalance = self.shedding + self.excess
final_step_outputs = [self.dg1.current_output, self.dg2.current_output, self.dg3.current_output,
self.battery.current_capacity, self.solar.current_power, self.wind.current_power]
self.current_time += 1
finish = (self.current_time == self.episode_length)
if finish:
self.final_step_outputs = final_step_outputs
self.current_time = 0
next_obs = self.reset()
else:
next_obs = self._build_state()
return current_obs, next_obs, float(reward), finish
# def render(self, current_obs, next_obs, reward, finish):
# print('day={},hour={:2d}, state={}, next_state={}, reward={:.4f}, terminal={}\n'.
# format(self.day, self.current_time, current_obs, next_obs, reward, finish))
def _load_year_data(self):
# pv_df = pd.read_csv('data/pv.csv', sep=',')
price_df = pd.read_csv('data/prices.csv', sep=';')
load_df = pd.read_csv('data/houseload.csv', sep=',')
irradiance_df = pd.read_csv('data/irradiance.csv', sep=',')
temperature_df = pd.read_csv('data/temper.csv', sep=',')
wind_df = pd.read_csv('data/wind.csv', sep=',')
# pv = pv_df['pv'].to_numpy(dtype=float)
price = price_df['Price'].apply(lambda x: x.replace(',', '.')).to_numpy(dtype=float)
load = load_df['houseload'].to_numpy(dtype=float)
irradiance = irradiance_df['irradiance'].to_numpy(dtype=float)
temperature = temperature_df['t2m'].to_numpy(dtype=float)
wind = wind_df['wind_speed'].to_numpy(dtype=float)
'''redesign the magnitude for price and amount of generation as well as demand'''
def process_elements(elements, transform_function, add_function):
for element in elements:
transformed_element = transform_function(element)
add_function(transformed_element)
# process_elements(pv, lambda x: x, self.data_manager.add_pv_element)
process_elements(price, lambda x: max(x / 10, 0.5), self.data_manager.add_price_element)
process_elements(load, lambda x: x * 5, self.data_manager.add_load_element)
process_elements(irradiance, lambda x: x, self.data_manager.add_irradiance_element)
process_elements(temperature, lambda x: x - 273.15, self.data_manager.add_temperature_element)
process_elements(wind, lambda x: x, self.data_manager.add_wind_element)
# if __name__ == '__main__':
# env = ESSEnv()
# env.TRAIN = False
# rewards = []
# env.reset()
# tem_action = [0.1, 0.1, 0.1, 0.1, 0.1]
# for _ in range(144):
# print(f'current month is {env.month}, current day is {env.day}, current time is {env.current_time}')
# current_obs, next_obs, reward, finish = env.step(tem_action)
# env.render(current_obs, next_obs, reward, finish)
# current_obs = next_obs
# rewards.append(reward)
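# Illustrative hand-check of the reward logic in step() above (not part of the original file),
# using penalty_coefficient=50, sell_coefficient=0.5 from __init__ and, as an assumption,
# the Grid default exchange_ability=100 from module.py. Unit costs are ignored here.
if __name__ == '__main__':
    price, netload, actual_production = 0.8, 120.0, 250.0
    unbalance = actual_production - netload        # +130 -> excess condition
    sold = min(unbalance, 100)                     # at most 100 kWh can be exported
    sell_benefit = price * sold * 0.5              # 0.8 * 100 * 0.5 = 40
    excess = max(unbalance - 100, 0.0)             # 30 kWh the grid cannot absorb
    excess_penalty = excess * 50                   # 30 * 50 = 1500
    print(-(excess_penalty - sell_benefit) / 1e3)  # reward share from the exchange terms: -1.46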

View File

@ -0,0 +1,35 @@
import pandas as pd
import numpy as np
import os
from nltk import word_tokenize
def func(i, j):
directory_path = "C:/Users/97532/Desktop/DRL-for-Energy-Systems/data/residential"
entries = os.listdir(directory_path)[i:j]
    folders = [entry for entry in entries if os.path.isfile(os.path.join(directory_path, entry))]  # keep only files
entries = [os.path.join(directory_path, folder) for folder in folders]
combined_data = pd.DataFrame()
for file_path in entries:
current_data = pd.read_csv(file_path, encoding='utf-8')
tokenize_col = current_data.columns
tokenized_sentences_list = [word_tokenize(text) for text in tokenize_col]
occurrences_electricity = np.array(
[(i, j) for i, sublist in enumerate(tokenized_sentences_list) for j, word in enumerate(sublist) if
word == 'Electricity'])[:, 0]
Electricity_consumption_wind_solar = current_data[tokenize_col[occurrences_electricity]].sum(axis=1)
current_data = Electricity_consumption_wind_solar.to_frame("Household_load")
combined_data = pd.concat([combined_data, current_data], axis=1)
Series_type = combined_data.sum(axis=1)
Series_type.to_csv('houseload.csv', index=False)
total_energy = np.array(Series_type).ravel()
return total_energy
if __name__ == '__main__':
func(0, 38)
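# Illustrative pandas-only equivalent of the column selection above (not part of the original
# file): it keeps every column whose raw header contains 'Electricity', skipping the nltk
# tokenisation step.
def sum_electricity_columns(csv_path):
    df = pd.read_csv(csv_path, encoding='utf-8')
    cols = [c for c in df.columns if 'Electricity' in c]  # match directly on the header text
    return df[cols].sum(axis=1)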

4
gurobi.log Normal file
View File

@ -0,0 +1,4 @@
Gurobi 11.0.2 (win64, Python) logging started Fri May 17 12:42:18 2024
Set parameter LogFile to value "gurobi.log"

173
module.py Normal file
View File

@ -0,0 +1,173 @@
import numpy as np
class DG:
"""simulate a simple diesel generator"""
def __init__(self, parameters):
self.current_output = None
self.name = parameters.keys()
self.a_factor = parameters['a']
self.b_factor = parameters['b']
self.c_factor = parameters['c']
self.power_output_max = parameters['power_output_max']
self.power_output_min = parameters['power_output_min']
self.ramping_up = parameters['ramping_up']
self.ramping_down = parameters['ramping_down']
self.last_step_output = None
def step(self, action_gen):
output_change = action_gen * self.ramping_up # constrain the output_change with ramping up boundary
output = self.current_output + output_change
if output > 0:
output = max(self.power_output_min, min(self.power_output_max, output)) # meet the constraint
else:
output = 0
self.current_output = output
def get_cost(self, output):
if output <= 0:
cost = 0
else:
cost = (self.a_factor * pow(output, 2) + self.b_factor * output + self.c_factor)
return cost
def reset(self):
self.current_output = 0
class Battery:
"""simulate a simple battery"""
def __init__(self, parameters):
self.current_capacity = None
self.energy_change = None
self.capacity = parameters['capacity']
self.max_soc = parameters['max_soc']
self.initial_capacity = parameters['initial_capacity']
self.min_soc = parameters['min_soc']
self.degradation = parameters['degradation'] # degradation cost 1.2
self.max_charge = parameters['max_charge'] # max charge ability
self.max_discharge = parameters['max_discharge']
self.efficiency = parameters['efficiency']
def step(self, action_battery):
energy = action_battery * self.max_charge
updated_capacity = max(self.min_soc,
min(self.max_soc, (self.current_capacity * self.capacity + energy) / self.capacity))
# if charge, positive, if discharge, negative
self.energy_change = (updated_capacity - self.current_capacity) * self.capacity
        self.current_capacity = updated_capacity  # update capacity to the current condition
def get_cost(self, energy): # cost depends on the energy change
cost = energy ** 2 * self.degradation
return cost
def SOC(self):
return self.current_capacity
def reset(self):
self.current_capacity = np.random.uniform(0.2, 0.8)
class Solar:
def __init__(self, parameters):
self.current_power = None
self.base_voltage = parameters['V_b']
self.sc_current = parameters['I_sc0']
self.oc_voltage = parameters['V_oc0']
self.s_resistance = parameters['R_s']
self.sh_resistance = parameters['R_sh']
self.temper_coefficient = parameters['k_v']
self.opex_cofficient = parameters['k_o']
self.refer_irradiance = parameters['G_ref']
self.refer_temperature = parameters['T_ref']
def step(self, temperature, irradiance, action_voltage=0):
I_sc = self.sc_current * (irradiance / self.refer_irradiance)
V_oc = self.oc_voltage + self.temper_coefficient * (temperature - self.refer_temperature)
current = I_sc - (V_oc / self.sh_resistance)
# current = I_sc
        # for _ in range(10):  # number of iterations
# new_current = I_sc - (V_oc + current * self.s_resistance) / self.sh_resistance
        #     if abs(new_current - current) < 1e-6:  # convergence criterion
# break
# current = new_current
self.current_power = max((1 + action_voltage) * self.base_voltage * current, 0)
return self.current_power
def get_cost(self, current_power):
cost = current_power * self.opex_cofficient
return cost
def reset(self):
self.current_power = 0
class Wind:
def __init__(self, parameters):
self.current_power = None
self.cutin_speed = parameters['cutin_speed']
self.cutout_speed = parameters['cutout_speed']
self.rated_speed = parameters['rated_speed']
self.air_density = parameters['air_density']
self.rotor_radius = parameters['rotor_radius']
self.power_coefficient = parameters['power_coefficient']
self.generator_efficiency = parameters['generator_efficiency']
self.opex_cofficient = parameters['opex_cofficient']
def step(self, wind_speed):
if self.cutin_speed <= wind_speed < self.rated_speed:
self.current_power = (0.5 * self.air_density * self.rotor_radius ** 2 * wind_speed ** 3 *
self.power_coefficient * self.generator_efficiency) / 1e3
elif self.rated_speed <= wind_speed < self.cutout_speed:
self.current_power = (0.5 * self.air_density * self.rotor_radius ** 2 * self.rated_speed ** 3 *
self.power_coefficient * self.generator_efficiency) / 1e3
else:
self.current_power = 0
return self.current_power
def gen_cost(self, current_power):
cost = current_power * self.opex_cofficient
return cost
def reset(self):
self.current_power = 0
class Grid:
def __init__(self):
self.on = True
self.delta = 1
if self.on:
self.exchange_ability = 100
else:
self.exchange_ability = 0
def get_cost(self, current_price, energy_exchange):
return current_price * energy_exchange * self.delta
def retrieve_past_price(self):
result = []
        # start and end indices for the past 24 hours of prices
start_index = max(0, 24 * (self.day - 1) + self.time - 24)
end_index = 24 * (self.day - 1) + self.time
past_price = self.price[start_index:end_index]
result.extend(past_price)
# current_day_price = self.price[24 * self.day:24 * self.day + self.time]
# result.extend(current_day_price)
return result
# def retrive_past_price(self):
# result = []
# if self.day < 1:
# past_price = self.past_price
# else:
# past_price = self.price[24 * (self.day - 1):24 * self.day]
# for item in past_price[(self.time - 24)::]:
# result.append(item)
# for item in self.price[24 * self.day:(24 * self.day + self.time)]:
# result.append(item)
# return result
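# Illustrative plateau-power check for Wind.step (not part of the original file), using the
# wind_parameters defined in parameters.py (air_density=1.225, rotor_radius=25,
# power_coefficient=0.5, generator_efficiency=0.9, rated_speed=8). Note that the formula above
# uses rotor_radius ** 2 directly rather than the swept area pi * r ** 2.
if __name__ == '__main__':
    plateau_kw = 0.5 * 1.225 * 25 ** 2 * 8 ** 3 * 0.5 * 0.9 / 1e3
    print(round(plateau_kw, 1))  # ~88.2 kW between rated and cut-out wind speed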

136
net.py Normal file
View File

@ -0,0 +1,136 @@
import numpy as np
import torch
import torch.nn as nn
class Actor(nn.Module):
def __init__(self, mid_dim, state_dim, action_dim):
super().__init__()
self.net = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
nn.Linear(mid_dim, action_dim))
def forward(self, state):
return self.net(state).tanh() # action.tanh()
def get_action(self, state, action_std):
action = self.net(state).tanh()
noise = (torch.randn_like(action) * action_std).clamp(-0.5, 0.5)
return (action + noise).clamp(-1.0, 1.0)
class ActorSAC(nn.Module):
def __init__(self, mid_dim, state_dim, action_dim):
super().__init__()
self.net_state = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.ReLU(), )
self.net_a_avg = nn.Sequential(nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
nn.Linear(mid_dim, action_dim)) # the average of action
self.net_a_std = nn.Sequential(nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
nn.Linear(mid_dim, action_dim)) # the log_std of action
self.log_sqrt_2pi = np.log(np.sqrt(2 * np.pi))
def forward(self, state):
tmp = self.net_state(state)
return self.net_a_avg(tmp).tanh() # action
def get_action(self, state):
t_tmp = self.net_state(state)
a_avg = self.net_a_avg(t_tmp) # NOTICE! it is a_avg without .tanh()
a_std = self.net_a_std(t_tmp).clamp(-20, 2).exp()
return torch.normal(a_avg, a_std).tanh() # re-parameterize
def get_action_logprob(self, state):
t_tmp = self.net_state(state)
a_avg = self.net_a_avg(t_tmp) # NOTICE! it needs a_avg.tanh()
a_std_log = self.net_a_std(t_tmp).clamp(-20, 2)
a_std = a_std_log.exp()
noise = torch.randn_like(a_avg, requires_grad=True)
a_tan = (a_avg + a_std * noise).tanh() # action.tanh()
log_prob = a_std_log + self.log_sqrt_2pi + noise.pow(2).__mul__(0.5) # noise.pow(2) * 0.5
log_prob = log_prob + (-a_tan.pow(2) + 1.000001).log() # fix log_prob using the derivative of action.tanh()
return a_tan, log_prob.sum(1, keepdim=True)
class ActorPPO(nn.Module):
def __init__(self, mid_dim, state_dim, action_dim):
super().__init__()
self.net = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
nn.Linear(mid_dim, action_dim), )
# the logarithm (log) of standard deviation (std) of action, it is a trainable parameter
self.a_std_log = nn.Parameter(torch.zeros((1, action_dim)) - 0.5, requires_grad=True)
self.sqrt_2pi_log = np.log(np.sqrt(2 * np.pi))
def forward(self, state):
        return self.net(state).tanh()  # action.tanh() limits the action output to (-1, 1)
def get_action(self, state):
a_avg = self.net(state) # mean
a_std = self.a_std_log.exp() # standard deviation
noise = torch.randn_like(a_avg)
action = a_avg + noise * a_std
return action, noise
def get_logprob_entropy(self, state, action):
a_avg = self.net(state)
a_std = self.a_std_log.exp()
delta = ((a_avg - action) / a_std).pow(2) * 0.5
logprob = -(self.a_std_log + self.sqrt_2pi_log + delta).sum(1) # new_logprob
dist_entropy = (logprob.exp() * logprob).mean() # policy entropy
return logprob, dist_entropy
def get_old_logprob(self, _action, noise): # noise = action - a_noise
delta = noise.pow(2) * 0.5
return -(self.a_std_log + self.sqrt_2pi_log + delta).sum(1) # old_logprob
class Critic(nn.Module):
def __init__(self, mid_dim, state_dim, action_dim):
super().__init__()
self.net = nn.Sequential(nn.Linear(state_dim + action_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
nn.Linear(mid_dim, 1))
def forward(self, state, action):
return self.net(torch.cat((state, action), dim=1)) # q value
class CriticAdv(nn.Module):
def __init__(self, mid_dim, state_dim, _action_dim):
super().__init__()
self.net = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
nn.Linear(mid_dim, 1))
def forward(self, state):
return self.net(state) # advantage value
class CriticTwin(nn.Module): # shared parameter
def __init__(self, mid_dim, state_dim, action_dim):
super().__init__()
self.net_sa = nn.Sequential(nn.Linear(state_dim + action_dim, mid_dim), nn.ReLU(),
nn.Linear(mid_dim, mid_dim), nn.ReLU()) # concat(state, action)
self.net_q1 = nn.Sequential(nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
nn.Linear(mid_dim, 1)) # q1 value
self.net_q2 = nn.Sequential(nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
nn.Linear(mid_dim, 1)) # q2 value
def forward(self, state, action):
tmp = self.net_sa(torch.cat((state, action), dim=1))
return self.net_q1(tmp) # one Q value
def get_q1_q2(self, state, action):
tmp = self.net_sa(torch.cat((state, action), dim=1))
return self.net_q1(tmp), self.net_q2(tmp) # two Q values
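# Illustrative usage sketch for the PPO actor/critic pair above (not part of the original file).
# The dimensions follow the 10-dimensional state and 5-dimensional action used elsewhere in
# this commit; the batch of random states is purely for shape checking.
if __name__ == '__main__':
    mid_dim, state_dim, action_dim = 256, 10, 5
    act = ActorPPO(mid_dim, state_dim, action_dim)
    cri = CriticAdv(mid_dim, state_dim, action_dim)
    state = torch.randn(4, state_dim)          # a batch of 4 random states
    action, noise = act.get_action(state)      # pre-tanh action and the noise that produced it
    logprob, entropy = act.get_logprob_entropy(state, action)
    value = cri(state)                         # (4, 1) state-value estimates
    print(action.shape, logprob.shape, value.shape)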

49
parameters.py Normal file
View File

@ -0,0 +1,49 @@
import numpy as np
solar_parameters = {
    'I_sc0': 8.0,  # short-circuit current at reference conditions (A)
    'V_b': 24,  # base voltage (V)
    'V_oc0': 36.0,  # open-circuit voltage at reference conditions (V)
    'R_s': 0.1,  # series resistance (Ω)
    'R_sh': 100.0,  # shunt resistance (Ω)
    'k_v': -0.2,  # temperature coefficient of the open-circuit voltage (V/°C)
    'k_o': 0.001,  # variable (operating) cost coefficient (CNY/kWh)
    'G_ref': 1000,  # reference irradiance (W/m²)
    'T_ref': 25,  # reference temperature (°C)
}
wind_parameters = {
'cutin_speed': 3,
'cutout_speed': 12,
'rated_speed': 8,
'air_density': 1.225,
'rotor_radius': 25,
'power_coefficient': 0.5,
'generator_efficiency': 0.9,
'opex_cofficient': 0.01,
}
battery_parameters = {
    'capacity': 500,  # kWh
'max_charge': 100, # kw
'max_discharge': 100, # kw
'efficiency': 0.9,
'degradation': 0.01, # euro/kw
'max_soc': 0.8,
'min_soc': 0.2,
'initial_capacity': 0.4
}
dg_parameters = {
'gen_1': {'a': 0.0034, 'b': 3, 'c': 30, 'd': 0.03, 'e': 4.2, 'f': 0.031,
'power_output_max': 150, 'power_output_min': 10,
'ramping_up': 100, 'ramping_down': 100, 'min_up': 2, 'min_down': 1},
'gen_2': {'a': 0.001, 'b': 10, 'c': 40, 'd': 0.03, 'e': 4.2, 'f': 0.031,
'power_output_max': 375, 'power_output_min': 50,
'ramping_up': 100, 'ramping_down': 100, 'min_up': 2, 'min_down': 1},
'gen_3': {'a': 0.001, 'b': 15, 'c': 70, 'd': 0.03, 'e': 4.2, 'f': 0.031,
'power_output_max': 500, 'power_output_min': 100,
'ramping_up': 200, 'ramping_down': 200, 'min_up': 2, 'min_down': 1}
}
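# Illustrative worked check (not part of the original file): module.py charges each DG the
# quadratic cost a * P**2 + b * P + c, so gen_1 at P = 100 kW costs
# 0.0034 * 100**2 + 3 * 100 + 30 = 364 per hour.
if __name__ == '__main__':
    gen_1 = dg_parameters['gen_1']
    power = 100
    print(gen_1['a'] * power ** 2 + gen_1['b'] * power + gen_1['c'])  # 364.0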

150
plotDRL.py Normal file
View File

@ -0,0 +1,150 @@
import os
import pickle
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
matplotlib.rc('text', usetex=True)
pd.options.display.notebook_repr_html = False
def plot_optimization_result(datasource, directory): # data source is dataframe
sns.set_theme(style='whitegrid') # "white", "dark", "whitegrid", "darkgrid", "ticks"
plt.rcParams["figure.figsize"] = (16, 9)
fig, axs = plt.subplots(2, 2)
plt.subplots_adjust(wspace=0.7, hspace=0.3)
plt.autoscale(tight=True)
T = np.array([i for i in range(24)])
# plot step cost in ax[0]
axs[0, 0].cla()
axs[0, 0].set_ylabel('Costs')
axs[0, 0].set_xlabel('Time(h)')
axs[0, 0].bar(T, datasource['step_cost'])
# axs[0,0].set_xticks([i for i in range(24)],[i for i in range(1,25)])
# plot soc and price in ax[1]
axs[0, 1].cla()
axs[0, 1].set_ylabel('Price')
axs[0, 1].set_xlabel('Time(h)')
axs[0, 1].plot(T, datasource['price'], drawstyle='steps-mid', label='Price', color='pink')
axs[0, 1] = axs[0, 1].twinx()
axs[0, 1].set_ylabel('SOC')
axs[0, 1].plot(T, datasource['soc'], drawstyle='steps-mid', label='SOC', color='grey')
# axs[0,1].set_xticks([i for i in range(24)],[i for i in range(1,25)])
axs[0, 1].legend(loc='upper right', fontsize=12, frameon=False, labelspacing=0.3)
# plot accumulated generation and consumption in ax[2]
axs[1, 0].cla()
axs[1, 0].set_ylabel('Outputs of DGs and Battery')
axs[1, 0].set_xlabel('Time(h)')
battery_positive = np.array(datasource['battery_energy_change'])
battery_negative = np.array(datasource['battery_energy_change'])
battery_negative = np.minimum(battery_negative, 0) # discharge
battery_positive = np.maximum(battery_positive, 0) # charge
# deal with power exchange within the figure
imported_from_grid = np.array(datasource['grid_import'])
exported_2_grid = np.array(datasource['grid_export'])
axs[1, 0].bar(T, datasource['gen1'], label='Gen1')
axs[1, 0].bar(T, datasource['gen2'], label='Gen2', bottom=datasource['gen1'])
axs[1, 0].bar(T, datasource['gen3'], label='Gen3', bottom=datasource['gen2'] + datasource['gen1'])
axs[1, 0].bar(T, -battery_positive, color='blue', hatch='/', label='ESS charge')
axs[1, 0].bar(T, -battery_negative, hatch='/', label='ESS discharge',
bottom=datasource['gen3'] + datasource['gen2'] + datasource['gen1'])
# import as generate
axs[1, 0].bar(T, imported_from_grid, label='Grid import',
bottom=-battery_negative + datasource['gen3'] + datasource['gen2'] + datasource['gen1'])
# export as load
axs[1, 0].bar(T, -exported_2_grid, label='Grid export', bottom=-battery_positive)
axs[1, 0].plot(T, datasource['netload'], label='Netload', drawstyle='steps-mid', alpha=0.7)
axs[1, 0].legend(loc='upper right', fontsize=12, frameon=False, labelspacing=0.3)
# axs[1,0].set_xticks([i for i in range(24)],[i for i in range(1,25)])
# plt.show()
fig.savefig(f"{directory}/optimization_information.svg", format='svg', dpi=600, bbox_inches='tight')
    print('optimization result has been plotted')
def plot_evaluation_information(datasource, directory):
sns.set_theme(style='whitegrid')
with open(datasource, 'rb') as tf:
test_data = pickle.load(tf)
# plot unbalance, and reward of each step by bar figures
plt.rcParams["figure.figsize"] = (16, 9)
fig, axs = plt.subplots(2, 2)
plt.subplots_adjust(wspace=0.7, hspace=0.3)
plt.autoscale(tight=True)
    # prepare data for evaluating the environment
eval_data = pd.DataFrame(test_data['information'])
eval_data.columns = ['time_step', 'price', 'netload', 'action', 'real_action', 'soc', 'battery', 'gen1', 'gen2',
'gen3', 'unbalance', 'operation_cost']
# plot unbalance in axs[0]
axs[0, 0].cla()
axs[0, 0].set_ylabel('Unbalance of Generation and Load')
axs[0, 0].bar(eval_data['time_step'], eval_data['unbalance'], label='Exchange with Grid', width=0.4)
axs[0, 0].bar(eval_data['time_step'] + 0.4, eval_data['netload'], label='Netload', width=0.4)
axs[0, 0].legend(loc='upper right', fontsize=12, frameon=False, labelspacing=0.5)
# axs[0,0].set_xticks([i for i in range(24)],[i for i in range(1,25)])
# plot reward in axs[1]
axs[1, 1].cla()
axs[1, 1].set_ylabel('Costs')
axs[1, 1].bar(eval_data['time_step'], eval_data['operation_cost'])
# plot generation and netload in ax[2]
axs[1, 0].cla()
axs[1, 0].set_ylabel('Outputs of Units and Netload (kWh)')
# axs[1,0].set_xticks([i for i in range(24)], [i for i in range(1, 25)])
battery_positive = np.array(eval_data['battery'])
battery_negative = np.array(eval_data['battery'])
battery_positive = np.maximum(battery_positive, 0) # charge
battery_negative = np.minimum(battery_negative, 0) # discharge
# deal with power exchange within the figure
imported_from_grid = np.minimum(np.array(eval_data['unbalance']), 0)
exported_2_grid = np.maximum(np.array(eval_data['unbalance']), 0)
x = eval_data['time_step']
axs[1, 0].bar(x, eval_data['gen1'], label='Gen1')
axs[1, 0].bar(x, eval_data['gen2'], label='Gen2', bottom=eval_data['gen1'])
axs[1, 0].bar(x, eval_data['gen3'], label='Gen3', bottom=eval_data['gen1'] + eval_data['gen2'])
axs[1, 0].bar(x, -battery_positive, color='blue', hatch='/', label='ESS charge')
axs[1, 0].bar(x, -battery_negative, label='ESS discharge', hatch='/',
bottom=eval_data['gen1'] + eval_data['gen2'] + eval_data['gen3'])
axs[1, 0].bar(x, -imported_from_grid, label='Grid import',
bottom=eval_data['gen1'] + eval_data['gen2'] + eval_data['gen3'] - battery_negative)
axs[1, 0].bar(x, -exported_2_grid, label='Grid export', bottom=-battery_positive)
axs[1, 0].plot(x, eval_data['netload'], drawstyle='steps-mid', label='Netload')
axs[1, 0].legend(loc='upper right', fontsize=12, frameon=False, labelspacing=0.3)
# plot energy charge/discharge with price in ax[3].
axs[0, 1].cla()
axs[0, 1].set_ylabel('Price')
axs[0, 1].set_xlabel('Time Steps')
axs[0, 1].plot(eval_data['time_step'], eval_data['price'], drawstyle='steps-mid', label='Price', color='pink')
axs[0, 1] = axs[0, 1].twinx()
axs[0, 1].set_ylabel('SOC')
# axs[0,1].set_xticks([i for i in range(24)], [i for i in range(1, 25)])
axs[0, 1].plot(eval_data['time_step'], eval_data['soc'], drawstyle='steps-mid', label='SOC', color='grey')
axs[0, 1].legend(loc='upper right', fontsize=12, frameon=False, labelspacing=0.3)
# plt.show()
fig.savefig(f"{directory}/Evoluation Information.svg", format='svg', dpi=600, bbox_inches='tight')
print('the evaluation figure have been plot and saved')
def make_dir(directory, feature_change):
cwd = f'{directory}/DRL_{feature_change}_plots'
os.makedirs(cwd, exist_ok=True)
return cwd
class PlotArgs:
def __init__(self) -> None:
self.cwd = None
self.feature_change = None
self.plot_on = None
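# Illustrative call pattern (not part of the original file): create the plot directory and
# carry it around in PlotArgs. The DataFrame/pickle inputs for the two plotting functions are
# produced elsewhere (e.g. by optimization_base_result in tools.py and the test routine).
if __name__ == '__main__':
    plot_args = PlotArgs()
    plot_args.feature_change = 'base'
    plot_args.cwd = make_dir('./AgentPPO', plot_args.feature_change)  # ./AgentPPO/DRL_base_plots
    plot_args.plot_on = True
    print(plot_args.cwd)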

403
read.ipynb Normal file
View File

@ -0,0 +1,403 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 34,
"id": "initial_id",
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2024-06-11T06:41:37.671133500Z",
"start_time": "2024-06-11T06:41:37.652982600Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['./data/iowa.nc']\n"
]
}
],
"source": [
"import numpy as np\n",
"import netCDF4 as nc\n",
"import os\n",
"import pandas as pd\n",
"import datetime as dt\n",
"nc_path = \"./data/\"\n",
"nc_files = [f\"{nc_path}{x}\" for x in os.listdir(nc_path) if x.endswith('.nc')]\n",
"print(nc_files)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"outputs": [],
"source": [
"stations = pd.read_csv('./data/station.csv')\n",
"stations.head()\n",
"stations.dropna(inplace=True)"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-06-11T06:41:37.722796200Z",
"start_time": "2024-06-11T06:41:37.663028100Z"
}
},
"id": "9c4ed0ec8dc7ff7d"
},
{
"cell_type": "code",
"execution_count": 36,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['longitude', 'latitude', 'time', 't2m']\n"
]
}
],
"source": [
"nc_data = nc.Dataset(nc_files[0]) # 读文件\n",
"print(list(nc_data.variables.keys())) # keys"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-06-11T06:41:37.730197500Z",
"start_time": "2024-06-11T06:41:37.678192900Z"
}
},
"id": "a88ddc29d8fa3d06"
},
{
"cell_type": "code",
"execution_count": 37,
"outputs": [
{
"data": {
"text/plain": "(13, 31)"
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lat = np.asarray(nc_data['latitude'][:]).tolist()\n",
"lon = np.asarray(nc_data['longitude'][:]).tolist()\n",
"len(lat), len(lon)"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-06-11T06:41:37.730197500Z",
"start_time": "2024-06-11T06:41:37.693749900Z"
}
},
"id": "4ec1510ef5baa4b9"
},
{
"cell_type": "code",
"execution_count": 38,
"outputs": [
{
"data": {
"text/plain": "array([1016832, 1016833, 1016834, ..., 1025613, 1025614, 1025615])"
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.asarray(nc_data['time'][:])"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-06-11T06:41:37.731222200Z",
"start_time": "2024-06-11T06:41:37.710255800Z"
}
},
"id": "9e1cfd59ea63dee"
},
{
"cell_type": "code",
"execution_count": 39,
"outputs": [
{
"data": {
"text/plain": "<class 'netCDF4._netCDF4.Variable'>\nint32 time(time)\n units: hours since 1900-01-01 00:00:00.0\n long_name: time\n calendar: gregorian\nunlimited dimensions: \ncurrent shape = (8784,)\nfilling on, default _FillValue of -2147483647 used"
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"nc_data['time']"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-06-11T06:41:37.739215300Z",
"start_time": "2024-06-11T06:41:37.723796300Z"
}
},
"id": "305b595052773792"
},
{
"cell_type": "code",
"execution_count": 40,
"outputs": [
{
"data": {
"text/plain": "datetime.datetime(1900, 1, 1, 0, 0)"
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"start_date = dt.datetime(1900, 1, 1)\n",
"start_date"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-06-11T06:41:37.788384900Z",
"start_time": "2024-06-11T06:41:37.739215300Z"
}
},
"id": "9dc1b63dbd6e5847"
},
{
"cell_type": "code",
"execution_count": 41,
"outputs": [
{
"data": {
"text/plain": "array([[[267.10449818, 266.56524854, 265.81574602, ..., 266.35826383,\n 266.94435738, 267.10231939],\n [266.75916053, 266.21773211, 265.4017766 , ..., 266.49334859,\n 267.59908269, 267.86380524],\n [266.70360148, 265.86258993, 265.06733289, ..., 266.6197182 ,\n 267.66117811, 267.95640366],\n ...,\n [270.23759303, 270.20055367, 269.91731143, ..., 270.11231282,\n 269.99683714, 269.90750689],\n [271.34223774, 271.32153927, 271.25726507, ..., 270.55569585,\n 270.37376718, 270.24086121],\n [271.76274352, 271.85643134, 271.79215714, ..., 271.32371806,\n 270.86617291, 270.71583665]],\n\n [[267.45746158, 267.17966631, 266.73846206, ..., 266.19376546,\n 266.66983029, 266.83541806],\n [267.5337191 , 267.25374505, 266.67418786, ..., 266.32231386,\n 267.30821471, 267.57620544],\n [267.67425083, 267.02933005, 266.40183956, ..., 266.40183956,\n 267.33218136, 267.62522813],\n ...,\n [269.75172367, 269.78549485, 269.62317527, ..., 270.06873709,\n 269.95652959, 269.69289643],\n [270.76159117, 270.80080932, 270.75832299, ..., 270.33237024,\n 270.18748095, 269.91186447],\n [271.10692881, 271.19516966, 271.15268333, ..., 270.63848973,\n 270.47290197, 270.30513541]],\n\n [[267.29841017, 267.25483444, 267.06745881, ..., 265.76345515,\n 266.3157775 , 266.53692432],\n [267.55441757, 267.51084184, 267.31801925, ..., 266.01728376,\n 266.99338007, 267.2613708 ],\n [267.77338561, 267.59363573, 267.37684648, ..., 266.09898825,\n 267.00318461, 267.28860563],\n ...,\n [269.32359214, 269.41945874, 269.29091034, ..., 270.08507799,\n 269.90750689, 269.55781167],\n [270.24957636, 270.34871114, 270.35088993, ..., 270.20600063,\n 270.01317803, 269.67110857],\n [270.62105944, 270.76703813, 270.76921692, ..., 270.43695199,\n 270.18857034, 270.01099925]],\n\n ...,\n\n [[271.48168007, 271.36729379, 271.24637114, ..., 270.89776532,\n 270.98164859, 270.96857587],\n [271.81830257, 271.70282689, 271.56447396, ..., 271.43701495,\n 271.53723913, 271.69084357],\n [272.18107051, 271.92179493, 271.77690563, ..., 272.04271757,\n 272.15165689, 272.3107083 ],\n ...,\n [275.28910932, 274.99388376, 274.74441272, ..., 276.07565121,\n 276.18350114, 276.35344648],\n [275.60285456, 275.36536684, 275.34575777, ..., 276.91993095,\n 276.99618847, 277.08551871],\n [275.90788466, 275.95254978, 275.9329407 , ..., 278.0169499 ,\n 277.9058318 , 277.90365301]],\n\n [[269.88462964, 269.78658425, 269.64496313, ..., 269.72231005,\n 269.85957359, 269.92602658],\n [270.1308325 , 270.03278711, 269.93147354, ..., 270.19946427,\n 270.40427019, 270.72237301],\n [270.45656107, 270.21471578, 270.10904464, ..., 270.84329566,\n 271.05463794, 271.37056197],\n ...,\n [273.1440941 , 273.02535024, 272.86738823, ..., 274.12127981,\n 274.27488425, 274.4742432 ],\n [273.5765832 , 273.50795143, 273.58638774, ..., 274.72698243,\n 274.85553083, 275.00150951],\n [273.94697689, 274.12127981, 274.19971612, ..., 275.72704539,\n 275.64207272, 275.67148634]],\n\n [[269.74954488, 269.63951617, 269.51096777, ..., 268.99459539,\n 269.11442864, 269.11224985],\n [269.87046752, 269.76043881, 269.70596915, ..., 269.16345133,\n 269.50443141, 269.90097053],\n [269.85630541, 269.81381908, 269.78222667, ..., 269.61446012,\n 269.94672505, 270.34435357],\n ...,\n [273.29007279, 273.09833959, 273.18440165, ..., 273.11794867,\n 273.21381527, 273.48943175],\n [274.379466 , 274.08641922, 273.44258784, ..., 273.57331502,\n 273.64957255, 273.81298153],\n [273.89904359, 273.55370595, 272.90769578, ..., 274.27815243,\n 274.27488425, 274.25309638]]])"
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.asarray(nc_data['t2m'][:])"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-06-11T06:41:37.790393500Z",
"start_time": "2024-06-11T06:41:37.756209300Z"
}
},
"id": "d189f90ec07c8690"
},
{
"cell_type": "code",
"execution_count": 42,
"outputs": [
{
"data": {
"text/plain": "(8784, 13, 31)"
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.asarray(nc_data['t2m'][:]).shape"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-06-11T06:41:37.862579700Z",
"start_time": "2024-06-11T06:41:37.787379500Z"
}
},
"id": "6a90a501e4374806"
},
{
"cell_type": "code",
"execution_count": 43,
"outputs": [],
"source": [
"def find_closest_number_index(arr, target):\n",
" arr.sort() # 对数组进行排序\n",
" left, right = 0, len(arr) - 1\n",
" closest_index = 0\n",
"\n",
" while left <= right:\n",
" mid = left + (right - left) // 2\n",
" # 更新最接近数值的索引\n",
" if abs(arr[mid] - target) < abs(arr[closest_index] - target):\n",
" closest_index = mid\n",
" # 根据目标值与中间值的比较,决定搜索左半部分还是右半部分\n",
" if arr[mid] < target:\n",
" left = mid + 1\n",
" elif arr[mid] > target:\n",
" right = mid - 1\n",
" else:\n",
" return mid # 如果找到精确匹配,直接返回索引\n",
" return closest_index"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-06-11T06:41:37.870067500Z",
"start_time": "2024-06-11T06:41:37.820315500Z"
}
},
"id": "738d2236e3320758"
},
{
"cell_type": "code",
"execution_count": 44,
"outputs": [],
"source": [
"target_cols = ['t2m']\n",
"times = np.asarray(nc_data['time'][:]).tolist()\n",
"times = [start_date + dt.timedelta(hours=x) for x in times]"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-06-11T06:41:37.870067500Z",
"start_time": "2024-06-11T06:41:37.834951500Z"
}
},
"id": "336150c5fe22b4db"
},
{
"cell_type": "code",
"execution_count": 45,
"outputs": [],
"source": [
"time_index = pd.to_datetime(times * stations.shape[0])\n",
"stations['lat'] = stations['lat'].astype(float)\n",
"stations['lon'] = stations['lon'].astype(float)\n",
"stations_out = stations.copy()"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-06-11T06:41:37.871070500Z",
"start_time": "2024-06-11T06:41:37.848726Z"
}
},
"id": "dfc81e6752e2c51a"
},
{
"cell_type": "code",
"execution_count": 46,
"outputs": [
{
"data": {
"text/plain": " state lon lat lon_index lat_index best_lon best_lat\n0 iowa 93.5 41.1 30 2 -89.0 41.0",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>state</th>\n <th>lon</th>\n <th>lat</th>\n <th>lon_index</th>\n <th>lat_index</th>\n <th>best_lon</th>\n <th>best_lat</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>iowa</td>\n <td>93.5</td>\n <td>41.1</td>\n <td>30</td>\n <td>2</td>\n <td>-89.0</td>\n <td>41.0</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lon_index = list()\n",
"lat_index = list()\n",
"best_lons = list()\n",
"best_lats = list()\n",
"for i in range(stations.shape[0]):\n",
" # 获得观测点所在的经纬度\n",
" stat_lat = stations.iloc[i]['lat']\n",
" stat_lon = stations.iloc[i]['lon']\n",
" # 获得观测点所在经纬度对应最近的网格数据的经纬度\n",
" best_lat_index = find_closest_number_index(lat, stat_lat)\n",
" best_lon_index = find_closest_number_index(lon, stat_lon)\n",
" lat_index.append(best_lat_index)\n",
" lon_index.append(best_lon_index)\n",
" best_lons.append(lon[best_lon_index])\n",
" best_lats.append(lat[best_lat_index])\n",
"stations_out['lon_index'] = lon_index\n",
"stations_out['lat_index'] = lat_index\n",
"stations_out['best_lon'] = best_lons\n",
"stations_out['best_lat'] = best_lats\n",
"stations_out"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-06-11T06:41:37.880042100Z",
"start_time": "2024-06-11T06:41:37.866999300Z"
}
},
"id": "3eccbc89ab82bf16"
},
{
"cell_type": "code",
"execution_count": 47,
"outputs": [],
"source": [
"for tgt in target_cols:\n",
" tmp = np.asarray(nc_data[tgt][:])\n",
" stations_out[tgt] = stations_out.apply(lambda x: tmp[:, len(lat) - x['lat_index'] - 1, x['lon_index']].squeeze(),axis=1)"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-06-11T06:41:37.953643400Z",
"start_time": "2024-06-11T06:41:37.882051700Z"
}
},
"id": "eb2d82717535c00b"
},
{
"cell_type": "code",
"execution_count": 48,
"outputs": [],
"source": [
"result_df = stations_out.explode(column=target_cols).reset_index(drop=True)\n",
"result_df['date_time'] = time_index\n",
"result_df[['date_time', 't2m']].to_csv('./temper.csv', index=False, encoding='utf-8-sig')"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-06-11T06:41:37.961281200Z",
"start_time": "2024-06-11T06:41:37.913630800Z"
}
},
"id": "f347ca967bfcafce"
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

111
test.ipynb Normal file
View File

@ -0,0 +1,111 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 24,
"id": "initial_id",
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2024-06-11T06:48:54.873304Z",
"start_time": "2024-06-11T06:48:54.842415500Z"
}
},
"outputs": [],
"source": [
"import pandas as pd\n",
"pv_df = pd.read_csv('data/PV.csv', sep=';')\n",
"temperature_df = pd.read_csv('data/temper.csv', sep=',')\n",
"\n",
"pv_data = pv_df['P_PV_'].apply(lambda x: x.replace(',', '.')).to_numpy(dtype=float)\n",
"temper = temperature_df['t2m'].to_numpy(dtype=float)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"outputs": [
{
"data": {
"text/plain": "array([0., 0., 0., ..., 0., 0., 0.])"
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pv_data"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-06-11T06:48:54.881858Z",
"start_time": "2024-06-11T06:48:54.867302600Z"
}
},
"id": "d37d5b6cd5cb7387"
},
{
"cell_type": "code",
"execution_count": 26,
"outputs": [
{
"data": {
"text/plain": "array([-3.2424931, -3.4571036, -3.5921883, ..., 3.2034465, 1.3242432,\n 0.3394317])"
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"temper -= 273.15\n",
"temper"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-06-11T06:48:54.906957500Z",
"start_time": "2024-06-11T06:48:54.880858700Z"
}
},
"id": "3ef2774e5245627"
},
{
"cell_type": "code",
"execution_count": 26,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-06-11T06:48:54.915543800Z",
"start_time": "2024-06-11T06:48:54.896403Z"
}
},
"id": "33f9458d47973337"
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

41
test.py Normal file
View File

@ -0,0 +1,41 @@
# from joblib import load
#
# model = load('AgentPPO/reward_data.pkl')
#
# data = model['mean_episode_reward']
# print(data)
# import torch
#
# model_path = 'AgentDDPG/actor.pth'
#
# checkpoint = torch.load(model_path, map_location=torch.device('cuda'))
#
# # if the .pth file stores the whole model (both structure and parameters)
# model = checkpoint  # here we assume checkpoint is the model instance itself; sometimes model.load_state_dict(checkpoint['state_dict']) is needed
# if the .pth file only stores the state_dict (the model parameters)
# model = YourModelClass()  # instantiate your model
# model.load_state_dict(checkpoint)
# print(model)
import pickle
a = 'DDPG'
b = 'PPO'
c = 'SAC'
d = 'TD3'
a1 = '/reward_data.pkl'
a2 = '/loss_data.pkl'
a3 = '/test_data.pkl'
filename = './Agent' + a + a3
# open the file in 'rb' mode and read the binary data
with open(filename, 'rb') as f:
    data = pickle.load(f)
print(data)
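# Illustrative extension (not part of the original file): inspect every agent/file combination
# that actually exists on disk instead of a single hard-coded path.
import os
for agent in (a, b, c, d):                 # 'DDPG', 'PPO', 'SAC', 'TD3'
    for suffix in (a1, a2, a3):            # reward / loss / test pickles
        path = './Agent' + agent + suffix
        if os.path.exists(path):
            with open(path, 'rb') as f:
                print(path, type(pickle.load(f)))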

285
tools.py Normal file
View File

@ -0,0 +1,285 @@
import os
import gurobipy as gp
import numpy as np
import numpy.random as rd
import pandas as pd
import torch
from gurobipy import GRB
def optimization_base_result(env, month, day, initial_soc):
price = env.data_manager.get_series_price_data(month, day)
load = env.data_manager.get_series_load_cons_data(month, day)
temperature = env.data_manager.get_series_temperature_data(month, day)
irradiance = env.data_manager.get_series_irradiance_data(month, day)
wind_speed = env.data_manager.get_series_wind_data(month, day)
pv = [env.solar.step(temp, irr) for temp, irr in zip(temperature, irradiance)]
wind = [env.wind.step(ws) for ws in wind_speed]
period = env.episode_length
DG_parameters = env.dg_parameters
def get_dg_info(parameters):
p_max = []
p_min = []
ramping_up = []
ramping_down = []
a_para = []
b_para = []
c_para = []
for name, gen_info in parameters.items():
p_max.append(gen_info['power_output_max'])
p_min.append(gen_info['power_output_min'])
ramping_up.append(gen_info['ramping_up'])
ramping_down.append(gen_info['ramping_down'])
a_para.append(gen_info['a'])
b_para.append(gen_info['b'])
c_para.append(gen_info['c'])
return p_max, p_min, ramping_up, ramping_down, a_para, b_para, c_para
p_max, p_min, ramping_up, ramping_down, a_para, b_para, c_para = get_dg_info(parameters=DG_parameters)
NUM_GEN = len(DG_parameters.keys())
battery_capacity = env.battery.capacity
battery_efficiency = env.battery.efficiency
m = gp.Model("UC")
# set system variables
on_off = m.addVars(NUM_GEN, period, vtype=GRB.BINARY, name='on_off')
gen_output = m.addVars(NUM_GEN, period, vtype=GRB.CONTINUOUS, name='output')
    # set constraints for charge/discharge
    battery_energy_change = m.addVars(period, vtype=GRB.CONTINUOUS, lb=-env.battery.max_discharge,  # discharging is negative
ub=env.battery.max_charge, name='battery_action')
    # set constraints for the exchange between the external grid and the distributed energy system
grid_energy_import = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, ub=env.grid.exchange_ability, name='import')
grid_energy_export = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, ub=env.grid.exchange_ability, name='export')
soc = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0.2, ub=0.8, name='SOC')
    # 1. add the balance constraint
m.addConstrs(((sum(gen_output[g, t] for g in range(NUM_GEN)) + pv[t] + wind[t] + grid_energy_import[t] >= load[t] +
battery_energy_change[t] + grid_energy_export[t]) for t in range(period)), name='powerbalance')
    # 2. add constraints for p_max / p_min
m.addConstrs((gen_output[g, t] <= on_off[g, t] * p_max[g] for g in range(NUM_GEN) for t in range(period)),
'gen_output_max')
m.addConstrs((gen_output[g, t] >= on_off[g, t] * p_min[g] for g in range(NUM_GEN) for t in range(period)),
'gen_output_min')
    # 3. add constraints for ramping up / ramping down
m.addConstrs((gen_output[g, t + 1] - gen_output[g, t] <= ramping_up[g] for g in range(NUM_GEN)
for t in range(period - 1)), 'ramping_up')
m.addConstrs((gen_output[g, t] - gen_output[g, t + 1] <= ramping_down[g] for g in range(NUM_GEN)
for t in range(period - 1)), 'ramping_down')
    # 4. add constraints for SOC
m.addConstr(battery_capacity * soc[0] == battery_capacity * initial_soc +
(battery_energy_change[0] * battery_efficiency), name='soc0')
m.addConstrs((battery_capacity * soc[t] == battery_capacity * soc[t - 1] +
(battery_energy_change[t] * battery_efficiency) for t in range(1, period)), name='soc update')
    # 5. add the constraint for pv output
m.addConstrs((pv[t] >= 0 for t in range(period)), name='pv_output_min')
# set cost function
# 1 cost of generator
cost_gen = gp.quicksum(
(a_para[g] * gen_output[g, t] * gen_output[g, t] + b_para[g] * gen_output[g, t] + c_para[g] * on_off[g, t]) for
t in range(period) for g in range(NUM_GEN))
cost_grid_import = gp.quicksum(grid_energy_import[t] * price[t] for t in range(period))
cost_grid_export = gp.quicksum(grid_energy_export[t] * price[t] * env.sell_coefficient for t in range(period))
m.setObjective((cost_gen + cost_grid_import - cost_grid_export), GRB.MINIMIZE)
m.optimize()
output_record = {'pv': [], 'temperature': [], 'irradiance': [], 'wind_speed': [], 'price': [], 'load': [],
'netload': [], 'soc': [], 'battery_energy_change': [], 'grid_import': [], 'grid_export': [],
'gen1': [], 'gen2': [], 'gen3': [], 'step_cost': []}
for t in range(period):
gen_cost = sum((on_off[g, t].x * (
a_para[g] * gen_output[g, t].x * gen_output[g, t].x + b_para[g] * gen_output[g, t].x + c_para[g]))
for g in range(NUM_GEN))
grid_import_cost = grid_energy_import[t].x * price[t]
grid_export_cost = grid_energy_export[t].x * price[t] * env.sell_coefficient
        output_record['pv'].append(pv[t])  # pv[t] is a plain float, not a Gurobi variable
output_record['temperature'].append(temperature[t])
output_record['irradiance'].append(irradiance[t])
output_record['wind_speed'].append(wind[t])
output_record['price'].append(price[t])
output_record['load'].append(load[t])
        output_record['netload'].append(load[t] - pv[t])
output_record['soc'].append(soc[t].x)
output_record['battery_energy_change'].append(battery_energy_change[t].x)
output_record['grid_import'].append(grid_energy_import[t].x)
output_record['grid_export'].append(grid_energy_export[t].x)
output_record['gen1'].append(gen_output[0, t].x)
output_record['gen2'].append(gen_output[1, t].x)
output_record['gen3'].append(gen_output[2, t].x)
output_record['step_cost'].append(gen_cost + grid_import_cost - grid_export_cost)
output_record_df = pd.DataFrame.from_dict(output_record)
return output_record_df
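# Summary of the unit-commitment model built above: it minimizes, over the 24-hour horizon,
#     sum_t sum_g (a_g * P_{g,t}**2 + b_g * P_{g,t} + c_g * u_{g,t})
#         + sum_t price_t * import_t - sell_coefficient * sum_t price_t * export_t
# subject to hourly power balance (DG + PV + wind + import >= load + battery charge + export),
# generator min/max output and ramping limits, and the battery SOC recursion with charging
# efficiency. pv[t] and wind[t] enter as fixed forecasts, not as decision variables.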
class Arguments:
"""revise here for our own purpose"""
def __init__(self, agent=None, env=None):
self.agent = agent # Deep Reinforcement Learning algorithm
self.env = env # the environment for training
# self.plot_shadow_on = False # control do we need to plot all shadow figures
self.cwd = None # current work directory. None means set automatically
self.if_remove = False # remove the cwd folder? (True, False, None:ask me)
self.visible_gpu = '0,1,2,3' # for example: os.environ['CUDA_VISIBLE_DEVICES'] = '0, 2,'
# self.worker_num = 2 # rollout workers number pre GPU (adjust it to get high GPU usage)
self.num_threads = 32 # cpu_num for evaluate model, torch.set_num_threads(self.num_threads)
'''Arguments for training'''
self.num_episode = 1000
self.gamma = 0.995 # discount factor of future rewards
# self.reward_scale = 1 # an approximate target reward usually be closed to 256
self.learning_rate = 2 ** -14 # 2 ** -14 ~= 6e-5
self.soft_update_tau = 2 ** -8 # 2 ** -8 ~= 5e-3
self.net_dim = 256 # the network width 256
self.batch_size = 4096 # num of transitions sampled from replay buffer.
self.repeat_times = 2 ** 5 # repeatedly update network to keep critic's loss small
self.target_step = 4096 # collect target_step experiences, then update network, 1024
self.max_memo = 500000 # capacity of replay buffer
self.if_per_or_gae = False # PER for off-policy sparse reward: Prioritized Experience Replay.
'''Arguments for evaluate'''
# self.eval_gap = 2 ** 6 # evaluate the agent per eval_gap seconds
# self.eval_times = 2 # number of times that get episode return in first
self.random_seed = 0 # initialize random seed in self.init_before_training()
# self.random_seed_list = [1234, 2234, 3234, 4234, 5234]
self.random_seed_list = [1234]
'''Arguments for save and plot issues'''
self.train = True
self.save_network = True
self.test_network = True
self.save_test_data = True
self.compare_with_pyomo = True
self.plot_on = True
def init_before_training(self, if_main):
if self.cwd is None:
agent_name = self.agent.__class__.__name__
self.cwd = f'./{agent_name}'
if if_main:
import shutil # remove history according to bool(if_remove)
if self.if_remove is None:
self.if_remove = bool(input(f"| PRESS 'y' to REMOVE: {self.cwd}? ") == 'y')
elif self.if_remove:
shutil.rmtree(self.cwd, ignore_errors=True)
print(f"| Remove cwd: {self.cwd}")
os.makedirs(self.cwd, exist_ok=True)
np.random.seed(self.random_seed)
torch.manual_seed(self.random_seed)
torch.set_num_threads(self.num_threads)
torch.set_default_dtype(torch.float32)
        os.environ['CUDA_VISIBLE_DEVICES'] = str(self.visible_gpu)  # control which GPUs are visible
def test_one_episode(env, act, device):
"""to get evaluate information, here record the unbalance of after taking action"""
record_state = []
record_action = []
record_reward = []
record_output = []
record_cost = []
record_unbalance = []
record_system_info = [] # [time, price, netload, action, real action, output*4, soc, unbalance (exchange + penalty)]
record_init_info = [] # includes month, day, time, initial soc
env.TRAIN = False
state = env.reset()
record_init_info.append([env.month, env.day, env.current_time, env.battery.current_capacity])
print(f'current testing month is {env.month}, day is {env.day}, initial soc is {env.battery.current_capacity}')
for i in range(24):
s_tensor = torch.as_tensor((state,), device=device)
a_tensor = act(s_tensor)
action = a_tensor.detach().cpu().numpy()[0]  # detach() is not strictly needed here because the call is wrapped in torch.no_grad() outside
real_action = action
state, next_state, reward, done = env.step(action)
record_system_info.append([state[0], state[1], state[3], action, real_action, env.battery.SOC(),
env.battery.energy_change, next_state[4], next_state[5], next_state[6],
next_state[7], next_state[8], env.unbalance, env.operation_cost])
record_state.append(state)
record_action.append(real_action)
record_reward.append(reward)
record_output.append(env.current_output)
record_unbalance.append(env.unbalance)
state = next_state
# add information of the last step: dg1, dg2, dg3 outputs and soc
record_system_info[-1][7:10] = [env.final_step_outputs[0], env.final_step_outputs[1], env.final_step_outputs[2]]
record_system_info[-1][5] = env.final_step_outputs[3]
record = {'init_info': record_init_info, 'system_info': record_system_info, 'state': record_state,
'action': record_action, 'reward': record_reward, 'cost': record_cost, 'unbalance': record_unbalance,
'record_output': record_output}
return record
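# --- Usage sketch (illustrative only) --------------------------------------
# How test_one_episode might be called for a trained actor network. `env` and
# `actor` are placeholders created elsewhere; torch.no_grad() disables gradient
# tracking during evaluation.
def _example_evaluate_one_day(env, actor, device):
    with torch.no_grad():
        record = test_one_episode(env, actor, device)
    system_info = pd.DataFrame(record['system_info'])
    print('mean unbalance over the day:', np.mean(record['unbalance']))
    return system_info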
def get_episode_return(env, act, device):
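"""Roll out one evaluation episode (up to 24 steps) with actor `act`, accumulating the episode reward and real unbalance."""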
episode_return = 0.0 # sum of rewards in an episode
episode_unbalance = 0.0
state = env.reset()
for i in range(24):
s_tensor = torch.as_tensor((state,), device=device)
a_tensor = act(s_tensor)
action = a_tensor.detach().cpu().numpy()[0]  # detach() is not strictly needed here because the call is wrapped in torch.no_grad() outside
state, next_state, reward, done = env.step(action)
state = next_state
episode_return += reward
episode_unbalance += env.real_unbalance
if done:
break
return episode_return, episode_unbalance
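# --- Usage sketch (illustrative only) --------------------------------------
# Averaging get_episode_return over several evaluation episodes; `eval_times`
# is a placeholder parameter, not part of the original training loop.
def _example_average_episode_return(env, actor, device, eval_times=4):
    returns, unbalances = [], []
    for _ in range(eval_times):
        r, u = get_episode_return(env, actor, device)
        returns.append(r)
        unbalances.append(u)
    return np.mean(returns), np.mean(unbalances)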
class ReplayBuffer:
def __init__(self, max_len, state_dim, action_dim, gpu_id=0):
self.now_len = 0
self.next_idx = 0
self.if_full = False
self.max_len = max_len
self.data_type = torch.float32
self.action_dim = action_dim
self.device = torch.device(f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu")
other_dim = 1 + 1 + self.action_dim  # reward + mask + action
self.buf_other = torch.empty(size=(max_len, other_dim), dtype=self.data_type, device=self.device)
if isinstance(state_dim, int):  # state is a flat feature vector
self.buf_state = torch.empty((max_len, state_dim), dtype=torch.float32, device=self.device)
elif isinstance(state_dim, tuple):  # state is an image (pixel) observation
self.buf_state = torch.empty((max_len, *state_dim), dtype=torch.uint8, device=self.device)
else:
raise ValueError('state_dim')
def extend_buffer(self, state, other): # CPU array to CPU array
size = len(other)
next_idx = self.next_idx + size
if next_idx > self.max_len:
self.buf_state[self.next_idx:self.max_len] = state[:self.max_len - self.next_idx]
self.buf_other[self.next_idx:self.max_len] = other[:self.max_len - self.next_idx]
self.if_full = True
next_idx = next_idx - self.max_len
self.buf_state[0:next_idx] = state[-next_idx:]
self.buf_other[0:next_idx] = other[-next_idx:]
else:
self.buf_state[self.next_idx:next_idx] = state
self.buf_other[self.next_idx:next_idx] = other
self.next_idx = next_idx
def sample_batch(self, batch_size) -> tuple:
indices = rd.randint(self.now_len - 1, size=batch_size)
r_m_a = self.buf_other[indices]
return (r_m_a[:, 0:1],  # reward
r_m_a[:, 1:2],  # mask
r_m_a[:, 2:],  # action
self.buf_state[indices],  # state
self.buf_state[indices + 1])  # next state
def update_now_len(self):
self.now_len = self.max_len if self.if_full else self.next_idx
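# --- Usage sketch (illustrative only) --------------------------------------
# Filling the replay buffer with random transitions and sampling a batch on the
# CPU. All dimensions below are arbitrary placeholders.
def _example_replay_buffer_usage(state_dim=10, action_dim=3):
    buffer = ReplayBuffer(max_len=1024, state_dim=state_dim, action_dim=action_dim, gpu_id=-1)
    states = torch.rand(64, state_dim)              # fake states
    others = torch.rand(64, 1 + 1 + action_dim)     # (reward, mask, action) per transition
    buffer.extend_buffer(states, others)
    buffer.update_now_len()
    reward, mask, action, state, next_state = buffer.sample_batch(batch_size=32)
    return reward.shape, state.shape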