'init'
|
@ -0,0 +1,8 @@
|
|||
# Default ignored files
|
||||
/shelf/
|
||||
/workspace.xml
|
||||
# Editor-based HTTP Client requests
|
||||
/httpRequests/
|
||||
# Datasource local storage ignored files
|
||||
/dataSources/
|
||||
/dataSources.local.xml
|
|
@ -0,0 +1,12 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="PYTHON_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$" />
|
||||
<orderEntry type="jdk" jdkName="rl-microgrid" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
<component name="PyDocumentationSettings">
|
||||
<option name="format" value="PLAIN" />
|
||||
<option name="myDocStringFormat" value="Plain" />
|
||||
</component>
|
||||
</module>
|
|
@ -0,0 +1,44 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="CsvFileAttributes">
|
||||
<option name="attributeMap">
|
||||
<map>
|
||||
<entry key="\data\houseload.csv">
|
||||
<value>
|
||||
<Attribute>
|
||||
<option name="separator" value="," />
|
||||
</Attribute>
|
||||
</value>
|
||||
</entry>
|
||||
<entry key="\data\irradiance.csv">
|
||||
<value>
|
||||
<Attribute>
|
||||
<option name="separator" value="," />
|
||||
</Attribute>
|
||||
</value>
|
||||
</entry>
|
||||
<entry key="\data\prices.csv">
|
||||
<value>
|
||||
<Attribute>
|
||||
<option name="separator" value=";" />
|
||||
</Attribute>
|
||||
</value>
|
||||
</entry>
|
||||
<entry key="\data\temper.csv">
|
||||
<value>
|
||||
<Attribute>
|
||||
<option name="separator" value="," />
|
||||
</Attribute>
|
||||
</value>
|
||||
</entry>
|
||||
<entry key="\data\wind.csv">
|
||||
<value>
|
||||
<Attribute>
|
||||
<option name="separator" value="," />
|
||||
</Attribute>
|
||||
</value>
|
||||
</entry>
|
||||
</map>
|
||||
</option>
|
||||
</component>
|
||||
</project>
|
|
@ -0,0 +1,151 @@
|
|||
<component name="InspectionProjectProfileManager">
|
||||
<profile version="1.0">
|
||||
<option name="myName" value="Project Default" />
|
||||
<inspection_tool class="DuplicatedCode" enabled="false" level="WEAK WARNING" enabled_by_default="false">
|
||||
<Languages>
|
||||
<language minSize="266" name="Python" />
|
||||
</Languages>
|
||||
</inspection_tool>
|
||||
<inspection_tool class="PyArgumentListInspection" enabled="false" level="WARNING" enabled_by_default="false" />
|
||||
<inspection_tool class="PyIncorrectDocstringInspection" enabled="false" level="WEAK WARNING" enabled_by_default="false" />
|
||||
<inspection_tool class="PyPackageRequirementsInspection" enabled="false" level="WARNING" enabled_by_default="false">
|
||||
<option name="ignoredPackages">
|
||||
<value>
|
||||
<list size="97">
|
||||
<item index="0" class="java.lang.String" itemvalue="absl-py" />
|
||||
<item index="1" class="java.lang.String" itemvalue="certifi" />
|
||||
<item index="2" class="java.lang.String" itemvalue="astunparse" />
|
||||
<item index="3" class="java.lang.String" itemvalue="cachetools" />
|
||||
<item index="4" class="java.lang.String" itemvalue="charset-normalizer" />
|
||||
<item index="5" class="java.lang.String" itemvalue="cloudpickle" />
|
||||
<item index="6" class="java.lang.String" itemvalue="contourpy" />
|
||||
<item index="7" class="java.lang.String" itemvalue="google-pasta" />
|
||||
<item index="8" class="java.lang.String" itemvalue="protobuf" />
|
||||
<item index="9" class="java.lang.String" itemvalue="tensorflow-estimator" />
|
||||
<item index="10" class="java.lang.String" itemvalue="joblib" />
|
||||
<item index="11" class="java.lang.String" itemvalue="threadpoolctl" />
|
||||
<item index="12" class="java.lang.String" itemvalue="opt-einsum" />
|
||||
<item index="13" class="java.lang.String" itemvalue="scikit-learn" />
|
||||
<item index="14" class="java.lang.String" itemvalue="python-dateutil" />
|
||||
<item index="15" class="java.lang.String" itemvalue="setuptools" />
|
||||
<item index="16" class="java.lang.String" itemvalue="cycler" />
|
||||
<item index="17" class="java.lang.String" itemvalue="gast" />
|
||||
<item index="18" class="java.lang.String" itemvalue="markupsafe" />
|
||||
<item index="19" class="java.lang.String" itemvalue="mkl" />
|
||||
<item index="20" class="java.lang.String" itemvalue="fsspec" />
|
||||
<item index="21" class="java.lang.String" itemvalue="pyasn1-modules" />
|
||||
<item index="22" class="java.lang.String" itemvalue="filelock" />
|
||||
<item index="23" class="java.lang.String" itemvalue="oauthlib" />
|
||||
<item index="24" class="java.lang.String" itemvalue="keras" />
|
||||
<item index="25" class="java.lang.String" itemvalue="pyparsing" />
|
||||
<item index="26" class="java.lang.String" itemvalue="sympy" />
|
||||
<item index="27" class="java.lang.String" itemvalue="libclang" />
|
||||
<item index="28" class="java.lang.String" itemvalue="python" />
|
||||
<item index="29" class="java.lang.String" itemvalue="werkzeug" />
|
||||
<item index="30" class="java.lang.String" itemvalue="gymnasium" />
|
||||
<item index="31" class="java.lang.String" itemvalue="sqlite" />
|
||||
<item index="32" class="java.lang.String" itemvalue="h5py" />
|
||||
<item index="33" class="java.lang.String" itemvalue="stable-baselines3" />
|
||||
<item index="34" class="java.lang.String" itemvalue="tensorboard-data-server" />
|
||||
<item index="35" class="java.lang.String" itemvalue="wrapt" />
|
||||
<item index="36" class="java.lang.String" itemvalue="kiwisolver" />
|
||||
<item index="37" class="java.lang.String" itemvalue="typing-extensions" />
|
||||
<item index="38" class="java.lang.String" itemvalue="vc" />
|
||||
<item index="39" class="java.lang.String" itemvalue="tensorflow-intel" />
|
||||
<item index="40" class="java.lang.String" itemvalue="jinja2" />
|
||||
<item index="41" class="java.lang.String" itemvalue="fonttools" />
|
||||
<item index="42" class="java.lang.String" itemvalue="flatbuffers" />
|
||||
<item index="43" class="java.lang.String" itemvalue="tensorboard" />
|
||||
<item index="44" class="java.lang.String" itemvalue="tbb" />
|
||||
<item index="45" class="java.lang.String" itemvalue="matplotlib" />
|
||||
<item index="46" class="java.lang.String" itemvalue="openssl" />
|
||||
<item index="47" class="java.lang.String" itemvalue="ca-certificates" />
|
||||
<item index="48" class="java.lang.String" itemvalue="idna" />
|
||||
<item index="49" class="java.lang.String" itemvalue="mkl-service" />
|
||||
<item index="50" class="java.lang.String" itemvalue="mkl_fft" />
|
||||
<item index="51" class="java.lang.String" itemvalue="rsa" />
|
||||
<item index="52" class="java.lang.String" itemvalue="networkx" />
|
||||
<item index="53" class="java.lang.String" itemvalue="numpy" />
|
||||
<item index="54" class="java.lang.String" itemvalue="pyasn1" />
|
||||
<item index="55" class="java.lang.String" itemvalue="importlib-metadata" />
|
||||
<item index="56" class="java.lang.String" itemvalue="requests-oauthlib" />
|
||||
<item index="57" class="java.lang.String" itemvalue="seaborn" />
|
||||
<item index="58" class="java.lang.String" itemvalue="zipp" />
|
||||
<item index="59" class="java.lang.String" itemvalue="markdown" />
|
||||
<item index="60" class="java.lang.String" itemvalue="ml-dtypes" />
|
||||
<item index="61" class="java.lang.String" itemvalue="urllib3" />
|
||||
<item index="62" class="java.lang.String" itemvalue="vs2015_runtime" />
|
||||
<item index="63" class="java.lang.String" itemvalue="numpy-base" />
|
||||
<item index="64" class="java.lang.String" itemvalue="scipy" />
|
||||
<item index="65" class="java.lang.String" itemvalue="google-auth-oauthlib" />
|
||||
<item index="66" class="java.lang.String" itemvalue="intel-openmp" />
|
||||
<item index="67" class="java.lang.String" itemvalue="tzdata" />
|
||||
<item index="68" class="java.lang.String" itemvalue="farama-notifications" />
|
||||
<item index="69" class="java.lang.String" itemvalue="packaging" />
|
||||
<item index="70" class="java.lang.String" itemvalue="torch" />
|
||||
<item index="71" class="java.lang.String" itemvalue="et-xmlfile" />
|
||||
<item index="72" class="java.lang.String" itemvalue="tensorflow-io-gcs-filesystem" />
|
||||
<item index="73" class="java.lang.String" itemvalue="pandas" />
|
||||
<item index="74" class="java.lang.String" itemvalue="termcolor" />
|
||||
<item index="75" class="java.lang.String" itemvalue="importlib-resources" />
|
||||
<item index="76" class="java.lang.String" itemvalue="mkl_random" />
|
||||
<item index="77" class="java.lang.String" itemvalue="icc_rt" />
|
||||
<item index="78" class="java.lang.String" itemvalue="mpmath" />
|
||||
<item index="79" class="java.lang.String" itemvalue="pillow" />
|
||||
<item index="80" class="java.lang.String" itemvalue="grpcio" />
|
||||
<item index="81" class="java.lang.String" itemvalue="pytz" />
|
||||
<item index="82" class="java.lang.String" itemvalue="google-auth" />
|
||||
<item index="83" class="java.lang.String" itemvalue="openpyxl" />
|
||||
<item index="84" class="java.lang.String" itemvalue="blas" />
|
||||
<item index="85" class="java.lang.String" itemvalue="gym" />
|
||||
<item index="86" class="java.lang.String" itemvalue="nltk" />
|
||||
<item index="87" class="java.lang.String" itemvalue="pygame" />
|
||||
<item index="88" class="java.lang.String" itemvalue="mo-gymnasium" />
|
||||
<item index="89" class="java.lang.String" itemvalue="rich" />
|
||||
<item index="90" class="java.lang.String" itemvalue="pynvml" />
|
||||
<item index="91" class="java.lang.String" itemvalue="mujoco" />
|
||||
<item index="92" class="java.lang.String" itemvalue="pybullet-gym" />
|
||||
<item index="93" class="java.lang.String" itemvalue="mpi4py" />
|
||||
<item index="94" class="java.lang.String" itemvalue="tqdm" />
|
||||
<item index="95" class="java.lang.String" itemvalue="ipython" />
|
||||
<item index="96" class="java.lang.String" itemvalue="tensorboardX" />
|
||||
</list>
|
||||
</value>
|
||||
</option>
|
||||
</inspection_tool>
|
||||
<inspection_tool class="PyPep8Inspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
|
||||
<option name="ignoredErrors">
|
||||
<list>
|
||||
<option value="E731" />
|
||||
</list>
|
||||
</option>
|
||||
</inspection_tool>
|
||||
<inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
|
||||
<option name="ignoredErrors">
|
||||
<list>
|
||||
<option value="N812" />
|
||||
<option value="N806" />
|
||||
<option value="N802" />
|
||||
<option value="N803" />
|
||||
<option value="N801" />
|
||||
<option value="N813" />
|
||||
</list>
|
||||
</option>
|
||||
</inspection_tool>
|
||||
<inspection_tool class="PyShadowingNamesInspection" enabled="false" level="WEAK WARNING" enabled_by_default="false" />
|
||||
<inspection_tool class="PyUnresolvedReferencesInspection" enabled="false" level="WARNING" enabled_by_default="false">
|
||||
<option name="ignoredIdentifiers">
|
||||
<list>
|
||||
<option value="filename" />
|
||||
<option value="int.*" />
|
||||
<option value="object.*" />
|
||||
</list>
|
||||
</option>
|
||||
</inspection_tool>
|
||||
<inspection_tool class="SpellCheckingInspection" enabled="false" level="TYPO" enabled_by_default="false">
|
||||
<option name="processCode" value="true" />
|
||||
<option name="processLiterals" value="true" />
|
||||
<option name="processComments" value="true" />
|
||||
</inspection_tool>
|
||||
</profile>
|
||||
</component>
|
|
@ -0,0 +1,6 @@
|
|||
<component name="InspectionProjectProfileManager">
|
||||
<settings>
|
||||
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||
<version value="1.0" />
|
||||
</settings>
|
||||
</component>
|
|
@ -0,0 +1,7 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="Black">
|
||||
<option name="sdkName" value="rl-microgrid" />
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="rl-microgrid" project-jdk-type="Python SDK" />
|
||||
</project>
|
|
@ -0,0 +1,8 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/DRL-for-Energy-Systems.iml" filepath="$PROJECT_DIR$/.idea/DRL-for-Energy-Systems.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
|
@ -0,0 +1,6 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
||||
</component>
|
||||
</project>
|
After Width: | Height: | Size: 143 KiB |
After Width: | Height: | Size: 126 KiB |
After Width: | Height: | Size: 142 KiB |
After Width: | Height: | Size: 127 KiB |
After Width: | Height: | Size: 145 KiB |
After Width: | Height: | Size: 126 KiB |
After Width: | Height: | Size: 143 KiB |
After Width: | Height: | Size: 126 KiB |
|
@ -0,0 +1,133 @@
|
|||
import pickle
|
||||
import pandas as pd
|
||||
import torch
|
||||
|
||||
from agent import AgentDDPG
|
||||
from environment import ESSEnv
|
||||
from tools import *
|
||||
|
||||
|
||||
def update_buffer(_trajectory):
|
||||
ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
|
||||
ary_other = torch.as_tensor([item[1] for item in _trajectory])
|
||||
ary_other[:, 0] = ary_other[:, 0] # ten_reward
|
||||
ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma # ten_mask = (1.0 - ary_done) * gamma
|
||||
|
||||
buffer.extend_buffer(ten_state, ary_other)
|
||||
|
||||
_steps = ten_state.shape[0]
|
||||
_r_exp = ary_other[:, 0].mean() # other = (reward, mask, action)
|
||||
return _steps, _r_exp
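# Note on the assumed data layout: AgentBase.explore_env in agent.py stores each transition as
# (state, (reward, done, *action)), so ary_other[:, 0] is the reward, ary_other[:, 1] is the done
# flag (rewritten above as the mask gamma * (1 - done)), and ary_other[:, 2:] holds the action
# components that ReplayBuffer.extend_buffer consumes.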
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
args = Arguments()
|
||||
'''here record real unbalance'''
|
||||
reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
|
||||
loss_record = {'episode': [], 'steps': [], 'critic_loss': [], 'actor_loss': [], 'entropy_loss': []}
|
||||
args.visible_gpu = '0'
|
||||
for seed in args.random_seed_list:
|
||||
args.random_seed = seed
|
||||
args.agent = AgentDDPG()
|
||||
agent_name = f'{args.agent.__class__.__name__}'
|
||||
args.agent.cri_target = True
|
||||
args.env = ESSEnv()
|
||||
# create a list of lists, or one long list?
|
||||
args.init_before_training(if_main=True)
|
||||
'''init agent and environment'''
|
||||
agent = args.agent
|
||||
env = args.env
|
||||
agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
|
||||
args.if_per_or_gae)
|
||||
'''init replay buffer'''
|
||||
buffer = ReplayBuffer(max_len=args.max_memo, state_dim=env.state_space.shape[0],
|
||||
action_dim=env.action_space.shape[0])
|
||||
'''start training'''
|
||||
cwd = args.cwd
|
||||
gamma = args.gamma
|
||||
batch_size = args.batch_size
|
||||
target_step = args.target_step  # how many steps one episode runs before stopping
|
||||
repeat_times = args.repeat_times  # how many update passes per batch of collected data
|
||||
soft_update_tau = args.soft_update_tau
|
||||
# get the first state from the environment
|
||||
agent.state = env.reset()
|
||||
'''collect data and train and update network'''
|
||||
num_episode = args.num_episode
|
||||
# args.train=False
|
||||
# args.save_network=False
|
||||
# args.test_network=False
|
||||
# args.save_test_data=False
|
||||
# args.compare_with_pyomo=False
|
||||
|
||||
if args.train:
|
||||
collect_data = True
|
||||
while collect_data:
|
||||
print(f'buffer:{buffer.now_len}')
|
||||
with torch.no_grad():
|
||||
trajectory = agent.explore_env(env, target_step)
|
||||
steps, r_exp = update_buffer(trajectory)
|
||||
buffer.update_now_len()
|
||||
if buffer.now_len >= 10000:
|
||||
collect_data = False
|
||||
for i_episode in range(num_episode):
|
||||
critic_loss, actor_loss = agent.update_net(buffer, batch_size, repeat_times, soft_update_tau)
|
||||
loss_record['critic_loss'].append(critic_loss)
|
||||
loss_record['actor_loss'].append(actor_loss)
|
||||
with torch.no_grad():
|
||||
episode_reward, episode_unbalance = get_episode_return(env, agent.act, agent.device)
|
||||
reward_record['mean_episode_reward'].append(episode_reward)
|
||||
reward_record['unbalance'].append(episode_unbalance)
|
||||
print(f'current episode is {i_episode}, reward:{episode_reward},'
|
||||
f'unbalance:{episode_unbalance},buffer_length: {buffer.now_len}')
|
||||
if i_episode % 10 == 0:
|
||||
# collect another target_step transitions every 10 episodes to keep the buffer fresh
|
||||
with torch.no_grad():
|
||||
trajectory = agent.explore_env(env, target_step)
|
||||
steps, r_exp = update_buffer(trajectory)
|
||||
loss_record_path = f'{args.cwd}/loss_data.pkl'
|
||||
reward_record_path = f'{args.cwd}/reward_data.pkl'
|
||||
# currently only the actor from the last seed is stored
|
||||
act_save_path = f'{args.cwd}/actor.pth'
|
||||
|
||||
with open(loss_record_path, 'wb') as tf:
|
||||
pickle.dump(loss_record, tf)
|
||||
with open(reward_record_path, 'wb') as tf:
|
||||
pickle.dump(reward_record, tf)
|
||||
|
||||
if args.save_network:
|
||||
torch.save(agent.act.state_dict(), act_save_path)
|
||||
print('actor parameters have been saved')
|
||||
|
||||
if args.test_network:
|
||||
args.cwd = agent_name
|
||||
agent.act.load_state_dict(torch.load(act_save_path))
|
||||
print('parameters have been reloaded for testing')
|
||||
record = test_one_episode(env, agent.act, agent.device)
|
||||
eval_data = pd.DataFrame(record['system_info'])
|
||||
eval_data.columns = ['time_step', 'price', 'netload', 'action', 'real_action', 'soc', 'battery', 'gen1', 'gen2',
|
||||
'gen3', 'unbalance', 'operation_cost']
|
||||
if args.save_test_data:
|
||||
test_data_save_path = f'{args.cwd}/test_data.pkl'
|
||||
with open(test_data_save_path, 'wb') as tf:
|
||||
pickle.dump(record, tf)
|
||||
|
||||
'''compare with pyomo data and results'''
|
||||
if args.compare_with_pyomo:
|
||||
month = record['init_info'][0][0]
|
||||
day = record['init_info'][0][1]
|
||||
initial_soc = record['init_info'][0][3]
|
||||
base_result = optimization_base_result(env, month, day, initial_soc)
|
||||
if args.plot_on:
|
||||
from plotDRL import PlotArgs, make_dir, plot_evaluation_information, plot_optimization_result
|
||||
|
||||
plot_args = PlotArgs()
|
||||
plot_args.feature_change = ''
|
||||
args.cwd = agent_name
|
||||
plot_dir = make_dir(args.cwd, plot_args.feature_change)
|
||||
plot_optimization_result(base_result, plot_dir)
|
||||
plot_evaluation_information(args.cwd + '/' + 'test_data.pkl', plot_dir)
|
||||
'''compare the costs obtained from pyomo and DDPG'''
|
||||
ratio = sum(eval_data['operation_cost']) / sum(base_result['step_cost'])
|
||||
print('operation_cost_sum:', sum(eval_data['operation_cost']))
|
||||
print('step_cost_sum:', sum(base_result['step_cost']))
|
||||
print('ratio:', ratio)
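# Note: ratio is the DRL policy's total operation cost divided by the Pyomo optimization
# benchmark's cost over the same day, so a value close to 1 means the learned policy is
# near the optimization-based baseline.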
|
|
@ -0,0 +1,399 @@
|
|||
import os
|
||||
import pickle
|
||||
from copy import deepcopy
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from environment import ESSEnv
|
||||
from tools import get_episode_return, test_one_episode, optimization_base_result
|
||||
|
||||
os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
|
||||
script_name = os.path.basename(__file__)
|
||||
|
||||
|
||||
# note: training did not work after layer normalization was added, so it is disabled by default (layer_norm=False)
|
||||
class ActorPPO(nn.Module):
|
||||
def __init__(self, mid_dim, state_dim, action_dim, layer_norm=False):
|
||||
super().__init__()
|
||||
self.net = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(),
|
||||
nn.Linear(mid_dim, mid_dim), nn.ReLU(),
|
||||
nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
|
||||
nn.Linear(mid_dim, action_dim), )
|
||||
|
||||
# log of the standard deviation (std) of the action distribution; a trainable parameter
|
||||
self.a_logstd = nn.Parameter(torch.zeros((1, action_dim)) - 0.5, requires_grad=True)
|
||||
self.sqrt_2pi_log = np.log(np.sqrt(2 * np.pi))
|
||||
if layer_norm:
|
||||
self.layer_norm(self.net)
|
||||
|
||||
@staticmethod
|
||||
def layer_norm(layer, std=1.0, bias_const=0.0):
|
||||
for i in layer:
|
||||
if hasattr(i, 'weight'):
|
||||
torch.nn.init.orthogonal_(i.weight, std)
|
||||
torch.nn.init.constant_(i.bias, bias_const)
|
||||
|
||||
def forward(self, state):
|
||||
return self.net(state).tanh()  # tanh limits the action output to [-1, 1]
|
||||
|
||||
def get_action(self, state):
|
||||
a_avg = self.net(state)  # raw action mean; may fall outside the action range (tanh is applied before env.step)
|
||||
a_std = self.a_logstd.exp()
|
||||
|
||||
noise = torch.randn_like(a_avg)
|
||||
action = a_avg + noise * a_std
|
||||
return action, noise
|
||||
|
||||
def get_logprob_entropy(self, state, action):
|
||||
a_avg = self.net(state)
|
||||
a_std = self.a_logstd.exp()
|
||||
|
||||
delta = ((a_avg - action) / a_std).pow(2) * 0.5
|
||||
logprob = -(self.a_logstd + self.sqrt_2pi_log + delta).sum(1) # new_logprob
|
||||
|
||||
dist_entropy = (logprob.exp() * logprob).mean()  # entropy-related regularizer (roughly the negative policy entropy)
|
||||
return logprob, dist_entropy
|
||||
|
||||
def get_old_logprob(self, _action, noise): # noise = action - a_noise
|
||||
delta = noise.pow(2) * 0.5
|
||||
return -(self.a_logstd + self.sqrt_2pi_log + delta).sum(1) # old_logprob
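# Note: both log-prob methods above implement the diagonal Gaussian log-likelihood
#   log p(a) = -sum_k [ log(sigma_k) + log(sqrt(2*pi)) + (a_k - mu_k)^2 / (2 * sigma_k^2) ],
# where mu = self.net(state) and log(sigma) = self.a_logstd. In get_old_logprob the term
# (a - mu) / sigma is exactly the `noise` stored by get_action, hence delta = noise^2 / 2.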
|
||||
|
||||
|
||||
class CriticAdv(nn.Module):
|
||||
def __init__(self, mid_dim, state_dim, _action_dim, layer_norm=False):
|
||||
super().__init__()
|
||||
self.net = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(),
|
||||
nn.Linear(mid_dim, mid_dim), nn.ReLU(),
|
||||
nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
|
||||
nn.Linear(mid_dim, 1))
|
||||
if layer_norm:
|
||||
self.layer_norm(self.net, std=1.0)
|
||||
|
||||
@staticmethod
|
||||
def layer_norm(layer, std=1.0, bias_const=0.0):
|
||||
for i in layer:
|
||||
if hasattr(i, 'weight'):
|
||||
torch.nn.init.orthogonal_(i.weight, std)
|
||||
torch.nn.init.constant_(i.bias, bias_const)
|
||||
|
||||
def forward(self, state):
|
||||
return self.net(state)  # state-value estimate V(s)
|
||||
|
||||
|
||||
class AgentPPO:
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.state = None
|
||||
self.device = None
|
||||
self.action_dim = None
|
||||
self.get_obj_critic = None
|
||||
|
||||
self.criterion = torch.nn.SmoothL1Loss()
|
||||
self.cri = self.cri_target = self.if_use_cri_target = self.cri_optim = self.ClassCri = None
|
||||
self.act = self.act_target = self.if_use_act_target = self.act_optim = self.ClassAct = None
|
||||
|
||||
'''init modify'''
|
||||
self.ClassCri = CriticAdv
|
||||
self.ClassAct = ActorPPO
|
||||
|
||||
self.ratio_clip = 0.2 # ratio.clamp(1 - clip, 1 + clip)
|
||||
self.lambda_entropy = 0.02 # could be 0.01~0.05
|
||||
self.lambda_gae_adv = 0.98 # could be 0.95~0.99, GAE (Generalized Advantage Estimation. ICLR.2016.)
|
||||
self.get_reward_sum = None # self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
|
||||
self.trajectory_list = None
|
||||
|
||||
def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, if_use_gae=False, gpu_id=0):
|
||||
self.device = torch.device(f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu")
|
||||
self.trajectory_list = list()
|
||||
# choose whether to use gae or not
|
||||
self.get_reward_sum = self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
|
||||
|
||||
self.cri = self.ClassCri(net_dim, state_dim, action_dim).to(self.device)
|
||||
self.act = self.ClassAct(net_dim, state_dim, action_dim).to(self.device) if self.ClassAct else self.cri
|
||||
self.cri_target = deepcopy(self.cri) if self.if_use_cri_target else self.cri
|
||||
self.act_target = deepcopy(self.act) if self.if_use_act_target else self.act
|
||||
|
||||
self.cri_optim = torch.optim.Adam(self.cri.parameters(), learning_rate)
|
||||
self.act_optim = torch.optim.Adam(self.act.parameters(), learning_rate) if self.ClassAct else self.cri
|
||||
del self.ClassCri, self.ClassAct  # the class references are no longer needed once the networks are built
|
||||
|
||||
def select_action(self, state):
|
||||
states = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
|
||||
actions, noises = self.act.get_action(states)
|
||||
return actions[0].detach().cpu().numpy(), noises[0].detach().cpu().numpy()
|
||||
|
||||
def explore_env(self, env, target_step):
|
||||
state = self.state  # resume exploration from the state stored on the agent
|
||||
trajectory_temp = list()
|
||||
last_done = 0
|
||||
for i in range(target_step):
|
||||
action, noise = self.select_action(state)
|
||||
state, next_state, reward, done, = env.step(
|
||||
np.tanh(action))  # the tanh-squashed action is what finally gets applied in the environment
|
||||
trajectory_temp.append((state, reward, done, action, noise))
|
||||
if done:
|
||||
state = env.reset()
|
||||
last_done = i
|
||||
else:
|
||||
state = next_state
|
||||
self.state = state
|
||||
|
||||
'''splice list'''
|
||||
trajectory_list = self.trajectory_list + trajectory_temp[
|
||||
:last_done + 1]  # keep transitions up to the last completed episode; the tail carries over via self.trajectory_list
|
||||
self.trajectory_list = trajectory_temp[last_done:]
|
||||
return trajectory_list
|
||||
|
||||
def update_net(self, buffer, batch_size, repeat_times, soft_update_tau):
|
||||
"""put data extract and update network together"""
|
||||
with torch.no_grad():
|
||||
buf_len = buffer[0].shape[0]
|
||||
buf_state, buf_action, buf_noise, buf_reward, buf_mask = [ten.to(self.device) for ten in
|
||||
buffer] # decompose buffer data
|
||||
|
||||
'''get buf_r_sum, buf_logprob'''
|
||||
bs = 4096  # chunk size for evaluating the critic; lower it (e.g. to 1024) if GPU memory runs out
|
||||
buf_value = [self.cri_target(buf_state[i:i + bs]) for i in range(0, buf_len, bs)]  # evaluate values in chunks
|
||||
buf_value = torch.cat(buf_value, dim=0)
|
||||
buf_logprob = self.act.get_old_logprob(buf_action, buf_noise)
|
||||
|
||||
buf_r_sum, buf_advantage = self.get_reward_sum(buf_len, buf_reward, buf_mask, buf_value) # detach()
|
||||
# normalize advantage
|
||||
buf_advantage = (buf_advantage - buf_advantage.mean()) / (buf_advantage.std() + 1e-5)
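# Note: normalizing advantages to zero mean and unit variance (the 1e-5 avoids division by zero)
# is a common PPO stabilization trick; it rescales the policy gradient without changing which
# actions are favoured.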
|
||||
del buf_noise, buffer[:]
|
||||
|
||||
'''PPO: Surrogate objective of Trust Region'''
|
||||
obj_critic = obj_actor = None
|
||||
for _ in range(int(buf_len / batch_size * repeat_times)):
|
||||
indices = torch.randint(buf_len, size=(batch_size,), requires_grad=False, device=self.device)
|
||||
|
||||
state = buf_state[indices]
|
||||
action = buf_action[indices]
|
||||
r_sum = buf_r_sum[indices]
|
||||
logprob = buf_logprob[indices]
|
||||
advantage = buf_advantage[indices]
|
||||
|
||||
new_logprob, obj_entropy = self.act.get_logprob_entropy(state, action)  # log-prob under the current policy and the entropy term for obj_actor
|
||||
ratio = (new_logprob - logprob.detach()).exp()
|
||||
surrogate1 = advantage * ratio
|
||||
surrogate2 = advantage * ratio.clamp(1 - self.ratio_clip, 1 + self.ratio_clip)
|
||||
obj_surrogate = -torch.min(surrogate1, surrogate2).mean()
|
||||
obj_actor = obj_surrogate + obj_entropy * self.lambda_entropy
|
||||
self.optim_update(self.act_optim, obj_actor) # update actor
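# Note: the block above is PPO's clipped surrogate objective,
#   maximize E[ min(r * A, clip(r, 1 - eps, 1 + eps) * A) ],  with r = exp(new_logprob - old_logprob)
# and eps = self.ratio_clip, written as a loss by negating it; the entropy term weighted by
# lambda_entropy (0.02 here) discourages premature policy collapse.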
|
||||
|
||||
value = self.cri(state).squeeze(1)  # critic predicts the discounted reward-to-go (state value) of the state
|
||||
# use smooth L1 loss for the value loss
|
||||
# obj_critic = self.criterion(value, r_sum) / (r_sum.std() + 1e-6)
|
||||
obj_critic = self.criterion(value, r_sum)
|
||||
self.optim_update(self.cri_optim, obj_critic)  # backpropagate the value loss and update the critic
|
||||
# choose whether to use soft update
|
||||
self.soft_update(self.cri_target, self.cri, soft_update_tau) if self.cri_target is not self.cri else None
|
||||
|
||||
a_std_log = getattr(self.act, 'a_logstd', torch.zeros(1))  # the actor stores its log-std as a_logstd
|
||||
return obj_critic.item(), obj_actor.item(), a_std_log.mean().item() # logging_tuple
|
||||
|
||||
def get_reward_sum_raw(self, buf_len, buf_reward, buf_mask, buf_value) -> (torch.Tensor, torch.Tensor):
|
||||
buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device) # reward sum
|
||||
|
||||
pre_r_sum = 0
|
||||
for i in range(buf_len - 1, -1, -1):
|
||||
buf_r_sum[i] = buf_reward[i] + buf_mask[i] * pre_r_sum
|
||||
pre_r_sum = buf_r_sum[i]
|
||||
buf_advantage = buf_r_sum - (buf_mask * buf_value[:, 0])
|
||||
return buf_r_sum, buf_advantage
|
||||
|
||||
def get_reward_sum_gae(self, buf_len, ten_reward, ten_mask, ten_value) -> (torch.Tensor, torch.Tensor):
|
||||
buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device) # old policy value
|
||||
buf_advantage = torch.empty(buf_len, dtype=torch.float32, device=self.device) # advantage value
|
||||
|
||||
pre_r_sum = 0
|
||||
pre_advantage = 0 # advantage value of previous step
|
||||
for i in range(buf_len - 1, -1, -1):
|
||||
buf_r_sum[i] = ten_reward[i] + ten_mask[i] * pre_r_sum
|
||||
pre_r_sum = buf_r_sum[i]
|
||||
buf_advantage[i] = ten_reward[i] + ten_mask[i] * (pre_advantage - ten_value[i]) # fix a bug here
|
||||
pre_advantage = ten_value[i] + buf_advantage[i] * self.lambda_gae_adv
|
||||
return buf_r_sum, buf_advantage
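# Note: this is a variant of Generalized Advantage Estimation. In standard notation,
#   delta_t = r_t + gamma * V(s_{t+1}) - V(s_t)  and  A_t = delta_t + gamma * lambda * A_{t+1},
# with lambda = self.lambda_gae_adv (0.98). Here gamma * (1 - done) is pre-folded into ten_mask
# by update_buffer, and in this implementation the mask also scales the -V(s_t) term.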
|
||||
|
||||
@staticmethod
|
||||
def optim_update(optimizer, objective):
|
||||
optimizer.zero_grad()
|
||||
objective.backward()
|
||||
optimizer.step()
|
||||
|
||||
@staticmethod
|
||||
def soft_update(target_net, current_net, tau):
|
||||
for tar, cur in zip(target_net.parameters(), current_net.parameters()):
|
||||
tar.data.copy_(cur.data * tau + tar.data * (1.0 - tau))
|
||||
|
||||
|
||||
class Arguments:
|
||||
def __init__(self, agent=None, env=None):
|
||||
self.agent = agent # Deep Reinforcement Learning algorithm
|
||||
self.env = env # the environment for training
|
||||
self.cwd = None # current work directory. None means set automatically
|
||||
self.if_remove = False # remove the cwd folder? (True, False, None:ask me)
|
||||
self.visible_gpu = '0' # for example: os.environ['CUDA_VISIBLE_DEVICES'] = '0, 2,'
|
||||
# self.worker_num = 2  # rollout worker number per GPU (adjust it to get high GPU usage)
|
||||
self.num_threads = 32 # cpu_num for evaluate model, torch.set_num_threads(self.num_threads)
|
||||
|
||||
'''Arguments for training'''
|
||||
self.num_episode = 1000 # to control the train episodes for PPO
|
||||
self.gamma = 0.995 # discount factor of future rewards
|
||||
self.learning_rate = 2 ** -14  # ~= 6e-5
|
||||
self.soft_update_tau = 2 ** -8  # 2 ** -8 ~= 4e-3
|
||||
|
||||
self.net_dim = 256 # the network width
|
||||
self.batch_size = 4096 # num of transitions sampled from replay buffer.
|
||||
self.repeat_times = 2 ** 3  # number of reuse passes over the collected data during each update phase
|
||||
self.target_step = 4096  # number of environment steps collected per exploration round
|
||||
self.max_memo = self.target_step # capacity of replay buffer
|
||||
self.if_per_or_gae = False # GAE for on-policy sparse reward: Generalized Advantage Estimation.
|
||||
|
||||
'''Arguments for evaluate'''
|
||||
self.random_seed = 0 # initialize random seed in self.init_before_training()
|
||||
# self.random_seed_list = [1234, 2234, 3234, 4234, 5234]
|
||||
self.random_seed_list = [1234]
|
||||
self.train = True
|
||||
self.save_network = True
|
||||
self.test_network = True
|
||||
self.save_test_data = True
|
||||
self.compare_with_pyomo = True
|
||||
self.plot_on = True
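# A minimal usage sketch (hypothetical override values): the training scripts build this object
# and adjust fields before calling init_before_training, e.g.
#   args = Arguments()
#   args.num_episode = 200        # shorter run
#   args.visible_gpu = '1'        # pick another GPU
#   args.init_before_training(if_main=True)  # requires args.agent to be set first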
|
||||
|
||||
def init_before_training(self, if_main):
|
||||
if self.cwd is None:
|
||||
agent_name = self.agent.__class__.__name__
|
||||
self.cwd = f'./{agent_name}'
|
||||
|
||||
if if_main:
|
||||
import shutil # remove history according to bool(if_remove)
|
||||
if self.if_remove is None:
|
||||
self.if_remove = bool(input(f"| PRESS 'y' to REMOVE: {self.cwd}? ") == 'y')
|
||||
elif self.if_remove:
|
||||
shutil.rmtree(self.cwd, ignore_errors=True)
|
||||
print(f"| Remove cwd: {self.cwd}")
|
||||
os.makedirs(self.cwd, exist_ok=True)
|
||||
|
||||
np.random.seed(self.random_seed)
|
||||
torch.manual_seed(self.random_seed)
|
||||
torch.set_num_threads(self.num_threads)
|
||||
torch.set_default_dtype(torch.float32)
|
||||
|
||||
os.environ['CUDA_VISIBLE_DEVICES'] = str(self.visible_gpu)
|
||||
|
||||
|
||||
def update_buffer(_trajectory):
|
||||
_trajectory = list(map(list, zip(*_trajectory)))  # 2D-list transpose: split the trajectory into 5 parallel lists (state, reward, done, action, noise)
|
||||
ten_state = torch.as_tensor(_trajectory[0]) # tensor state here
|
||||
ten_reward = torch.as_tensor(_trajectory[1], dtype=torch.float32)
|
||||
# _trajectory[2] = done, replace done by mask, save memory
|
||||
ten_mask = (1.0 - torch.as_tensor(_trajectory[2], dtype=torch.float32)) * gamma
|
||||
ten_action = torch.as_tensor(_trajectory[3])
|
||||
ten_noise = torch.as_tensor(_trajectory[4], dtype=torch.float32)
|
||||
|
||||
buffer[:] = (ten_state, ten_action, ten_noise, ten_reward, ten_mask) # list store tensors
|
||||
|
||||
_steps = ten_reward.shape[0] # how many steps are collected in all trajectories
|
||||
_r_exp = ten_reward.mean() # the mean reward
|
||||
return _steps, _r_exp
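# Note: unlike the off-policy scripts, PPO keeps an on-policy rollout: buffer is a plain list
# holding (state, action, noise, reward, mask) tensors for exactly one exploration round, and
# update_net consumes and clears it (del buffer[:]) before the next rollout.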
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
args = Arguments()
|
||||
reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
|
||||
loss_record = {'episode': [], 'steps': [], 'critic_loss': [], 'actor_loss': [], 'entropy_loss': []}
|
||||
args.visible_gpu = '0'
|
||||
for seed in args.random_seed_list:
|
||||
args.random_seed = seed
|
||||
args.agent = AgentPPO()
|
||||
|
||||
agent_name = f'{args.agent.__class__.__name__}'
|
||||
args.agent.cri_target = True
|
||||
args.env = ESSEnv()
|
||||
args.init_before_training(if_main=True)
|
||||
'''init agent and environment'''
|
||||
agent = args.agent
|
||||
env = args.env
|
||||
agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
|
||||
args.if_per_or_gae)
|
||||
|
||||
cwd = args.cwd
|
||||
gamma = args.gamma
|
||||
batch_size = args.batch_size  # number of transitions used per network update
|
||||
target_step = args.target_step  # how many steps one episode runs before stopping
|
||||
repeat_times = args.repeat_times  # how many update passes per batch of collected data
|
||||
soft_update_tau = args.soft_update_tau
|
||||
agent.state = env.reset()
|
||||
'''init buffer'''
|
||||
buffer = list()
|
||||
'''init training parameters'''
|
||||
num_episode = args.num_episode
|
||||
# args.train=False
|
||||
# args.save_network=False
|
||||
# args.test_network=False
|
||||
# args.save_test_data=False
|
||||
# args.compare_with_pyomo=False
|
||||
if args.train:
|
||||
for i_episode in range(num_episode):
|
||||
with torch.no_grad():
|
||||
trajectory_list = agent.explore_env(env, target_step)
|
||||
steps, r_exp = update_buffer(trajectory_list)
|
||||
critic_loss, actor_loss, entropy_loss = agent.update_net(buffer, batch_size, repeat_times,
|
||||
soft_update_tau)
|
||||
loss_record['critic_loss'].append(critic_loss)
|
||||
loss_record['actor_loss'].append(actor_loss)
|
||||
loss_record['entropy_loss'].append(entropy_loss)
|
||||
|
||||
with torch.no_grad():
|
||||
episode_reward, episode_unbalance = get_episode_return(env, agent.act, agent.device)
|
||||
reward_record['mean_episode_reward'].append(episode_reward)
|
||||
reward_record['unbalance'].append(episode_unbalance)
|
||||
print(f'current episode is {i_episode}, reward:{episode_reward},unbalance:{episode_unbalance}')
|
||||
act_save_path = f'{args.cwd}/actor.pth'
|
||||
loss_record_path = f'{args.cwd}/loss_data.pkl'
|
||||
reward_record_path = f'{args.cwd}/reward_data.pkl'
|
||||
|
||||
with open(loss_record_path, 'wb') as tf:
|
||||
pickle.dump(loss_record, tf)
|
||||
with open(reward_record_path, 'wb') as tf:
|
||||
pickle.dump(reward_record, tf)
|
||||
|
||||
if args.save_network:
|
||||
torch.save(agent.act.state_dict(), act_save_path)
|
||||
print('actor parameters have been saved')
|
||||
|
||||
if args.test_network:
|
||||
args.cwd = agent_name + '0618'
|
||||
agent.act.load_state_dict(torch.load(act_save_path))
|
||||
print('parameters have been reloaded for testing')
|
||||
record = test_one_episode(env, agent.act, agent.device)
|
||||
eval_data = pd.DataFrame(record['information'])
|
||||
eval_data.columns = ['time_step', 'price', 'netload', 'action', 'real_action', 'soc', 'battery', 'gen1', 'gen2',
|
||||
'gen3', 'unbalance', 'operation_cost']
|
||||
if args.save_test_data:
|
||||
test_data_save_path = f'{args.cwd}/test_data.pkl'
|
||||
with open(test_data_save_path, 'wb') as tf:
|
||||
pickle.dump(record, tf)
|
||||
|
||||
'''compare with pyomo data and results'''
|
||||
if args.compare_with_pyomo:
|
||||
month = record['init_info'][0][0]
|
||||
day = record['init_info'][0][1]
|
||||
initial_soc = record['init_info'][0][3]
|
||||
base_result = optimization_base_result(env, month, day, initial_soc)
|
||||
if args.plot_on:
|
||||
from plotDRL import PlotArgs, make_dir, plot_evaluation_information, plot_optimization_result
|
||||
|
||||
plot_args = PlotArgs()
|
||||
plot_args.feature_change = ''
|
||||
args.cwd = agent_name
|
||||
plot_dir = make_dir(args.cwd, plot_args.feature_change)
|
||||
plot_optimization_result(base_result, plot_dir)
|
||||
plot_evaluation_information(args.cwd + '/' + 'test_data.pkl', plot_dir)
|
||||
'''compare the costs obtained from pyomo and PPO'''
|
||||
ratio = sum(eval_data['operation_cost']) / sum(base_result['step_cost'])
|
||||
print('operation_cost_sum:', sum(eval_data['operation_cost']))
|
||||
print('step_cost_sum:', sum(base_result['step_cost']))
|
||||
print('ratio:', ratio)
|
|
@ -0,0 +1,137 @@
|
|||
import pickle
|
||||
import pandas as pd
|
||||
import torch
|
||||
|
||||
from agent import AgentSAC
|
||||
from environment import ESSEnv
|
||||
from tools import *
|
||||
|
||||
|
||||
def update_buffer(_trajectory):
|
||||
ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
|
||||
ary_other = torch.as_tensor([item[1] for item in _trajectory])
|
||||
ary_other[:, 0] = ary_other[:, 0] # ten_reward
|
||||
ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma # ten_mask = (1.0 - ary_done) * gamma
|
||||
|
||||
buffer.extend_buffer(ten_state, ary_other)
|
||||
|
||||
_steps = ten_state.shape[0]
|
||||
_r_exp = ary_other[:, 0].mean() # other = (reward, mask, action)
|
||||
return _steps, _r_exp
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
args = Arguments()
|
||||
reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
|
||||
loss_record = {'episode': [], 'steps': [], 'critic_loss': [], 'actor_loss': [], 'entropy_loss': []}
|
||||
args.visible_gpu = '0'
|
||||
for seed in args.random_seed_list:
|
||||
args.random_seed = seed
|
||||
args.agent = AgentSAC()
|
||||
agent_name = f'{args.agent.__class__.__name__}'
|
||||
args.agent.cri_target = True
|
||||
args.env = ESSEnv()
|
||||
args.init_before_training(if_main=True)
|
||||
'''init agent and environment'''
|
||||
agent = args.agent
|
||||
env = args.env
|
||||
agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
|
||||
args.if_per_or_gae)
|
||||
'''init replay buffer'''
|
||||
buffer = ReplayBuffer(max_len=args.max_memo, state_dim=env.state_space.shape[0],
|
||||
action_dim=env.action_space.shape[0])
|
||||
'''start training'''
|
||||
cwd = args.cwd
|
||||
gamma = args.gamma
|
||||
batch_size = args.batch_size  # number of transitions used per network update
|
||||
target_step = args.target_step  # how many steps one episode runs before stopping
|
||||
repeat_times = args.repeat_times  # how many update passes per batch of collected data
|
||||
soft_update_tau = args.soft_update_tau
|
||||
|
||||
agent.state = env.reset()
|
||||
|
||||
'''collect data and train and update network'''
|
||||
num_episode = args.num_episode
|
||||
'''here record real unbalance'''
|
||||
|
||||
##
|
||||
# args.train=False
|
||||
# args.save_network=False
|
||||
# args.test_network=False
|
||||
# args.save_test_data=False
|
||||
# args.compare_with_pyomo=False
|
||||
#
|
||||
if args.train:
|
||||
collect_data = True
|
||||
while collect_data:
|
||||
print(f'buffer:{buffer.now_len}')
|
||||
with torch.no_grad():
|
||||
trajectory = agent.explore_env(env, target_step)
|
||||
steps, r_exp = update_buffer(trajectory)
|
||||
buffer.update_now_len()
|
||||
if buffer.now_len >= 10000:
|
||||
collect_data = False
|
||||
for i_episode in range(num_episode):
|
||||
critic_loss, actor_loss, entropy_loss = agent.update_net(buffer, batch_size, repeat_times,
|
||||
soft_update_tau)
|
||||
loss_record['critic_loss'].append(critic_loss)
|
||||
loss_record['actor_loss'].append(actor_loss)
|
||||
loss_record['entropy_loss'].append(entropy_loss)
|
||||
with torch.no_grad():
|
||||
episode_reward, episode_unbalance = get_episode_return(env, agent.act, agent.device)
|
||||
reward_record['mean_episode_reward'].append(episode_reward)
|
||||
reward_record['unbalance'].append(episode_unbalance)
|
||||
print(
|
||||
f'current episode is {i_episode}, reward:{episode_reward},'
|
||||
f'unbalance:{episode_unbalance},buffer_length: {buffer.now_len}')
|
||||
if i_episode % 10 == 0:
|
||||
# collect another target_step transitions every 10 episodes to keep the buffer fresh
|
||||
with torch.no_grad():
|
||||
trajectory = agent.explore_env(env, target_step)
|
||||
steps, r_exp = update_buffer(trajectory)
|
||||
act_save_path = f'{args.cwd}/actor.pth'
|
||||
loss_record_path = f'{args.cwd}/loss_data.pkl'
|
||||
reward_record_path = f'{args.cwd}/reward_data.pkl'
|
||||
|
||||
with open(loss_record_path, 'wb') as tf:
|
||||
pickle.dump(loss_record, tf)
|
||||
with open(reward_record_path, 'wb') as tf:
|
||||
pickle.dump(reward_record, tf)
|
||||
|
||||
if args.save_network:
|
||||
torch.save(agent.act.state_dict(), act_save_path)
|
||||
print('actor parameters have been saved')
|
||||
|
||||
if args.test_network:
|
||||
args.cwd = agent_name
|
||||
agent.act.load_state_dict(torch.load(act_save_path))
|
||||
print('parameters have been reloaded for testing')
|
||||
record = test_one_episode(env, agent.act, agent.device)
|
||||
eval_data = pd.DataFrame(record['information'])
|
||||
eval_data.columns = ['time_step', 'price', 'netload', 'action', 'real_action', 'soc', 'battery', 'gen1', 'gen2',
|
||||
'gen3', 'unbalance', 'operation_cost']
|
||||
if args.save_test_data:
|
||||
test_data_save_path = f'{args.cwd}/test_data.pkl'
|
||||
with open(test_data_save_path, 'wb') as tf:
|
||||
pickle.dump(record, tf)
|
||||
|
||||
'''compare with pyomo data and results'''
|
||||
if args.compare_with_pyomo:
|
||||
month = record['init_info'][0][0]
|
||||
day = record['init_info'][0][1]
|
||||
initial_soc = record['init_info'][0][3]
|
||||
base_result = optimization_base_result(env, month, day, initial_soc)
|
||||
if args.plot_on:
|
||||
from plotDRL import PlotArgs, make_dir, plot_evaluation_information, plot_optimization_result
|
||||
|
||||
plot_args = PlotArgs()
|
||||
plot_args.feature_change = ''
|
||||
args.cwd = agent_name
|
||||
plot_dir = make_dir(args.cwd, plot_args.feature_change)
|
||||
plot_optimization_result(base_result, plot_dir)
|
||||
plot_evaluation_information(args.cwd + '/' + 'test_data.pkl', plot_dir)
|
||||
'''compare the costs obtained from pyomo and SAC'''
|
||||
ratio = sum(eval_data['operation_cost']) / sum(base_result['step_cost'])
|
||||
print('operation_cost_sum:', sum(eval_data['operation_cost']))
|
||||
print('step_cost_sum:', sum(base_result['step_cost']))
|
||||
print('ratio:', ratio)
|
|
@ -0,0 +1,133 @@
|
|||
import pickle
|
||||
import pandas as pd
|
||||
import torch
|
||||
|
||||
from agent import AgentTD3
|
||||
from environment import ESSEnv
|
||||
from tools import *
|
||||
|
||||
|
||||
def update_buffer(_trajectory):
|
||||
ten_state = torch.as_tensor([item[0] for item in _trajectory], dtype=torch.float32)
|
||||
ary_other = torch.as_tensor([item[1] for item in _trajectory])
|
||||
ary_other[:, 0] = ary_other[:, 0] # ten_reward
|
||||
ary_other[:, 1] = (1.0 - ary_other[:, 1]) * gamma # ten_mask = (1.0 - ary_done) * gamma
|
||||
|
||||
buffer.extend_buffer(ten_state, ary_other)
|
||||
|
||||
_steps = ten_state.shape[0]
|
||||
_r_exp = ary_other[:, 0].mean() # other = (reward, mask, action)
|
||||
return _steps, _r_exp
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
args = Arguments()
|
||||
reward_record = {'episode': [], 'steps': [], 'mean_episode_reward': [], 'unbalance': []}
|
||||
loss_record = {'episode': [], 'steps': [], 'critic_loss': [], 'actor_loss': [], 'entropy_loss': []}
|
||||
args.visible_gpu = '0'
|
||||
for seed in args.random_seed_list:
|
||||
args.random_seed = seed
|
||||
args.agent = AgentTD3()
|
||||
agent_name = f'{args.agent.__class__.__name__}'
|
||||
args.agent.cri_target = True
|
||||
args.env = ESSEnv()
|
||||
args.init_before_training(if_main=True)
|
||||
'''init agent and environment'''
|
||||
agent = args.agent
|
||||
env = args.env
|
||||
agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
|
||||
args.if_per_or_gae)
|
||||
'''init replay buffer'''
|
||||
buffer = ReplayBuffer(max_len=args.max_memo, state_dim=env.state_space.shape[0],
|
||||
action_dim=env.action_space.shape[0])
|
||||
'''start training'''
|
||||
cwd = args.cwd
|
||||
gamma = args.gamma
|
||||
batch_size = args.batch_size  # number of transitions used per network update
|
||||
target_step = args.target_step  # how many steps one episode runs before stopping
|
||||
repeat_times = args.repeat_times  # how many update passes per batch of collected data
|
||||
soft_update_tau = args.soft_update_tau
|
||||
|
||||
agent.state = env.reset()
|
||||
'''collect data and train and update network'''
|
||||
num_episode = args.num_episode
|
||||
|
||||
# args.train=False
|
||||
# args.save_network=False
|
||||
# args.test_network=False
|
||||
# args.save_test_data=False
|
||||
# args.compare_with_pyomo=False
|
||||
|
||||
if args.train:
|
||||
collect_data = True
|
||||
while collect_data:
|
||||
print(f'buffer:{buffer.now_len}')
|
||||
with torch.no_grad():
|
||||
trajectory = agent.explore_env(env, target_step)
|
||||
steps, r_exp = update_buffer(trajectory)
|
||||
buffer.update_now_len()
|
||||
if buffer.now_len >= 10000:
|
||||
collect_data = False
|
||||
for i_episode in range(num_episode):
|
||||
critic_loss, actor_loss = agent.update_net(buffer, batch_size, repeat_times, soft_update_tau)
|
||||
loss_record['critic_loss'].append(critic_loss)
|
||||
loss_record['actor_loss'].append(actor_loss)
|
||||
with torch.no_grad():
|
||||
episode_reward, episode_unbalance = get_episode_return(env, agent.act, agent.device)
|
||||
reward_record['mean_episode_reward'].append(episode_reward)
|
||||
reward_record['unbalance'].append(episode_unbalance)
|
||||
print(
|
||||
f'current episode is {i_episode}, reward:{episode_reward},'
|
||||
f'unbalance:{episode_unbalance},buffer_length: {buffer.now_len}')
|
||||
if i_episode % 10 == 0:
|
||||
# collect another target_step transitions every 10 episodes to keep the buffer fresh
|
||||
with torch.no_grad():
|
||||
trajectory = agent.explore_env(env, target_step)
|
||||
steps, r_exp = update_buffer(trajectory)
|
||||
loss_record_path = f'{args.cwd}/loss_data.pkl'
|
||||
reward_record_path = f'{args.cwd}/reward_data.pkl'
|
||||
# currently only the actor from the last seed is stored
|
||||
act_save_path = f'{args.cwd}/actor.pth'
|
||||
|
||||
with open(loss_record_path, 'wb') as tf:
|
||||
pickle.dump(loss_record, tf)
|
||||
with open(reward_record_path, 'wb') as tf:
|
||||
pickle.dump(reward_record, tf)
|
||||
|
||||
if args.save_network:
|
||||
torch.save(agent.act.state_dict(), act_save_path)
|
||||
print('actor parameters have been saved')
|
||||
|
||||
if args.test_network:
|
||||
args.cwd = agent_name
|
||||
agent.act.load_state_dict(torch.load(act_save_path))
|
||||
print('parameters have been reloaded for testing')
|
||||
record = test_one_episode(env, agent.act, agent.device)
|
||||
eval_data = pd.DataFrame(record['information'])
|
||||
eval_data.columns = ['time_step', 'price', 'netload', 'action', 'real_action', 'soc', 'battery', 'gen1', 'gen2',
|
||||
'gen3', 'unbalance', 'operation_cost']
|
||||
if args.save_test_data:
|
||||
test_data_save_path = f'{args.cwd}/test_data.pkl'
|
||||
with open(test_data_save_path, 'wb') as tf:
|
||||
pickle.dump(record, tf)
|
||||
|
||||
'''compare with pyomo data and results'''
|
||||
if args.compare_with_pyomo:
|
||||
month = record['init_info'][0][0]
|
||||
day = record['init_info'][0][1]
|
||||
initial_soc = record['init_info'][0][3]
|
||||
base_result = optimization_base_result(env, month, day, initial_soc)
|
||||
if args.plot_on:
|
||||
from plotDRL import PlotArgs, make_dir, plot_evaluation_information, plot_optimization_result
|
||||
|
||||
plot_args = PlotArgs()
|
||||
plot_args.feature_change = ''
|
||||
args.cwd = agent_name
|
||||
plot_dir = make_dir(args.cwd, plot_args.feature_change)
|
||||
plot_optimization_result(base_result, plot_dir)
|
||||
plot_evaluation_information(args.cwd + '/' + 'test_data.pkl', plot_dir)
|
||||
'''compare the costs obtained from pyomo and TD3'''
|
||||
ratio = sum(eval_data['operation_cost']) / sum(base_result['step_cost'])
|
||||
print('operation_cost_sum:', sum(eval_data['operation_cost']))
|
||||
print('step_cost_sum:', sum(base_result['step_cost']))
|
||||
print('ratio:', ratio)
|
|
@ -0,0 +1,328 @@
|
|||
import numpy.random as rd
|
||||
import os
|
||||
from copy import deepcopy
|
||||
from net import *
|
||||
|
||||
|
||||
class AgentBase:
|
||||
def __init__(self):
|
||||
self.state = None
|
||||
self.device = None
|
||||
self.action_dim = None
|
||||
self.if_off_policy = None
|
||||
self.explore_noise = None
|
||||
self.trajectory_list = None
|
||||
|
||||
self.criterion = torch.nn.SmoothL1Loss()
|
||||
self.cri = self.cri_target = self.if_use_cri_target = self.cri_optim = self.ClassCri = None
|
||||
self.act = self.act_target = self.if_use_act_target = self.act_optim = self.ClassAct = None
|
||||
|
||||
def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, _if_per_or_gae=False, gpu_id=0):
|
||||
# explicitly call self.init() for multiprocessing
|
||||
self.device = torch.device(f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu")
|
||||
self.action_dim = action_dim
|
||||
|
||||
self.cri = self.ClassCri(net_dim, state_dim, action_dim).to(self.device)
|
||||
self.act = self.ClassAct(net_dim, state_dim, action_dim).to(self.device) if self.ClassAct else self.cri
|
||||
self.cri_target = deepcopy(self.cri) if self.if_use_cri_target else self.cri
|
||||
self.act_target = deepcopy(self.act) if self.if_use_act_target else self.act
|
||||
|
||||
self.cri_optim = torch.optim.Adam(self.cri.parameters(), learning_rate)
|
||||
self.act_optim = torch.optim.Adam(self.act.parameters(), learning_rate) if self.ClassAct else self.cri
|
||||
del self.ClassCri, self.ClassAct
|
||||
|
||||
def select_action(self, state) -> np.ndarray:
|
||||
states = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
|
||||
action = self.act(states)[0]
|
||||
action = (action + torch.randn_like(action) * self.explore_noise).clamp(-1, 1)
|
||||
return action.detach().cpu().numpy()
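# Note: exploration here is simple additive Gaussian noise with std self.explore_noise,
# clipped back into [-1, 1], the normalized action range used throughout these agents.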
|
||||
|
||||
def explore_env(self, env, target_step):
|
||||
trajectory = list()
|
||||
|
||||
state = self.state
|
||||
for _ in range(target_step):
|
||||
action = self.select_action(state)
|
||||
state, next_state, reward, done, = env.step(action)
|
||||
trajectory.append((state, (reward, done, *action)))
|
||||
state = env.reset() if done else next_state
|
||||
self.state = state
|
||||
return trajectory
|
||||
|
||||
@staticmethod
|
||||
def optim_update(optimizer, objective):
|
||||
optimizer.zero_grad()
|
||||
objective.backward()
|
||||
optimizer.step()
|
||||
|
||||
@staticmethod
|
||||
def soft_update(target_net, current_net, tau):
|
||||
for tar, cur in zip(target_net.parameters(), current_net.parameters()):
|
||||
tar.data.copy_(cur.data * tau + tar.data * (1.0 - tau))
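# Note: this is Polyak averaging, theta_target <- tau * theta + (1 - tau) * theta_target.
# With the default soft_update_tau = 2 ** -8 (about 0.0039), each call moves the target
# network roughly 0.4% of the way toward the online network.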
|
||||
|
||||
def save_or_load_agent(self, cwd, if_save):
|
||||
def load_torch_file(model_or_optim, _path):
|
||||
state_dict = torch.load(_path, map_location=lambda storage, loc: storage)
|
||||
model_or_optim.load_state_dict(state_dict)
|
||||
|
||||
name_obj_list = [('actor', self.act), ('act_target', self.act_target), ('act_optim', self.act_optim),
|
||||
('critic', self.cri), ('cri_target', self.cri_target), ('cri_optim', self.cri_optim), ]
|
||||
name_obj_list = [(name, obj) for name, obj in name_obj_list if obj is not None]
|
||||
if if_save:
|
||||
for name, obj in name_obj_list:
|
||||
save_path = f"{cwd}/{name}.pth"
|
||||
torch.save(obj.state_dict(), save_path)
|
||||
else:
|
||||
for name, obj in name_obj_list:
|
||||
save_path = f"{cwd}/{name}.pth"
|
||||
load_torch_file(obj, save_path) if os.path.isfile(save_path) else None
|
||||
|
||||
|
||||
class AgentDDPG(AgentBase):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.explore_noise = 0.1 # explore noise of action
|
||||
self.if_use_cri_target = self.if_use_act_target = True
|
||||
self.ClassCri = Critic
|
||||
self.ClassAct = Actor
|
||||
|
||||
def update_net(self, buffer, batch_size, repeat_times, soft_update_tau) -> (float, float):
|
||||
buffer.update_now_len()
|
||||
obj_critic = obj_actor = None
|
||||
for _ in range(int(buffer.now_len / batch_size * repeat_times)):
|
||||
obj_critic, state = self.get_obj_critic(buffer, batch_size) # critic loss
|
||||
self.optim_update(self.cri_optim, obj_critic)
|
||||
self.soft_update(self.cri_target, self.cri, soft_update_tau)
|
||||
|
||||
action_pg = self.act(state) # policy gradient
|
||||
obj_actor = -self.cri(state, action_pg).mean()  # actor objective: maximize Q(s, pi(s)) by minimizing its negative
|
||||
self.optim_update(self.act_optim, obj_actor)
|
||||
self.soft_update(self.act_target, self.act, soft_update_tau)
|
||||
return obj_critic.item(), obj_actor.item()  # (critic loss, actor loss), matching the other agents and the training scripts
|
||||
|
||||
def get_obj_critic(self, buffer, batch_size) -> (torch.Tensor, torch.Tensor):
|
||||
with torch.no_grad():
|
||||
reward, mask, action, state, next_s = buffer.sample_batch(batch_size)
|
||||
next_q = self.cri_target(next_s, self.act_target(next_s))
|
||||
q_label = reward + mask * next_q
|
||||
q_value = self.cri(state, action)
|
||||
obj_critic = self.criterion(q_value, q_label)
|
||||
return obj_critic, state
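# Note: q_label above is the DDPG TD target y = r + gamma * (1 - done) * Q'(s', mu'(s')),
# where mask already equals gamma * (1 - done) (set in update_buffer in the training scripts)
# and Q', mu' are the target critic and target actor.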
|
||||
|
||||
|
||||
class AgentTD3(AgentBase):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.explore_noise = 0.1 # standard deviation of exploration noise
|
||||
self.policy_noise = 0.2 # standard deviation of policy noise
|
||||
self.update_freq = 2 # delay update frequency
|
||||
self.if_use_cri_target = self.if_use_act_target = True
|
||||
self.ClassCri = CriticTwin
|
||||
self.ClassAct = Actor
|
||||
|
||||
def update_net(self, buffer, batch_size, repeat_times, soft_update_tau) -> tuple:
|
||||
buffer.update_now_len()
|
||||
obj_critic = obj_actor = None
|
||||
for update_c in range(int(buffer.now_len / batch_size * repeat_times)):
|
||||
obj_critic, state = self.get_obj_critic(buffer, batch_size)
|
||||
self.optim_update(self.cri_optim, obj_critic)
|
||||
|
||||
action_pg = self.act(state) # policy gradient
|
||||
obj_actor = -self.cri_target(state, action_pg).mean() # use cri_target instead of cri for stable training
|
||||
self.optim_update(self.act_optim, obj_actor)
|
||||
if update_c % self.update_freq == 0: # delay update
|
||||
self.soft_update(self.cri_target, self.cri, soft_update_tau)
|
||||
self.soft_update(self.act_target, self.act, soft_update_tau)
|
||||
return obj_critic.item() / 2, obj_actor.item()
|
||||
|
||||
def get_obj_critic(self, buffer, batch_size) -> (torch.Tensor, torch.Tensor):
|
||||
with torch.no_grad():
|
||||
reward, mask, action, state, next_s = buffer.sample_batch(batch_size)
|
||||
next_a = self.act_target.get_action(next_s, self.policy_noise) # policy noise
|
||||
next_q = torch.min(*self.cri_target.get_q1_q2(next_s, next_a)) # twin critics
|
||||
q_label = reward + mask * next_q
|
||||
|
||||
q1, q2 = self.cri.get_q1_q2(state, action)
|
||||
obj_critic = self.criterion(q1, q_label) + self.criterion(q2, q_label) # twin critics
|
||||
return obj_critic, state
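# Note: the two TD3 ingredients above are target policy smoothing (noise of std policy_noise
# added to the target action) and clipped double-Q learning (the min over the twin target
# critics), which together reduce overestimation of the Q targets; in this implementation the
# target-network soft updates in update_net are the part delayed by update_freq.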
|
||||
|
||||
|
||||
class AgentSAC(AgentBase):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.ClassCri = CriticTwin
|
||||
self.ClassAct = ActorSAC
|
||||
self.if_use_cri_target = True
|
||||
self.if_use_act_target = False
|
||||
|
||||
self.alpha_log = None
|
||||
self.alpha_optim = None
|
||||
self.target_entropy = None
|
||||
|
||||
def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, _if_use_per=False, gpu_id=0, env_num=1):
|
||||
super().init(net_dim, state_dim, action_dim, learning_rate, _if_use_per, gpu_id)
|
||||
|
||||
self.alpha_log = torch.tensor((-np.log(action_dim) * np.e,), dtype=torch.float32,
|
||||
requires_grad=True, device=self.device) # trainable parameter
|
||||
self.alpha_optim = torch.optim.Adam((self.alpha_log,), lr=learning_rate)
|
||||
self.target_entropy = np.log(action_dim)
|
||||
|
||||
def select_action(self, state):
|
||||
states = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
|
||||
actions = self.act.get_action(states)
|
||||
return actions.detach().cpu().numpy()[0]
|
||||
|
||||
def explore_env(self, env, target_step):
|
||||
trajectory = list()
|
||||
|
||||
state = self.state
|
||||
for _ in range(target_step):
|
||||
action = self.select_action(state)
|
||||
state, next_state, reward, done = env.step(action)
|
||||
trajectory.append((state, (reward, done, *action)))
|
||||
state = env.reset() if done else next_state
|
||||
self.state = state
|
||||
return trajectory
|
||||
|
||||
def update_net(self, buffer, batch_size, repeat_times, soft_update_tau):
|
||||
buffer.update_now_len()
|
||||
|
||||
alpha = self.alpha_log.exp().detach()
|
||||
obj_critic = obj_actor = None
|
||||
for _ in range(int(buffer.now_len * repeat_times / batch_size)):
|
||||
'''objective of critic (loss function of critic)'''
|
||||
with torch.no_grad():
|
||||
reward, mask, action, state, next_s = buffer.sample_batch(batch_size)
|
||||
next_a, next_log_prob = self.act_target.get_action_logprob(next_s)
|
||||
next_q = torch.min(*self.cri_target.get_q1_q2(next_s, next_a))
|
||||
q_label = reward + mask * (next_q + next_log_prob * alpha)
|
||||
q1, q2 = self.cri.get_q1_q2(state, action)
|
||||
obj_critic = self.criterion(q1, q_label) + self.criterion(q2, q_label)
|
||||
self.optim_update(self.cri_optim, obj_critic)
|
||||
self.soft_update(self.cri_target, self.cri, soft_update_tau)
|
||||
|
||||
'''objective of alpha (temperature parameter automatic adjustment)'''
|
||||
action_pg, log_prob = self.act.get_action_logprob(state) # policy gradient
|
||||
obj_alpha = (self.alpha_log * (log_prob - self.target_entropy).detach()).mean()
|
||||
self.optim_update(self.alpha_optim, obj_alpha)
|
||||
|
||||
'''objective of actor'''
|
||||
alpha = self.alpha_log.exp().detach()
|
||||
with torch.no_grad():
|
||||
self.alpha_log[:] = self.alpha_log.clamp(-20, 2)
|
||||
obj_actor = -(torch.min(*self.cri_target.get_q1_q2(state, action_pg)) + log_prob * alpha).mean()
|
||||
self.optim_update(self.act_optim, obj_actor)
|
||||
|
||||
self.soft_update(self.act_target, self.act, soft_update_tau)
|
||||
|
||||
return obj_critic.item(), obj_actor.item(), alpha.item()
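# Note on the temperature update above: obj_alpha = mean(alpha_log * (log_prob - target_entropy))
# with target_entropy = log(action_dim); alpha_log is clamped to [-20, 2] under no_grad so that
# alpha = exp(alpha_log) stays in a numerically safe range.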
|
||||
|
||||
|
||||
class AgentPPO(AgentBase):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.ClassCri = CriticAdv
|
||||
self.ClassAct = ActorPPO
|
||||
|
||||
self.if_off_policy = False
|
||||
self.ratio_clip = 0.2 # ratio.clamp(1 - clip, 1 + clip)
|
||||
self.lambda_entropy = 0.02 # could be 0.01~0.05
|
||||
self.lambda_gae_adv = 0.98 # could be 0.95~0.99, GAE (Generalized Advantage Estimation. ICLR.2016.)
|
||||
self.get_reward_sum = None # self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
|
||||
|
||||
def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, if_use_gae=False, gpu_id=0, env_num=1):
|
||||
super().init(net_dim, state_dim, action_dim, learning_rate, if_use_gae, gpu_id)
|
||||
self.trajectory_list = list()
|
||||
self.get_reward_sum = self.get_reward_sum_gae if if_use_gae else self.get_reward_sum_raw
|
||||
|
||||
def select_action(self, state):
|
||||
states = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
|
||||
actions, noises = self.act.get_action(states)
|
||||
return actions[0].detach().cpu().numpy(), noises[0].detach().cpu().numpy()
|
||||
|
||||
def explore_env(self, env, target_step):
|
||||
state = self.state
|
||||
trajectory_temp = list()
|
||||
last_done = 0
|
||||
for i in range(target_step):
|
||||
action, noise = self.select_action(state)
|
||||
next_state, reward, done, _ = env.step(np.tanh(action))
|
||||
trajectory_temp.append((state, reward, done, action, noise))
|
||||
if done:
|
||||
state = env.reset()
|
||||
last_done = i
|
||||
else:
|
||||
state = next_state
|
||||
self.state = state
|
||||
|
||||
'''splice list'''
|
||||
trajectory_list = self.trajectory_list + trajectory_temp[:last_done + 1]
|
||||
self.trajectory_list = trajectory_temp[last_done:]
|
||||
return trajectory_list
|
||||
|
||||
def update_net(self, buffer, batch_size, repeat_times, soft_update_tau):
|
||||
with torch.no_grad():
|
||||
buf_len = buffer[0].shape[0]
|
||||
buf_state, buf_action, buf_noise, buf_reward, buf_mask = [ten.to(self.device) for ten in buffer]
|
||||
# (ten_state, ten_action, ten_noise, ten_reward, ten_mask) = buffer
|
||||
|
||||
'''get buf_r_sum, buf_logprob'''
|
||||
bs = 2 ** 10 # set a smaller 'BatchSize' when out of GPU memory.
|
||||
buf_value = [self.cri_target(buf_state[i:i + bs]) for i in range(0, buf_len, bs)]
|
||||
buf_value = torch.cat(buf_value, dim=0)
|
||||
buf_logprob = self.act.get_old_logprob(buf_action, buf_noise)
|
||||
|
||||
buf_r_sum, buf_advantage = self.get_reward_sum(buf_len, buf_reward, buf_mask, buf_value) # detach()
|
||||
buf_advantage = (buf_advantage - buf_advantage.mean()) / (buf_advantage.std() + 1e-5)
|
||||
del buf_noise, buffer[:]
|
||||
|
||||
'''PPO: Surrogate objective of Trust Region'''
|
||||
obj_critic = obj_actor = None
|
||||
for _ in range(int(buf_len / batch_size * repeat_times)):
|
||||
indices = torch.randint(buf_len, size=(batch_size,), requires_grad=False, device=self.device)
|
||||
|
||||
state = buf_state[indices]
|
||||
action = buf_action[indices]
|
||||
r_sum = buf_r_sum[indices]
|
||||
logprob = buf_logprob[indices]
|
||||
advantage = buf_advantage[indices]
|
||||
|
||||
new_logprob, obj_entropy = self.act.get_logprob_entropy(state, action) # it is obj_actor
|
||||
ratio = (new_logprob - logprob.detach()).exp()
|
||||
surrogate1 = advantage * ratio
|
||||
surrogate2 = advantage * ratio.clamp(1 - self.ratio_clip, 1 + self.ratio_clip)
|
||||
obj_surrogate = -torch.min(surrogate1, surrogate2).mean()
|
||||
obj_actor = obj_surrogate + obj_entropy * self.lambda_entropy
|
||||
self.optim_update(self.act_optim, obj_actor)
|
||||
|
||||
value = self.cri(state).squeeze(1) # critic network predicts the reward_sum (Q value) of state
|
||||
obj_critic = self.criterion(value, r_sum) / (r_sum.std() + 1e-6)
|
||||
self.optim_update(self.cri_optim, obj_critic)
|
||||
if self.cri_target is not self.cri: self.soft_update(self.cri_target, self.cri, soft_update_tau)
|
||||
|
||||
a_std_log = getattr(self.act, 'a_std_log', torch.zeros(1))
|
||||
return obj_critic.item(), obj_actor.item(), a_std_log.mean().item() # logging_tuple
|
||||
|
||||
def get_reward_sum_raw(self, buf_len, buf_reward, buf_mask, buf_value) -> (torch.Tensor, torch.Tensor):
|
||||
buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device) # reward sum
|
||||
|
||||
pre_r_sum = 0
|
||||
for i in range(buf_len - 1, -1, -1):
|
||||
buf_r_sum[i] = buf_reward[i] + buf_mask[i] * pre_r_sum
|
||||
pre_r_sum = buf_r_sum[i]
|
||||
buf_advantage = buf_r_sum - (buf_mask * buf_value[:, 0])
|
||||
return buf_r_sum, buf_advantage
|
||||
|
||||
def get_reward_sum_gae(self, buf_len, ten_reward, ten_mask, ten_value):
|
||||
"""tensor, tensor """
|
||||
buf_r_sum = torch.empty(buf_len, dtype=torch.float32, device=self.device) # old policy value
|
||||
buf_advantage = torch.empty(buf_len, dtype=torch.float32, device=self.device) # advantage value
|
||||
|
||||
pre_r_sum = 0
|
||||
pre_advantage = 0 # advantage value of previous step
|
||||
for i in range(buf_len - 1, -1, -1):
|
||||
buf_r_sum[i] = ten_reward[i] + ten_mask[i] * pre_r_sum
|
||||
pre_r_sum = buf_r_sum[i]
|
||||
buf_advantage[i] = ten_reward[i] + ten_mask[i] * (pre_advantage - ten_value[i]) # fix a bug here
|
||||
pre_advantage = ten_value[i] + buf_advantage[i] * self.lambda_gae_adv
|
||||
return buf_r_sum, buf_advantage
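# Illustrative sketch (not from the original repository): a standalone restatement of the backward
# recursion in get_reward_sum_gae() above, assuming mask = gamma * (1 - done) and lam = lambda_gae_adv.
# It mirrors this file's variant rather than the textbook GAE formulas, and relies on the torch import
# already used by this module.
def _gae_recursion_sketch(reward, mask, value, lam=0.98):
    buf_len = reward.shape[0]
    r_sum = torch.empty(buf_len)      # discounted reward-to-go
    advantage = torch.empty(buf_len)  # advantage estimate
    pre_r_sum, pre_advantage = 0.0, 0.0
    for i in range(buf_len - 1, -1, -1):
        r_sum[i] = reward[i] + mask[i] * pre_r_sum
        pre_r_sum = r_sum[i]
        advantage[i] = reward[i] + mask[i] * (pre_advantage - value[i])
        pre_advantage = value[i] + advantage[i] * lam
    return r_sum, advantage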
|
|
@ -0,0 +1,2 @@
|
|||
state,lon,lat
|
||||
iowa,93.5,41.1
|
|
|
@ -0,0 +1,69 @@
|
|||
class Constant:
|
||||
MONTHS_LEN = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
|
||||
MAX_STEP_HOURS = 24 * 30
|
||||
|
||||
|
||||
class DataManager:
|
||||
def __init__(self) -> None:
|
||||
self.Pv = []
|
||||
self.Prices = []
|
||||
self.Load_Consumption = []
|
||||
self.Temperature = []
|
||||
self.Irradiance = []
|
||||
self.Wind = []
|
||||
|
||||
def add_pv_element(self, element): self.Pv.append(element)
|
||||
|
||||
def add_price_element(self, element): self.Prices.append(element)
|
||||
|
||||
def add_load_element(self, element): self.Load_Consumption.append(element)
|
||||
|
||||
def add_temperature_element(self, element): self.Temperature.append(element)
|
||||
|
||||
def add_irradiance_element(self, element): self.Irradiance.append(element)
|
||||
|
||||
def add_wind_element(self, element): self.Wind.append(element)
|
||||
|
||||
# get current time data based on given month day, and day_time
|
||||
def get_pv_data(self, month, day, day_time):
|
||||
return self.Pv[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time]
|
||||
|
||||
def get_price_data(self, month, day, day_time):
|
||||
return self.Prices[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time]
|
||||
|
||||
def get_load_cons_data(self, month, day, day_time):
|
||||
return self.Load_Consumption[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time]
|
||||
|
||||
def get_temperature_data(self, month, day, day_time):
|
||||
return self.Temperature[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time]
|
||||
|
||||
def get_irradiance_data(self, month, day, day_time):
|
||||
return self.Irradiance[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time]
|
||||
|
||||
def get_wind_data(self, month, day, day_time):
|
||||
return self.Wind[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time]
|
||||
|
||||
# get series data for one episode
|
||||
def get_series_pv_data(self, month, day):
|
||||
return self.Pv[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24:
|
||||
(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + 24]
|
||||
|
||||
def get_series_price_data(self, month, day):
|
||||
return self.Prices[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24:
|
||||
(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + 24]
|
||||
|
||||
def get_series_load_cons_data(self, month, day):
|
||||
return self.Load_Consumption[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24:
|
||||
(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + 24]
|
||||
|
||||
def get_series_temperature_data(self, month, day):
|
||||
return self.Temperature[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24:
|
||||
(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + 24]
|
||||
|
||||
def get_series_irradiance_data(self, month, day):
|
||||
return self.Irradiance[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24:
|
||||
(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + 24]
|
||||
|
||||
def get_series_wind_data(self, month, day):
|
||||
return self.Wind[(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24:
|
||||
(sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + 24]
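# Illustrative sketch (not from the original repository): every getter above maps
# (month, day, day_time) to a flat hourly index into the year-long series, assuming
# a 1-based month/day and a 0-based hour of day.
def _hour_index_sketch(month, day, day_time):
    # hours in the completed months, plus completed days of the current month, plus the hour
    return (sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24 + day_time

# e.g. _hour_index_sketch(2, 1, 0) == 744 == 31 * 24, the first hour of February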
|
|
@ -0,0 +1,194 @@
|
|||
import gym
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from gym import spaces
|
||||
from module import *
|
||||
from parameters import *
|
||||
from data_manager import *
|
||||
|
||||
|
||||
class ESSEnv(gym.Env):
|
||||
def __init__(self, **kwargs):
|
||||
super(ESSEnv, self).__init__()
|
||||
self.excess = None
|
||||
self.unbalance = None
|
||||
self.shedding = None
|
||||
self.real_unbalance = None
|
||||
self.operation_cost = None
|
||||
self.current_output = None
|
||||
self.final_step_outputs = None
|
||||
self.data_manager = DataManager()
|
||||
self._load_year_data()
|
||||
self.month = None
|
||||
self.day = None
|
||||
self.TRAIN = True
|
||||
self.current_time = None
|
||||
self.episode_length = kwargs.get('episode_length', 24)
|
||||
self.battery_parameters = kwargs.get('battery_parameters', battery_parameters)
|
||||
self.dg_parameters = kwargs.get('dg_parameters', dg_parameters)
|
||||
self.solar_parameters = kwargs.get('solar_parameters', solar_parameters)
|
||||
self.wind_parameters = kwargs.get('wind_parameters', wind_parameters)
|
||||
self.penalty_coefficient = 50 # coefficient of the soft penalty on power that cannot be exchanged
|
||||
self.sell_coefficient = 0.5 # discount applied to revenue from selling surplus power to the grid
|
||||
|
||||
self.grid = Grid()
|
||||
self.battery = Battery(self.battery_parameters)
|
||||
self.dg1 = DG(self.dg_parameters['gen_1'])
|
||||
self.dg2 = DG(self.dg_parameters['gen_2'])
|
||||
self.dg3 = DG(self.dg_parameters['gen_3'])
|
||||
self.solar = Solar(self.solar_parameters)
|
||||
self.wind = Wind(self.wind_parameters)
|
||||
|
||||
self.action_space = spaces.Box(low=-1, high=1, shape=(5,), dtype=np.float32) # a voltage-regulation action has been added
|
||||
self.state_space = spaces.Box(low=-np.inf, high=np.inf, shape=(10,), dtype=np.float32)
|
||||
|
||||
def reset(self, *args):
|
||||
self.month = np.random.randint(1, 13) # randomly pick a month in 1..12
|
||||
if self.TRAIN:
|
||||
self.day = np.random.randint(1, 20)
|
||||
else:
|
||||
self.day = np.random.randint(20, Constant.MONTHS_LEN[self.month - 1] - 1) # evaluation days start at 20; MONTHS_LEN is 0-indexed, so use month - 1
|
||||
self.current_time = 0
|
||||
self.battery.reset()
|
||||
self.dg1.reset()
|
||||
self.dg2.reset()
|
||||
self.dg3.reset()
|
||||
self.solar.reset()
|
||||
self.wind.reset()
|
||||
return self._build_state()
|
||||
|
||||
def _build_state(self):
|
||||
soc = self.battery.SOC()
|
||||
dg1_output = self.dg1.current_output
|
||||
dg2_output = self.dg2.current_output
|
||||
dg3_output = self.dg3.current_output
|
||||
time_step = self.current_time
|
||||
|
||||
price = self.data_manager.get_price_data(self.month, self.day, self.current_time)
|
||||
house_load = self.data_manager.get_load_cons_data(self.month, self.day, self.current_time)
|
||||
temperature = self.data_manager.get_temperature_data(self.month, self.day, self.current_time)
|
||||
irradiance = self.data_manager.get_irradiance_data(self.month, self.day, self.current_time)
|
||||
wind_speed = self.data_manager.get_wind_data(self.month, self.day, self.current_time)
|
||||
# print('house_load:', house_load)
|
||||
pv_generation = self.solar.step(temperature, irradiance)
|
||||
wd_generation = self.wind.step(wind_speed)
|
||||
generation = pv_generation + wd_generation
|
||||
net_load = house_load - generation
|
||||
|
||||
obs = np.concatenate((np.float32(time_step), np.float32(price), np.float32(soc), np.float32(net_load),
|
||||
np.float32(dg1_output), np.float32(dg2_output), np.float32(dg3_output),
|
||||
np.float32(temperature), np.float32(irradiance), np.float32(wind_speed)), axis=None)
|
||||
return obs
|
||||
|
||||
def step(self, action): # state transition: current_obs->take_action->get_reward->get_finish->next_obs
|
||||
# put action into each component
|
||||
current_obs = self._build_state()
|
||||
temperature = current_obs[7]
|
||||
irradiance = current_obs[8]
|
||||
wind_speed = current_obs[9]
|
||||
self.battery.step(action[0]) # execute the state-transition part, battery.current_capacity also changed
|
||||
self.dg1.step(action[1])
|
||||
self.dg2.step(action[2])
|
||||
self.dg3.step(action[3])
|
||||
self.solar.step(temperature, irradiance, action[4]) # keep the argument order of Solar.step(temperature, irradiance, action_voltage)
|
||||
self.wind.step(wind_speed)
|
||||
self.current_output = np.array((self.dg1.current_output, self.dg2.current_output, self.dg3.current_output,
|
||||
-self.battery.energy_change, self.solar.current_power, self.wind.current_power))
|
||||
actual_production = sum(self.current_output)
|
||||
price = current_obs[1]
|
||||
netload = current_obs[3]
|
||||
# print('actual_production:', actual_production, 'netload:', netload)
|
||||
unbalance = actual_production - netload
|
||||
|
||||
reward = 0
|
||||
excess_penalty = 0 # penalty for surplus generation
|
||||
deficient_penalty = 0 # penalty for a generation deficit
|
||||
sell_benefit = 0
|
||||
buy_cost = 0
|
||||
self.excess = 0
|
||||
self.shedding = 0
|
||||
if unbalance >= 0: # now in excess condition
|
||||
if unbalance <= self.grid.exchange_ability:
|
||||
# revenue from selling to the grid is small (price roughly in [0.029, 0.1])
|
||||
sell_benefit = self.grid.get_cost(price, unbalance) * self.sell_coefficient
|
||||
else:
|
||||
sell_benefit = self.grid.get_cost(price, self.grid.exchange_ability) * self.sell_coefficient
|
||||
# the remaining surplus that even the grid cannot absorb
|
||||
self.excess = unbalance - self.grid.exchange_ability
|
||||
excess_penalty = self.excess * self.penalty_coefficient
|
||||
else: # unbalance < 0: load-shedding mode, the deficiency penalty applies
|
||||
if abs(unbalance) <= self.grid.exchange_ability:
|
||||
buy_cost = self.grid.get_cost(price, abs(unbalance))
|
||||
else:
|
||||
buy_cost = self.grid.get_cost(price, self.grid.exchange_ability)
|
||||
self.shedding = abs(unbalance) - self.grid.exchange_ability
|
||||
deficient_penalty = self.shedding * self.penalty_coefficient
|
||||
battery_cost = self.battery.get_cost(self.battery.energy_change)
|
||||
dg1_cost = self.dg1.get_cost(self.dg1.current_output)
|
||||
dg2_cost = self.dg2.get_cost(self.dg2.current_output)
|
||||
dg3_cost = self.dg3.get_cost(self.dg3.current_output)
|
||||
solar_cost = self.solar.get_cost(self.solar.current_power)
|
||||
wind_cost = self.wind.gen_cost(self.wind.current_power)
|
||||
|
||||
reward -= (battery_cost + dg1_cost + dg2_cost + dg3_cost + solar_cost + wind_cost + excess_penalty +
|
||||
deficient_penalty - sell_benefit + buy_cost) / 1e3
|
||||
self.operation_cost = (battery_cost + dg1_cost + dg2_cost + dg3_cost + solar_cost + wind_cost + excess_penalty +
|
||||
deficient_penalty - sell_benefit + buy_cost)
|
||||
self.unbalance = unbalance
|
||||
self.real_unbalance = self.shedding + self.excess
|
||||
final_step_outputs = [self.dg1.current_output, self.dg2.current_output, self.dg3.current_output,
|
||||
self.battery.current_capacity, self.solar.current_power, self.wind.current_power]
|
||||
self.current_time += 1
|
||||
finish = (self.current_time == self.episode_length)
|
||||
if finish:
|
||||
self.final_step_outputs = final_step_outputs
|
||||
self.current_time = 0
|
||||
next_obs = self.reset()
|
||||
else:
|
||||
next_obs = self._build_state()
|
||||
return current_obs, next_obs, float(reward), finish
|
||||
|
||||
# def render(self, current_obs, next_obs, reward, finish):
|
||||
# print('day={},hour={:2d}, state={}, next_state={}, reward={:.4f}, terminal={}\n'.
|
||||
# format(self.day, self.current_time, current_obs, next_obs, reward, finish))
|
||||
|
||||
def _load_year_data(self):
|
||||
# pv_df = pd.read_csv('data/pv.csv', sep=',')
|
||||
price_df = pd.read_csv('data/prices.csv', sep=';')
|
||||
load_df = pd.read_csv('data/houseload.csv', sep=',')
|
||||
irradiance_df = pd.read_csv('data/irradiance.csv', sep=',')
|
||||
temperature_df = pd.read_csv('data/temper.csv', sep=',')
|
||||
wind_df = pd.read_csv('data/wind.csv', sep=',')
|
||||
|
||||
# pv = pv_df['pv'].to_numpy(dtype=float)
|
||||
price = price_df['Price'].apply(lambda x: x.replace(',', '.')).to_numpy(dtype=float)
|
||||
load = load_df['houseload'].to_numpy(dtype=float)
|
||||
irradiance = irradiance_df['irradiance'].to_numpy(dtype=float)
|
||||
temperature = temperature_df['t2m'].to_numpy(dtype=float)
|
||||
wind = wind_df['wind_speed'].to_numpy(dtype=float)
|
||||
|
||||
'''redesign the magnitude for price and amount of generation as well as demand'''
|
||||
def process_elements(elements, transform_function, add_function):
|
||||
for element in elements:
|
||||
transformed_element = transform_function(element)
|
||||
add_function(transformed_element)
|
||||
|
||||
# process_elements(pv, lambda x: x, self.data_manager.add_pv_element)
|
||||
process_elements(price, lambda x: max(x / 10, 0.5), self.data_manager.add_price_element)
|
||||
process_elements(load, lambda x: x * 5, self.data_manager.add_load_element)
|
||||
process_elements(irradiance, lambda x: x, self.data_manager.add_irradiance_element)
|
||||
process_elements(temperature, lambda x: x - 273.15, self.data_manager.add_temperature_element)
|
||||
process_elements(wind, lambda x: x, self.data_manager.add_wind_element)
|
||||
|
||||
# if __name__ == '__main__':
|
||||
# env = ESSEnv()
|
||||
# env.TRAIN = False
|
||||
# rewards = []
|
||||
# env.reset()
|
||||
# tem_action = [0.1, 0.1, 0.1, 0.1, 0.1]
|
||||
# for _ in range(144):
|
||||
# print(f'current month is {env.month}, current day is {env.day}, current time is {env.current_time}')
|
||||
# current_obs, next_obs, reward, finish = env.step(tem_action)
|
||||
# env.render(current_obs, next_obs, reward, finish)
|
||||
# current_obs = next_obs
|
||||
# rewards.append(reward)
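# Illustrative sketch (not from the original repository): a standalone recomputation of the
# cost/reward accounting in ESSEnv.step(), assuming the same penalty_coefficient, sell_coefficient
# and grid exchange limit as above; unit_costs stands for the summed DG, battery and renewable costs.
def _reward_sketch(unbalance, price, unit_costs, exchange_ability=100,
                   penalty_coefficient=50, sell_coefficient=0.5):
    traded = min(abs(unbalance), exchange_ability)         # energy exchanged with the grid
    leftover = max(abs(unbalance) - exchange_ability, 0)   # surplus to dump or load to shed
    if unbalance >= 0:
        cash_flow = -sell_coefficient * price * traded     # revenue from selling the surplus
    else:
        cash_flow = price * traded                         # cost of buying the deficit
    cost = unit_costs + penalty_coefficient * leftover + cash_flow
    return -cost / 1e3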
|
|
@ -0,0 +1,35 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
import os
|
||||
from nltk import word_tokenize
|
||||
|
||||
|
||||
def func(i, j):
|
||||
directory_path = "C:/Users/97532/Desktop/DRL-for-Energy-Systems/data/residential"
|
||||
entries = os.listdir(directory_path)[i:j]
|
||||
folders = [entry for entry in entries if os.path.isfile(os.path.join(directory_path, entry))] # keep only entries that are actual files
|
||||
entries = [os.path.join(directory_path, folder) for folder in folders]
|
||||
combined_data = pd.DataFrame()
|
||||
|
||||
for file_path in entries:
|
||||
current_data = pd.read_csv(file_path, encoding='utf-8')
|
||||
tokenize_col = current_data.columns
|
||||
tokenized_sentences_list = [word_tokenize(text) for text in tokenize_col]
|
||||
|
||||
occurrences_electricity = np.array(
|
||||
[(i, j) for i, sublist in enumerate(tokenized_sentences_list) for j, word in enumerate(sublist) if
|
||||
word == 'Electricity'])[:, 0]
|
||||
|
||||
Electricity_consumption_wind_solar = current_data[tokenize_col[occurrences_electricity]].sum(axis=1)
|
||||
current_data = Electricity_consumption_wind_solar.to_frame("Household_load")
|
||||
|
||||
combined_data = pd.concat([combined_data, current_data], axis=1)
|
||||
|
||||
Series_type = combined_data.sum(axis=1)
|
||||
Series_type.to_csv('houseload.csv', index=False)
|
||||
total_energy = np.array(Series_type).ravel()
|
||||
return total_energy
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
func(0, 38)
|
|
@ -0,0 +1,4 @@
|
|||
|
||||
Gurobi 11.0.2 (win64, Python) logging started Fri May 17 12:42:18 2024
|
||||
|
||||
Set parameter LogFile to value "gurobi.log"
|
|
@ -0,0 +1,173 @@
|
|||
import numpy as np
|
||||
|
||||
|
||||
class DG:
|
||||
"""simulate a simple diesel generator"""
|
||||
|
||||
def __init__(self, parameters):
|
||||
self.current_output = None
|
||||
self.name = parameters.keys()
|
||||
self.a_factor = parameters['a']
|
||||
self.b_factor = parameters['b']
|
||||
self.c_factor = parameters['c']
|
||||
self.power_output_max = parameters['power_output_max']
|
||||
self.power_output_min = parameters['power_output_min']
|
||||
self.ramping_up = parameters['ramping_up']
|
||||
self.ramping_down = parameters['ramping_down']
|
||||
self.last_step_output = None
|
||||
|
||||
def step(self, action_gen):
|
||||
output_change = action_gen * self.ramping_up # constrain the output_change with ramping up boundary
|
||||
output = self.current_output + output_change
|
||||
if output > 0:
|
||||
output = max(self.power_output_min, min(self.power_output_max, output)) # meet the constraint
|
||||
else:
|
||||
output = 0
|
||||
self.current_output = output
|
||||
|
||||
def get_cost(self, output):
|
||||
if output <= 0:
|
||||
cost = 0
|
||||
else:
|
||||
cost = (self.a_factor * pow(output, 2) + self.b_factor * output + self.c_factor)
|
||||
return cost
|
||||
|
||||
def reset(self):
|
||||
self.current_output = 0
|
||||
|
||||
|
||||
class Battery:
|
||||
"""simulate a simple battery"""
|
||||
|
||||
def __init__(self, parameters):
|
||||
self.current_capacity = None
|
||||
self.energy_change = None
|
||||
self.capacity = parameters['capacity']
|
||||
self.max_soc = parameters['max_soc']
|
||||
self.initial_capacity = parameters['initial_capacity']
|
||||
self.min_soc = parameters['min_soc']
|
||||
self.degradation = parameters['degradation'] # degradation cost coefficient
|
||||
self.max_charge = parameters['max_charge'] # max charge ability
|
||||
self.max_discharge = parameters['max_discharge']
|
||||
self.efficiency = parameters['efficiency']
|
||||
|
||||
def step(self, action_battery):
|
||||
energy = action_battery * self.max_charge
|
||||
updated_capacity = max(self.min_soc,
|
||||
min(self.max_soc, (self.current_capacity * self.capacity + energy) / self.capacity))
|
||||
# positive when charging, negative when discharging
|
||||
self.energy_change = (updated_capacity - self.current_capacity) * self.capacity
|
||||
self.current_capacity = updated_capacity # update capacity to the current condition
|
||||
|
||||
def get_cost(self, energy): # cost depends on the energy change
|
||||
cost = energy ** 2 * self.degradation
|
||||
return cost
|
||||
|
||||
def SOC(self):
|
||||
return self.current_capacity
|
||||
|
||||
def reset(self):
|
||||
self.current_capacity = np.random.uniform(0.2, 0.8)
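# Illustrative sketch (not from the original repository): one Battery.step() update done by hand
# with the default battery_parameters (capacity 500 kWh, max_charge 100 kW, SOC limits 0.2-0.8);
# the efficiency term is left out, as in the method above.
def _battery_step_sketch(soc, action, capacity=500, max_charge=100, min_soc=0.2, max_soc=0.8):
    energy = action * max_charge
    new_soc = max(min_soc, min(max_soc, (soc * capacity + energy) / capacity))
    energy_change = (new_soc - soc) * capacity  # positive when charging, negative when discharging
    return new_soc, energy_change

# e.g. _battery_step_sketch(0.4, 1.0) -> (0.6, 100.0): a full-power charge raises the SOC by 0.2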
|
||||
|
||||
|
||||
class Solar:
|
||||
def __init__(self, parameters):
|
||||
self.current_power = None
|
||||
self.base_voltage = parameters['V_b']
|
||||
self.sc_current = parameters['I_sc0']
|
||||
self.oc_voltage = parameters['V_oc0']
|
||||
self.s_resistance = parameters['R_s']
|
||||
self.sh_resistance = parameters['R_sh']
|
||||
self.temper_coefficient = parameters['k_v']
|
||||
self.opex_cofficient = parameters['k_o']
|
||||
self.refer_irradiance = parameters['G_ref']
|
||||
self.refer_temperature = parameters['T_ref']
|
||||
|
||||
def step(self, temperature, irradiance, action_voltage=0):
|
||||
I_sc = self.sc_current * (irradiance / self.refer_irradiance)
|
||||
V_oc = self.oc_voltage + self.temper_coefficient * (temperature - self.refer_temperature)
|
||||
|
||||
current = I_sc - (V_oc / self.sh_resistance)
|
||||
# current = I_sc
|
||||
# for _ in range(10):  # number of iterations
|
||||
# new_current = I_sc - (V_oc + current * self.s_resistance) / self.sh_resistance
|
||||
# if abs(new_current - current) < 1e-6:  # convergence criterion
|
||||
# break
|
||||
# current = new_current
|
||||
self.current_power = max((1 + action_voltage) * self.base_voltage * current, 0)
|
||||
return self.current_power
|
||||
|
||||
def get_cost(self, current_power):
|
||||
cost = current_power * self.opex_cofficient
|
||||
return cost
|
||||
|
||||
def reset(self):
|
||||
self.current_power = 0
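# Illustrative sketch (not from the original repository): one evaluation of the simplified PV model
# in Solar.step() with the default solar_parameters (I_sc0 = 8 A, V_oc0 = 36 V, k_v = -0.2 V/°C,
# R_sh = 100 Ω, V_b = 24 V, G_ref = 1000 W/m², T_ref = 25 °C).
def _pv_power_sketch(temperature, irradiance, action_voltage=0.0):
    i_sc = 8.0 * (irradiance / 1000)        # short-circuit current scaled by irradiance
    v_oc = 36.0 - 0.2 * (temperature - 25)  # open-circuit voltage shifted by temperature
    current = i_sc - v_oc / 100.0           # shunt-resistance approximation of the cell current
    return max((1 + action_voltage) * 24 * current, 0)

# e.g. _pv_power_sketch(30, 800) ≈ 145.2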
|
||||
|
||||
|
||||
class Wind:
|
||||
def __init__(self, parameters):
|
||||
self.current_power = None
|
||||
self.cutin_speed = parameters['cutin_speed']
|
||||
self.cutout_speed = parameters['cutout_speed']
|
||||
self.rated_speed = parameters['rated_speed']
|
||||
self.air_density = parameters['air_density']
|
||||
self.rotor_radius = parameters['rotor_radius']
|
||||
self.power_coefficient = parameters['power_coefficient']
|
||||
self.generator_efficiency = parameters['generator_efficiency']
|
||||
self.opex_cofficient = parameters['opex_cofficient']
|
||||
|
||||
def step(self, wind_speed):
|
||||
if self.cutin_speed <= wind_speed < self.rated_speed:
|
||||
self.current_power = (0.5 * self.air_density * self.rotor_radius ** 2 * wind_speed ** 3 *
|
||||
self.power_coefficient * self.generator_efficiency) / 1e3
|
||||
elif self.rated_speed <= wind_speed < self.cutout_speed:
|
||||
self.current_power = (0.5 * self.air_density * self.rotor_radius ** 2 * self.rated_speed ** 3 *
|
||||
self.power_coefficient * self.generator_efficiency) / 1e3
|
||||
else:
|
||||
self.current_power = 0
|
||||
|
||||
return self.current_power
|
||||
|
||||
def gen_cost(self, current_power):
|
||||
cost = current_power * self.opex_cofficient
|
||||
return cost
|
||||
|
||||
def reset(self):
|
||||
self.current_power = 0
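# Illustrative sketch (not from the original repository): the Wind.step() power curve above,
# evaluated with the default wind_parameters (cut-in 3 m/s, rated 8 m/s, cut-out 12 m/s,
# rho = 1.225, r = 25, Cp = 0.5, eta = 0.9); speeds outside the operating band give 0 kW.
def _wind_power_sketch(v, rho=1.225, r=25, cp=0.5, eta=0.9, cutin=3, rated=8, cutout=12):
    if cutin <= v < rated:
        return 0.5 * rho * r ** 2 * v ** 3 * cp * eta / 1e3
    if rated <= v < cutout:
        return 0.5 * rho * r ** 2 * rated ** 3 * cp * eta / 1e3
    return 0.0

# e.g. _wind_power_sketch(5) ≈ 21.5 kW and _wind_power_sketch(10) ≈ 88.2 kW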
|
||||
|
||||
|
||||
class Grid:
|
||||
def __init__(self):
|
||||
self.on = True
|
||||
self.delta = 1
|
||||
if self.on:
|
||||
self.exchange_ability = 100
|
||||
else:
|
||||
self.exchange_ability = 0
|
||||
|
||||
def get_cost(self, current_price, energy_exchange):
|
||||
return current_price * energy_exchange * self.delta
|
||||
|
||||
def retrieve_past_price(self):
|
||||
result = []
|
||||
# start and end indices of the prices over the previous 24 hours
|
||||
start_index = max(0, 24 * (self.day - 1) + self.time - 24)
|
||||
end_index = 24 * (self.day - 1) + self.time
|
||||
past_price = self.price[start_index:end_index]
|
||||
result.extend(past_price)
|
||||
# current_day_price = self.price[24 * self.day:24 * self.day + self.time]
|
||||
# result.extend(current_day_price)
|
||||
return result
|
||||
|
||||
# def retrive_past_price(self):
|
||||
# result = []
|
||||
# if self.day < 1:
|
||||
# past_price = self.past_price
|
||||
# else:
|
||||
# past_price = self.price[24 * (self.day - 1):24 * self.day]
|
||||
# for item in past_price[(self.time - 24)::]:
|
||||
# result.append(item)
|
||||
# for item in self.price[24 * self.day:(24 * self.day + self.time)]:
|
||||
# result.append(item)
|
||||
# return result
|
|
@ -0,0 +1,136 @@
|
|||
import numpy as np
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
|
||||
class Actor(nn.Module):
|
||||
def __init__(self, mid_dim, state_dim, action_dim):
|
||||
super().__init__()
|
||||
self.net = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(),
|
||||
nn.Linear(mid_dim, mid_dim), nn.ReLU(),
|
||||
nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
|
||||
nn.Linear(mid_dim, action_dim))
|
||||
|
||||
def forward(self, state):
|
||||
return self.net(state).tanh() # action.tanh()
|
||||
|
||||
def get_action(self, state, action_std):
|
||||
action = self.net(state).tanh()
|
||||
noise = (torch.randn_like(action) * action_std).clamp(-0.5, 0.5)
|
||||
return (action + noise).clamp(-1.0, 1.0)
|
||||
|
||||
|
||||
class ActorSAC(nn.Module):
|
||||
def __init__(self, mid_dim, state_dim, action_dim):
|
||||
super().__init__()
|
||||
self.net_state = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(),
|
||||
nn.Linear(mid_dim, mid_dim), nn.ReLU(), )
|
||||
self.net_a_avg = nn.Sequential(nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
|
||||
nn.Linear(mid_dim, action_dim)) # the average of action
|
||||
self.net_a_std = nn.Sequential(nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
|
||||
nn.Linear(mid_dim, action_dim)) # the log_std of action
|
||||
self.log_sqrt_2pi = np.log(np.sqrt(2 * np.pi))
|
||||
|
||||
def forward(self, state):
|
||||
tmp = self.net_state(state)
|
||||
return self.net_a_avg(tmp).tanh() # action
|
||||
|
||||
def get_action(self, state):
|
||||
t_tmp = self.net_state(state)
|
||||
a_avg = self.net_a_avg(t_tmp) # NOTICE! it is a_avg without .tanh()
|
||||
a_std = self.net_a_std(t_tmp).clamp(-20, 2).exp()
|
||||
return torch.normal(a_avg, a_std).tanh() # re-parameterize
|
||||
|
||||
def get_action_logprob(self, state):
|
||||
t_tmp = self.net_state(state)
|
||||
a_avg = self.net_a_avg(t_tmp) # NOTICE! it needs a_avg.tanh()
|
||||
a_std_log = self.net_a_std(t_tmp).clamp(-20, 2)
|
||||
a_std = a_std_log.exp()
|
||||
|
||||
noise = torch.randn_like(a_avg, requires_grad=True)
|
||||
a_tan = (a_avg + a_std * noise).tanh() # action.tanh()
|
||||
|
||||
log_prob = a_std_log + self.log_sqrt_2pi + noise.pow(2) * 0.5
|
||||
log_prob = log_prob + (-a_tan.pow(2) + 1.000001).log() # fix log_prob using the derivative of action.tanh()
|
||||
return a_tan, log_prob.sum(1, keepdim=True)
|
||||
|
||||
|
||||
class ActorPPO(nn.Module):
|
||||
def __init__(self, mid_dim, state_dim, action_dim):
|
||||
super().__init__()
|
||||
self.net = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(),
|
||||
nn.Linear(mid_dim, mid_dim), nn.ReLU(),
|
||||
nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
|
||||
nn.Linear(mid_dim, action_dim), )
|
||||
|
||||
# the logarithm (log) of standard deviation (std) of action, it is a trainable parameter
|
||||
self.a_std_log = nn.Parameter(torch.zeros((1, action_dim)) - 0.5, requires_grad=True)
|
||||
self.sqrt_2pi_log = np.log(np.sqrt(2 * np.pi))
|
||||
|
||||
def forward(self, state):
|
||||
return self.net(state).tanh() # action.tanh()# in this way limit the data output of action
|
||||
|
||||
def get_action(self, state):
|
||||
a_avg = self.net(state) # mean
|
||||
a_std = self.a_std_log.exp() # standard deviation
|
||||
|
||||
noise = torch.randn_like(a_avg)
|
||||
action = a_avg + noise * a_std
|
||||
return action, noise
|
||||
|
||||
def get_logprob_entropy(self, state, action):
|
||||
a_avg = self.net(state)
|
||||
a_std = self.a_std_log.exp()
|
||||
|
||||
delta = ((a_avg - action) / a_std).pow(2) * 0.5
|
||||
logprob = -(self.a_std_log + self.sqrt_2pi_log + delta).sum(1) # new_logprob
|
||||
|
||||
dist_entropy = (logprob.exp() * logprob).mean() # policy entropy
|
||||
return logprob, dist_entropy
|
||||
|
||||
def get_old_logprob(self, _action, noise): # noise = action - a_noise
|
||||
delta = noise.pow(2) * 0.5
|
||||
return -(self.a_std_log + self.sqrt_2pi_log + delta).sum(1) # old_logprob
|
||||
|
||||
|
||||
class Critic(nn.Module):
|
||||
def __init__(self, mid_dim, state_dim, action_dim):
|
||||
super().__init__()
|
||||
self.net = nn.Sequential(nn.Linear(state_dim + action_dim, mid_dim), nn.ReLU(),
|
||||
nn.Linear(mid_dim, mid_dim), nn.ReLU(),
|
||||
nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
|
||||
nn.Linear(mid_dim, 1))
|
||||
|
||||
def forward(self, state, action):
|
||||
return self.net(torch.cat((state, action), dim=1)) # q value
|
||||
|
||||
|
||||
class CriticAdv(nn.Module):
|
||||
def __init__(self, mid_dim, state_dim, _action_dim):
|
||||
super().__init__()
|
||||
self.net = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(),
|
||||
nn.Linear(mid_dim, mid_dim), nn.ReLU(),
|
||||
nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
|
||||
nn.Linear(mid_dim, 1))
|
||||
|
||||
def forward(self, state):
|
||||
return self.net(state) # advantage value
|
||||
|
||||
|
||||
class CriticTwin(nn.Module): # shared parameter
|
||||
def __init__(self, mid_dim, state_dim, action_dim):
|
||||
super().__init__()
|
||||
self.net_sa = nn.Sequential(nn.Linear(state_dim + action_dim, mid_dim), nn.ReLU(),
|
||||
nn.Linear(mid_dim, mid_dim), nn.ReLU()) # concat(state, action)
|
||||
self.net_q1 = nn.Sequential(nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
|
||||
nn.Linear(mid_dim, 1)) # q1 value
|
||||
self.net_q2 = nn.Sequential(nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
|
||||
nn.Linear(mid_dim, 1)) # q2 value
|
||||
|
||||
def forward(self, state, action):
|
||||
tmp = self.net_sa(torch.cat((state, action), dim=1))
|
||||
return self.net_q1(tmp) # one Q value
|
||||
|
||||
def get_q1_q2(self, state, action):
|
||||
tmp = self.net_sa(torch.cat((state, action), dim=1))
|
||||
return self.net_q1(tmp), self.net_q2(tmp) # two Q values
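# Illustrative sketch (not from the original repository): wiring the networks above together for a
# single forward pass, assuming state_dim = 10 and action_dim = 5 as used by ESSEnv, and an
# arbitrary mid_dim of 64.
if __name__ == '__main__':
    mid_dim, state_dim, action_dim = 64, 10, 5
    actor = Actor(mid_dim, state_dim, action_dim)
    critic = CriticTwin(mid_dim, state_dim, action_dim)
    state = torch.rand(4, state_dim)          # a batch of 4 states
    action = actor(state)                     # deterministic actions in [-1, 1]
    q1, q2 = critic.get_q1_q2(state, action)  # twin Q estimates, each of shape (4, 1)
    print(q1.shape, q2.shape)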
|
|
@ -0,0 +1,49 @@
|
|||
import numpy as np
|
||||
|
||||
solar_parameters = {
|
||||
'I_sc0': 8.0,  # short-circuit current under reference conditions (A)
'V_b': 24,  # base voltage (V)
'V_oc0': 36.0,  # open-circuit voltage under reference conditions (V)
'R_s': 0.1,  # series resistance (Ω)
'R_sh': 100.0,  # shunt resistance (Ω)
'k_v': -0.2,  # temperature coefficient of the open-circuit voltage (V/°C)
'k_o': 0.001,  # variable cost coefficient (CNY/kWh)
'G_ref': 1000,  # reference irradiance (W/m²)
'T_ref': 25,  # reference temperature (°C)
|
||||
}
|
||||
|
||||
wind_parameters = {
|
||||
'cutin_speed': 3,
|
||||
'cutout_speed': 12,
|
||||
'rated_speed': 8,
|
||||
'air_density': 1.225,
|
||||
'rotor_radius': 25,
|
||||
'power_coefficient': 0.5,
|
||||
'generator_efficiency': 0.9,
|
||||
'opex_cofficient': 0.01,
|
||||
}
|
||||
|
||||
battery_parameters = {
|
||||
'capacity': 500,  # kWh
|
||||
'max_charge': 100, # kw
|
||||
'max_discharge': 100, # kw
|
||||
'efficiency': 0.9,
|
||||
'degradation': 0.01,  # euro/kWh
|
||||
'max_soc': 0.8,
|
||||
'min_soc': 0.2,
|
||||
'initial_capacity': 0.4
|
||||
}
|
||||
|
||||
dg_parameters = {
|
||||
'gen_1': {'a': 0.0034, 'b': 3, 'c': 30, 'd': 0.03, 'e': 4.2, 'f': 0.031,
|
||||
'power_output_max': 150, 'power_output_min': 10,
|
||||
'ramping_up': 100, 'ramping_down': 100, 'min_up': 2, 'min_down': 1},
|
||||
|
||||
'gen_2': {'a': 0.001, 'b': 10, 'c': 40, 'd': 0.03, 'e': 4.2, 'f': 0.031,
|
||||
'power_output_max': 375, 'power_output_min': 50,
|
||||
'ramping_up': 100, 'ramping_down': 100, 'min_up': 2, 'min_down': 1},
|
||||
|
||||
'gen_3': {'a': 0.001, 'b': 15, 'c': 70, 'd': 0.03, 'e': 4.2, 'f': 0.031,
|
||||
'power_output_max': 500, 'power_output_min': 100,
|
||||
'ramping_up': 200, 'ramping_down': 200, 'min_up': 2, 'min_down': 1}
|
||||
}
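# Illustrative sketch (not from the original repository): DG.get_cost() in module.py evaluates the
# quadratic fuel cost a * P**2 + b * P + c with the coefficients above, for example for gen_1 at
# its maximum output:
if __name__ == '__main__':
    gen = dg_parameters['gen_1']
    p = gen['power_output_max']  # 150 kW
    print(gen['a'] * p ** 2 + gen['b'] * p + gen['c'])  # 0.0034 * 150**2 + 3 * 150 + 30 = 556.5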
|
|
@ -0,0 +1,150 @@
|
|||
import os
|
||||
import pickle
|
||||
import matplotlib
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import seaborn as sns
|
||||
|
||||
matplotlib.rc('text', usetex=True)
|
||||
pd.options.display.notebook_repr_html = False
|
||||
|
||||
|
||||
def plot_optimization_result(datasource, directory): # data source is dataframe
|
||||
sns.set_theme(style='whitegrid') # "white", "dark", "whitegrid", "darkgrid", "ticks"
|
||||
plt.rcParams["figure.figsize"] = (16, 9)
|
||||
fig, axs = plt.subplots(2, 2)
|
||||
plt.subplots_adjust(wspace=0.7, hspace=0.3)
|
||||
plt.autoscale(tight=True)
|
||||
T = np.array([i for i in range(24)])
|
||||
# plot step cost in ax[0]
|
||||
axs[0, 0].cla()
|
||||
axs[0, 0].set_ylabel('Costs')
|
||||
axs[0, 0].set_xlabel('Time(h)')
|
||||
axs[0, 0].bar(T, datasource['step_cost'])
|
||||
# axs[0,0].set_xticks([i for i in range(24)],[i for i in range(1,25)])
|
||||
|
||||
# plot soc and price in ax[1]
|
||||
axs[0, 1].cla()
|
||||
axs[0, 1].set_ylabel('Price')
|
||||
axs[0, 1].set_xlabel('Time(h)')
|
||||
|
||||
axs[0, 1].plot(T, datasource['price'], drawstyle='steps-mid', label='Price', color='pink')
|
||||
axs[0, 1] = axs[0, 1].twinx()
|
||||
|
||||
axs[0, 1].set_ylabel('SOC')
|
||||
axs[0, 1].plot(T, datasource['soc'], drawstyle='steps-mid', label='SOC', color='grey')
|
||||
# axs[0,1].set_xticks([i for i in range(24)],[i for i in range(1,25)])
|
||||
axs[0, 1].legend(loc='upper right', fontsize=12, frameon=False, labelspacing=0.3)
|
||||
|
||||
# plot accumulated generation and consumption in ax[2]
|
||||
axs[1, 0].cla()
|
||||
axs[1, 0].set_ylabel('Outputs of DGs and Battery')
|
||||
axs[1, 0].set_xlabel('Time(h)')
|
||||
battery_positive = np.array(datasource['battery_energy_change'])
|
||||
battery_negative = np.array(datasource['battery_energy_change'])
|
||||
battery_negative = np.minimum(battery_negative, 0) # discharge
|
||||
battery_positive = np.maximum(battery_positive, 0) # charge
|
||||
# deal with power exchange within the figure
|
||||
imported_from_grid = np.array(datasource['grid_import'])
|
||||
exported_2_grid = np.array(datasource['grid_export'])
|
||||
axs[1, 0].bar(T, datasource['gen1'], label='Gen1')
|
||||
axs[1, 0].bar(T, datasource['gen2'], label='Gen2', bottom=datasource['gen1'])
|
||||
axs[1, 0].bar(T, datasource['gen3'], label='Gen3', bottom=datasource['gen2'] + datasource['gen1'])
|
||||
axs[1, 0].bar(T, -battery_positive, color='blue', hatch='/', label='ESS charge')
|
||||
axs[1, 0].bar(T, -battery_negative, hatch='/', label='ESS discharge',
|
||||
bottom=datasource['gen3'] + datasource['gen2'] + datasource['gen1'])
|
||||
# import as generate
|
||||
axs[1, 0].bar(T, imported_from_grid, label='Grid import',
|
||||
bottom=-battery_negative + datasource['gen3'] + datasource['gen2'] + datasource['gen1'])
|
||||
# export as load
|
||||
axs[1, 0].bar(T, -exported_2_grid, label='Grid export', bottom=-battery_positive)
|
||||
axs[1, 0].plot(T, datasource['netload'], label='Netload', drawstyle='steps-mid', alpha=0.7)
|
||||
axs[1, 0].legend(loc='upper right', fontsize=12, frameon=False, labelspacing=0.3)
|
||||
# axs[1,0].set_xticks([i for i in range(24)],[i for i in range(1,25)])
|
||||
# plt.show()
|
||||
fig.savefig(f"{directory}/optimization_information.svg", format='svg', dpi=600, bbox_inches='tight')
|
||||
print('optimization result has been plotted')
|
||||
|
||||
|
||||
def plot_evaluation_information(datasource, directory):
|
||||
sns.set_theme(style='whitegrid')
|
||||
with open(datasource, 'rb') as tf:
|
||||
test_data = pickle.load(tf)
|
||||
# plot unbalance, and reward of each step by bar figures
|
||||
plt.rcParams["figure.figsize"] = (16, 9)
|
||||
fig, axs = plt.subplots(2, 2)
|
||||
plt.subplots_adjust(wspace=0.7, hspace=0.3)
|
||||
plt.autoscale(tight=True)
|
||||
|
||||
# prepare data for evaluation the environment here
|
||||
eval_data = pd.DataFrame(test_data['information'])
|
||||
eval_data.columns = ['time_step', 'price', 'netload', 'action', 'real_action', 'soc', 'battery', 'gen1', 'gen2',
|
||||
'gen3', 'unbalance', 'operation_cost']
|
||||
|
||||
# plot unbalance in axs[0]
|
||||
axs[0, 0].cla()
|
||||
axs[0, 0].set_ylabel('Unbalance of Generation and Load')
|
||||
axs[0, 0].bar(eval_data['time_step'], eval_data['unbalance'], label='Exchange with Grid', width=0.4)
|
||||
axs[0, 0].bar(eval_data['time_step'] + 0.4, eval_data['netload'], label='Netload', width=0.4)
|
||||
axs[0, 0].legend(loc='upper right', fontsize=12, frameon=False, labelspacing=0.5)
|
||||
# axs[0,0].set_xticks([i for i in range(24)],[i for i in range(1,25)])
|
||||
|
||||
# plot reward in axs[1]
|
||||
axs[1, 1].cla()
|
||||
axs[1, 1].set_ylabel('Costs')
|
||||
axs[1, 1].bar(eval_data['time_step'], eval_data['operation_cost'])
|
||||
|
||||
# plot generation and netload in ax[2]
|
||||
axs[1, 0].cla()
|
||||
axs[1, 0].set_ylabel('Outputs of Units and Netload (kWh)')
|
||||
# axs[1,0].set_xticks([i for i in range(24)], [i for i in range(1, 25)])
|
||||
battery_positive = np.array(eval_data['battery'])
|
||||
battery_negative = np.array(eval_data['battery'])
|
||||
battery_positive = np.maximum(battery_positive, 0) # charge
|
||||
battery_negative = np.minimum(battery_negative, 0) # discharge
|
||||
|
||||
# deal with power exchange within the figure
|
||||
imported_from_grid = np.minimum(np.array(eval_data['unbalance']), 0)
|
||||
exported_2_grid = np.maximum(np.array(eval_data['unbalance']), 0)
|
||||
x = eval_data['time_step']
|
||||
axs[1, 0].bar(x, eval_data['gen1'], label='Gen1')
|
||||
axs[1, 0].bar(x, eval_data['gen2'], label='Gen2', bottom=eval_data['gen1'])
|
||||
axs[1, 0].bar(x, eval_data['gen3'], label='Gen3', bottom=eval_data['gen1'] + eval_data['gen2'])
|
||||
axs[1, 0].bar(x, -battery_positive, color='blue', hatch='/', label='ESS charge')
|
||||
axs[1, 0].bar(x, -battery_negative, label='ESS discharge', hatch='/',
|
||||
bottom=eval_data['gen1'] + eval_data['gen2'] + eval_data['gen3'])
|
||||
axs[1, 0].bar(x, -imported_from_grid, label='Grid import',
|
||||
bottom=eval_data['gen1'] + eval_data['gen2'] + eval_data['gen3'] - battery_negative)
|
||||
axs[1, 0].bar(x, -exported_2_grid, label='Grid export', bottom=-battery_positive)
|
||||
|
||||
axs[1, 0].plot(x, eval_data['netload'], drawstyle='steps-mid', label='Netload')
|
||||
axs[1, 0].legend(loc='upper right', fontsize=12, frameon=False, labelspacing=0.3)
|
||||
|
||||
# plot energy charge/discharge with price in ax[3].
|
||||
axs[0, 1].cla()
|
||||
axs[0, 1].set_ylabel('Price')
|
||||
axs[0, 1].set_xlabel('Time Steps')
|
||||
|
||||
axs[0, 1].plot(eval_data['time_step'], eval_data['price'], drawstyle='steps-mid', label='Price', color='pink')
|
||||
axs[0, 1] = axs[0, 1].twinx()
|
||||
axs[0, 1].set_ylabel('SOC')
|
||||
# axs[0,1].set_xticks([i for i in range(24)], [i for i in range(1, 25)])
|
||||
axs[0, 1].plot(eval_data['time_step'], eval_data['soc'], drawstyle='steps-mid', label='SOC', color='grey')
|
||||
axs[0, 1].legend(loc='upper right', fontsize=12, frameon=False, labelspacing=0.3)
|
||||
# plt.show()
|
||||
fig.savefig(f"{directory}/Evoluation Information.svg", format='svg', dpi=600, bbox_inches='tight')
|
||||
print('the evaluation figure has been plotted and saved')
|
||||
|
||||
|
||||
def make_dir(directory, feature_change):
|
||||
cwd = f'{directory}/DRL_{feature_change}_plots'
|
||||
os.makedirs(cwd, exist_ok=True)
|
||||
return cwd
|
||||
|
||||
|
||||
class PlotArgs:
|
||||
def __init__(self) -> None:
|
||||
self.cwd = None
|
||||
self.feature_change = None
|
||||
self.plot_on = None
|
|
@ -0,0 +1,403 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 34,
|
||||
"id": "initial_id",
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-06-11T06:41:37.671133500Z",
|
||||
"start_time": "2024-06-11T06:41:37.652982600Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"['./data/iowa.nc']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"import netCDF4 as nc\n",
|
||||
"import os\n",
|
||||
"import pandas as pd\n",
|
||||
"import datetime as dt\n",
|
||||
"nc_path = \"./data/\"\n",
|
||||
"nc_files = [f\"{nc_path}{x}\" for x in os.listdir(nc_path) if x.endswith('.nc')]\n",
|
||||
"print(nc_files)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 35,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"stations = pd.read_csv('./data/station.csv')\n",
|
||||
"stations.head()\n",
|
||||
"stations.dropna(inplace=True)"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-06-11T06:41:37.722796200Z",
|
||||
"start_time": "2024-06-11T06:41:37.663028100Z"
|
||||
}
|
||||
},
|
||||
"id": "9c4ed0ec8dc7ff7d"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 36,
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"['longitude', 'latitude', 'time', 't2m']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"nc_data = nc.Dataset(nc_files[0]) # 读文件\n",
|
||||
"print(list(nc_data.variables.keys())) # keys"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-06-11T06:41:37.730197500Z",
|
||||
"start_time": "2024-06-11T06:41:37.678192900Z"
|
||||
}
|
||||
},
|
||||
"id": "a88ddc29d8fa3d06"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 37,
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "(13, 31)"
|
||||
},
|
||||
"execution_count": 37,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"lat = np.asarray(nc_data['latitude'][:]).tolist()\n",
|
||||
"lon = np.asarray(nc_data['longitude'][:]).tolist()\n",
|
||||
"len(lat), len(lon)"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-06-11T06:41:37.730197500Z",
|
||||
"start_time": "2024-06-11T06:41:37.693749900Z"
|
||||
}
|
||||
},
|
||||
"id": "4ec1510ef5baa4b9"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 38,
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "array([1016832, 1016833, 1016834, ..., 1025613, 1025614, 1025615])"
|
||||
},
|
||||
"execution_count": 38,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"np.asarray(nc_data['time'][:])"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-06-11T06:41:37.731222200Z",
|
||||
"start_time": "2024-06-11T06:41:37.710255800Z"
|
||||
}
|
||||
},
|
||||
"id": "9e1cfd59ea63dee"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 39,
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "<class 'netCDF4._netCDF4.Variable'>\nint32 time(time)\n units: hours since 1900-01-01 00:00:00.0\n long_name: time\n calendar: gregorian\nunlimited dimensions: \ncurrent shape = (8784,)\nfilling on, default _FillValue of -2147483647 used"
|
||||
},
|
||||
"execution_count": 39,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"nc_data['time']"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-06-11T06:41:37.739215300Z",
|
||||
"start_time": "2024-06-11T06:41:37.723796300Z"
|
||||
}
|
||||
},
|
||||
"id": "305b595052773792"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 40,
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "datetime.datetime(1900, 1, 1, 0, 0)"
|
||||
},
|
||||
"execution_count": 40,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"start_date = dt.datetime(1900, 1, 1)\n",
|
||||
"start_date"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-06-11T06:41:37.788384900Z",
|
||||
"start_time": "2024-06-11T06:41:37.739215300Z"
|
||||
}
|
||||
},
|
||||
"id": "9dc1b63dbd6e5847"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 41,
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "array([[[267.10449818, 266.56524854, 265.81574602, ..., 266.35826383,\n 266.94435738, 267.10231939],\n [266.75916053, 266.21773211, 265.4017766 , ..., 266.49334859,\n 267.59908269, 267.86380524],\n [266.70360148, 265.86258993, 265.06733289, ..., 266.6197182 ,\n 267.66117811, 267.95640366],\n ...,\n [270.23759303, 270.20055367, 269.91731143, ..., 270.11231282,\n 269.99683714, 269.90750689],\n [271.34223774, 271.32153927, 271.25726507, ..., 270.55569585,\n 270.37376718, 270.24086121],\n [271.76274352, 271.85643134, 271.79215714, ..., 271.32371806,\n 270.86617291, 270.71583665]],\n\n [[267.45746158, 267.17966631, 266.73846206, ..., 266.19376546,\n 266.66983029, 266.83541806],\n [267.5337191 , 267.25374505, 266.67418786, ..., 266.32231386,\n 267.30821471, 267.57620544],\n [267.67425083, 267.02933005, 266.40183956, ..., 266.40183956,\n 267.33218136, 267.62522813],\n ...,\n [269.75172367, 269.78549485, 269.62317527, ..., 270.06873709,\n 269.95652959, 269.69289643],\n [270.76159117, 270.80080932, 270.75832299, ..., 270.33237024,\n 270.18748095, 269.91186447],\n [271.10692881, 271.19516966, 271.15268333, ..., 270.63848973,\n 270.47290197, 270.30513541]],\n\n [[267.29841017, 267.25483444, 267.06745881, ..., 265.76345515,\n 266.3157775 , 266.53692432],\n [267.55441757, 267.51084184, 267.31801925, ..., 266.01728376,\n 266.99338007, 267.2613708 ],\n [267.77338561, 267.59363573, 267.37684648, ..., 266.09898825,\n 267.00318461, 267.28860563],\n ...,\n [269.32359214, 269.41945874, 269.29091034, ..., 270.08507799,\n 269.90750689, 269.55781167],\n [270.24957636, 270.34871114, 270.35088993, ..., 270.20600063,\n 270.01317803, 269.67110857],\n [270.62105944, 270.76703813, 270.76921692, ..., 270.43695199,\n 270.18857034, 270.01099925]],\n\n ...,\n\n [[271.48168007, 271.36729379, 271.24637114, ..., 270.89776532,\n 270.98164859, 270.96857587],\n [271.81830257, 271.70282689, 271.56447396, ..., 271.43701495,\n 271.53723913, 271.69084357],\n [272.18107051, 271.92179493, 271.77690563, ..., 272.04271757,\n 272.15165689, 272.3107083 ],\n ...,\n [275.28910932, 274.99388376, 274.74441272, ..., 276.07565121,\n 276.18350114, 276.35344648],\n [275.60285456, 275.36536684, 275.34575777, ..., 276.91993095,\n 276.99618847, 277.08551871],\n [275.90788466, 275.95254978, 275.9329407 , ..., 278.0169499 ,\n 277.9058318 , 277.90365301]],\n\n [[269.88462964, 269.78658425, 269.64496313, ..., 269.72231005,\n 269.85957359, 269.92602658],\n [270.1308325 , 270.03278711, 269.93147354, ..., 270.19946427,\n 270.40427019, 270.72237301],\n [270.45656107, 270.21471578, 270.10904464, ..., 270.84329566,\n 271.05463794, 271.37056197],\n ...,\n [273.1440941 , 273.02535024, 272.86738823, ..., 274.12127981,\n 274.27488425, 274.4742432 ],\n [273.5765832 , 273.50795143, 273.58638774, ..., 274.72698243,\n 274.85553083, 275.00150951],\n [273.94697689, 274.12127981, 274.19971612, ..., 275.72704539,\n 275.64207272, 275.67148634]],\n\n [[269.74954488, 269.63951617, 269.51096777, ..., 268.99459539,\n 269.11442864, 269.11224985],\n [269.87046752, 269.76043881, 269.70596915, ..., 269.16345133,\n 269.50443141, 269.90097053],\n [269.85630541, 269.81381908, 269.78222667, ..., 269.61446012,\n 269.94672505, 270.34435357],\n ...,\n [273.29007279, 273.09833959, 273.18440165, ..., 273.11794867,\n 273.21381527, 273.48943175],\n [274.379466 , 274.08641922, 273.44258784, ..., 273.57331502,\n 273.64957255, 273.81298153],\n [273.89904359, 273.55370595, 272.90769578, ..., 274.27815243,\n 274.27488425, 274.25309638]]])"
|
||||
},
|
||||
"execution_count": 41,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"np.asarray(nc_data['t2m'][:])"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-06-11T06:41:37.790393500Z",
|
||||
"start_time": "2024-06-11T06:41:37.756209300Z"
|
||||
}
|
||||
},
|
||||
"id": "d189f90ec07c8690"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 42,
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "(8784, 13, 31)"
|
||||
},
|
||||
"execution_count": 42,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"np.asarray(nc_data['t2m'][:]).shape"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-06-11T06:41:37.862579700Z",
|
||||
"start_time": "2024-06-11T06:41:37.787379500Z"
|
||||
}
|
||||
},
|
||||
"id": "6a90a501e4374806"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 43,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def find_closest_number_index(arr, target):\n",
|
||||
" arr.sort() # 对数组进行排序\n",
|
||||
" left, right = 0, len(arr) - 1\n",
|
||||
" closest_index = 0\n",
|
||||
"\n",
|
||||
" while left <= right:\n",
|
||||
" mid = left + (right - left) // 2\n",
|
||||
" # 更新最接近数值的索引\n",
|
||||
" if abs(arr[mid] - target) < abs(arr[closest_index] - target):\n",
|
||||
" closest_index = mid\n",
|
||||
" # 根据目标值与中间值的比较,决定搜索左半部分还是右半部分\n",
|
||||
" if arr[mid] < target:\n",
|
||||
" left = mid + 1\n",
|
||||
" elif arr[mid] > target:\n",
|
||||
" right = mid - 1\n",
|
||||
" else:\n",
|
||||
" return mid # 如果找到精确匹配,直接返回索引\n",
|
||||
" return closest_index"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-06-11T06:41:37.870067500Z",
|
||||
"start_time": "2024-06-11T06:41:37.820315500Z"
|
||||
}
|
||||
},
|
||||
"id": "738d2236e3320758"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 44,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"target_cols = ['t2m']\n",
|
||||
"times = np.asarray(nc_data['time'][:]).tolist()\n",
|
||||
"times = [start_date + dt.timedelta(hours=x) for x in times]"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-06-11T06:41:37.870067500Z",
|
||||
"start_time": "2024-06-11T06:41:37.834951500Z"
|
||||
}
|
||||
},
|
||||
"id": "336150c5fe22b4db"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 45,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"time_index = pd.to_datetime(times * stations.shape[0])\n",
|
||||
"stations['lat'] = stations['lat'].astype(float)\n",
|
||||
"stations['lon'] = stations['lon'].astype(float)\n",
|
||||
"stations_out = stations.copy()"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-06-11T06:41:37.871070500Z",
|
||||
"start_time": "2024-06-11T06:41:37.848726Z"
|
||||
}
|
||||
},
|
||||
"id": "dfc81e6752e2c51a"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 46,
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": " state lon lat lon_index lat_index best_lon best_lat\n0 iowa 93.5 41.1 30 2 -89.0 41.0",
|
||||
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>state</th>\n <th>lon</th>\n <th>lat</th>\n <th>lon_index</th>\n <th>lat_index</th>\n <th>best_lon</th>\n <th>best_lat</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>iowa</td>\n <td>93.5</td>\n <td>41.1</td>\n <td>30</td>\n <td>2</td>\n <td>-89.0</td>\n <td>41.0</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
||||
},
|
||||
"execution_count": 46,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"lon_index = list()\n",
|
||||
"lat_index = list()\n",
|
||||
"best_lons = list()\n",
|
||||
"best_lats = list()\n",
|
||||
"for i in range(stations.shape[0]):\n",
|
||||
" # 获得观测点所在的经纬度\n",
|
||||
" stat_lat = stations.iloc[i]['lat']\n",
|
||||
" stat_lon = stations.iloc[i]['lon']\n",
|
||||
" # 获得观测点所在经纬度对应最近的网格数据的经纬度\n",
|
||||
" best_lat_index = find_closest_number_index(lat, stat_lat)\n",
|
||||
" best_lon_index = find_closest_number_index(lon, stat_lon)\n",
|
||||
" lat_index.append(best_lat_index)\n",
|
||||
" lon_index.append(best_lon_index)\n",
|
||||
" best_lons.append(lon[best_lon_index])\n",
|
||||
" best_lats.append(lat[best_lat_index])\n",
|
||||
"stations_out['lon_index'] = lon_index\n",
|
||||
"stations_out['lat_index'] = lat_index\n",
|
||||
"stations_out['best_lon'] = best_lons\n",
|
||||
"stations_out['best_lat'] = best_lats\n",
|
||||
"stations_out"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-06-11T06:41:37.880042100Z",
|
||||
"start_time": "2024-06-11T06:41:37.866999300Z"
|
||||
}
|
||||
},
|
||||
"id": "3eccbc89ab82bf16"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 47,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"for tgt in target_cols:\n",
|
||||
" tmp = np.asarray(nc_data[tgt][:])\n",
|
||||
" stations_out[tgt] = stations_out.apply(lambda x: tmp[:, len(lat) - x['lat_index'] - 1, x['lon_index']].squeeze(),axis=1)"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-06-11T06:41:37.953643400Z",
|
||||
"start_time": "2024-06-11T06:41:37.882051700Z"
|
||||
}
|
||||
},
|
||||
"id": "eb2d82717535c00b"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 48,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"result_df = stations_out.explode(column=target_cols).reset_index(drop=True)\n",
|
||||
"result_df['date_time'] = time_index\n",
|
||||
"result_df[['date_time', 't2m']].to_csv('./temper.csv', index=False, encoding='utf-8-sig')"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-06-11T06:41:37.961281200Z",
|
||||
"start_time": "2024-06-11T06:41:37.913630800Z"
|
||||
}
|
||||
},
|
||||
"id": "f347ca967bfcafce"
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 2
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython2",
|
||||
"version": "2.7.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
|
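For reference, the nearest-grid-cell lookup the notebook above relies on (find_closest_number_index plus the flipped row index len(lat) - x['lat_index'] - 1) can be sketched with plain NumPy. This is a minimal illustration, not the notebook's exact helper; the lat/lon axes and station coordinates below are made-up assumptions.

import numpy as np

def find_closest_number_index(values, target):
    # Index of the entry in a 1-D coordinate array closest to `target`.
    return int(np.argmin(np.abs(np.asarray(values, dtype=float) - target)))

# Hypothetical axes and station location, for illustration only.
lat = np.arange(40.0, 45.0, 0.5)      # ascending latitude axis
lon = np.arange(-95.0, -88.0, 0.5)    # longitude axis
lat_index = find_closest_number_index(lat, 41.1)
lon_index = find_closest_number_index(lon, -93.5)
row_index = len(lat) - lat_index - 1  # flip when the data grid is stored north-to-south
print(lat[lat_index], lon[lon_index], row_index)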
@ -0,0 +1,111 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"id": "initial_id",
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-06-11T06:48:54.873304Z",
|
||||
"start_time": "2024-06-11T06:48:54.842415500Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"pv_df = pd.read_csv('data/PV.csv', sep=';')\n",
|
||||
"temperature_df = pd.read_csv('data/temper.csv', sep=',')\n",
|
||||
"\n",
|
||||
"pv_data = pv_df['P_PV_'].apply(lambda x: x.replace(',', '.')).to_numpy(dtype=float)\n",
|
||||
"temper = temperature_df['t2m'].to_numpy(dtype=float)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "array([0., 0., 0., ..., 0., 0., 0.])"
|
||||
},
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"pv_data"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-06-11T06:48:54.881858Z",
|
||||
"start_time": "2024-06-11T06:48:54.867302600Z"
|
||||
}
|
||||
},
|
||||
"id": "d37d5b6cd5cb7387"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "array([-3.2424931, -3.4571036, -3.5921883, ..., 3.2034465, 1.3242432,\n 0.3394317])"
|
||||
},
|
||||
"execution_count": 26,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"temper -= 273.15\n",
|
||||
"temper"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-06-11T06:48:54.906957500Z",
|
||||
"start_time": "2024-06-11T06:48:54.880858700Z"
|
||||
}
|
||||
},
|
||||
"id": "3ef2774e5245627"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"outputs": [],
|
||||
"source": [],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-06-11T06:48:54.915543800Z",
|
||||
"start_time": "2024-06-11T06:48:54.896403Z"
|
||||
}
|
||||
},
|
||||
"id": "33f9458d47973337"
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 2
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython2",
|
||||
"version": "2.7.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
|
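The notebook above parses the semicolon-separated PV file by replacing decimal commas by hand and then shifts the 2 m temperature from Kelvin to Celsius. A hedged, equivalent alternative (assuming the PV file really uses ',' as its decimal mark) lets pandas parse the decimal comma directly:

import pandas as pd

# File paths and column names are taken from the notebook above.
pv_df = pd.read_csv('data/PV.csv', sep=';', decimal=',')
pv_data = pv_df['P_PV_'].to_numpy(dtype=float)

temperature_df = pd.read_csv('data/temper.csv')
temper = temperature_df['t2m'].to_numpy(dtype=float) - 273.15  # Kelvin -> Celsius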
@ -0,0 +1,41 @@
|
|||
# from joblib import load
|
||||
#
|
||||
# model = load('AgentPPO/reward_data.pkl')
|
||||
#
|
||||
# data = model['mean_episode_reward']
|
||||
# print(data)
|
||||
|
||||
|
||||
# import torch
|
||||
#
|
||||
# model_path = 'AgentDDPG/actor.pth'
|
||||
#
|
||||
# checkpoint = torch.load(model_path, map_location=torch.device('cuda'))
|
||||
#
|
||||
# # If the .pth file stores the entire model (structure and parameters)
|
||||
# model = checkpoint  # this assumes the checkpoint is the model instance itself; sometimes model.load_state_dict(checkpoint['state_dict']) is needed instead
|
||||
|
||||
# If the .pth file only stores the state_dict (model parameters)
|
||||
# model = YourModelClass()  # instantiate your model
|
||||
# model.load_state_dict(checkpoint)
|
||||
|
||||
# print(model)
|
||||
|
||||
import pickle
|
||||
|
||||
a = 'DDPG'
|
||||
b = 'PPO'
|
||||
c = 'SAC'
|
||||
d = 'TD3'
|
||||
|
||||
a1 = '/reward_data.pkl'
|
||||
a2 = '/loss_data.pkl'
|
||||
a3 = '/test_data.pkl'
|
||||
|
||||
filename = './Agent' + a + a3
|
||||
|
||||
# Open the file in binary mode ('rb') to read the pickled data
|
||||
with open(filename, 'rb') as f:
|
||||
data = pickle.load(f)
|
||||
|
||||
print(data)
|
|
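The snippet above inspects one agent/metric combination at a time by editing the variables and the filename. A small, hedged extension that walks over every combination the script names (the './Agent<NAME>/<file>.pkl' layout is the one the script itself assumes):

import os
import pickle

agents = ['DDPG', 'PPO', 'SAC', 'TD3']
metrics = ['reward_data.pkl', 'loss_data.pkl', 'test_data.pkl']

for agent in agents:
    for metric in metrics:
        path = f'./Agent{agent}/{metric}'
        if not os.path.exists(path):
            continue  # skip combinations that were never saved
        with open(path, 'rb') as f:
            data = pickle.load(f)
        print(agent, metric, type(data))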
@ -0,0 +1,285 @@
|
|||
import os
|
||||
import gurobipy as gp
|
||||
import numpy as np
|
||||
import numpy.random as rd
|
||||
import pandas as pd
|
||||
import torch
|
||||
from gurobipy import GRB
|
||||
|
||||
|
||||
def optimization_base_result(env, month, day, initial_soc):
|
||||
price = env.data_manager.get_series_price_data(month, day)
|
||||
load = env.data_manager.get_series_load_cons_data(month, day)
|
||||
temperature = env.data_manager.get_series_temperature_data(month, day)
|
||||
irradiance = env.data_manager.get_series_irradiance_data(month, day)
|
||||
wind_speed = env.data_manager.get_series_wind_data(month, day)
|
||||
|
||||
pv = [env.solar.step(temp, irr) for temp, irr in zip(temperature, irradiance)]
|
||||
wind = [env.wind.step(ws) for ws in wind_speed]
|
||||
|
||||
period = env.episode_length
|
||||
DG_parameters = env.dg_parameters
|
||||
|
||||
def get_dg_info(parameters):
|
||||
p_max = []
|
||||
p_min = []
|
||||
ramping_up = []
|
||||
ramping_down = []
|
||||
a_para = []
|
||||
b_para = []
|
||||
c_para = []
|
||||
|
||||
for name, gen_info in parameters.items():
|
||||
p_max.append(gen_info['power_output_max'])
|
||||
p_min.append(gen_info['power_output_min'])
|
||||
ramping_up.append(gen_info['ramping_up'])
|
||||
ramping_down.append(gen_info['ramping_down'])
|
||||
a_para.append(gen_info['a'])
|
||||
b_para.append(gen_info['b'])
|
||||
c_para.append(gen_info['c'])
|
||||
return p_max, p_min, ramping_up, ramping_down, a_para, b_para, c_para
|
||||
|
||||
p_max, p_min, ramping_up, ramping_down, a_para, b_para, c_para = get_dg_info(parameters=DG_parameters)
|
||||
NUM_GEN = len(DG_parameters.keys())
|
||||
battery_capacity = env.battery.capacity
|
||||
battery_efficiency = env.battery.efficiency
|
||||
|
||||
m = gp.Model("UC")
|
||||
|
||||
# set system variables
|
||||
on_off = m.addVars(NUM_GEN, period, vtype=GRB.BINARY, name='on_off')
|
||||
gen_output = m.addVars(NUM_GEN, period, vtype=GRB.CONTINUOUS, name='output')
|
||||
# set constraints for battery charge/discharge
|
||||
battery_energy_change = m.addVars(period, vtype=GRB.CONTINUOUS, lb=env.battery.max_discharge,
|
||||
ub=env.battery.max_charge, name='battery_action')
|
||||
# set constraints for the exchange between the external grid and the distributed energy system
|
||||
grid_energy_import = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, ub=env.grid.exchange_ability, name='import')
|
||||
grid_energy_export = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, ub=env.grid.exchange_ability, name='export')
|
||||
soc = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0.2, ub=0.8, name='SOC')
|
||||
|
||||
# 1. add power-balance constraint
|
||||
m.addConstrs(((sum(gen_output[g, t] for g in range(NUM_GEN)) + pv[t] + wind[t] + grid_energy_import[t] >= load[t] +
|
||||
battery_energy_change[t] + grid_energy_export[t]) for t in range(period)), name='powerbalance')
|
||||
# 2. add constraints for p_max / p_min
|
||||
m.addConstrs((gen_output[g, t] <= on_off[g, t] * p_max[g] for g in range(NUM_GEN) for t in range(period)),
|
||||
'gen_output_max')
|
||||
m.addConstrs((gen_output[g, t] >= on_off[g, t] * p_min[g] for g in range(NUM_GEN) for t in range(period)),
|
||||
'gen_output_min')
|
||||
# 3. add constraints for ramping up / ramping down
|
||||
m.addConstrs((gen_output[g, t + 1] - gen_output[g, t] <= ramping_up[g] for g in range(NUM_GEN)
|
||||
for t in range(period - 1)), 'ramping_up')
|
||||
m.addConstrs((gen_output[g, t] - gen_output[g, t + 1] <= ramping_down[g] for g in range(NUM_GEN)
|
||||
for t in range(period - 1)), 'ramping_down')
|
||||
# 4. add constraints for SOC
|
||||
m.addConstr(battery_capacity * soc[0] == battery_capacity * initial_soc +
|
||||
(battery_energy_change[0] * battery_efficiency), name='soc0')
|
||||
m.addConstrs((battery_capacity * soc[t] == battery_capacity * soc[t - 1] +
|
||||
(battery_energy_change[t] * battery_efficiency) for t in range(1, period)), name='soc update')
|
||||
# 5. add constraint for PV output
|
||||
m.addConstrs((pv[t] >= 0 for t in range(period)), name='pv_output_min')
|
||||
# set cost function
|
||||
# 1 cost of generator
|
||||
cost_gen = gp.quicksum(
|
||||
(a_para[g] * gen_output[g, t] * gen_output[g, t] + b_para[g] * gen_output[g, t] + c_para[g] * on_off[g, t]) for
|
||||
t in range(period) for g in range(NUM_GEN))
|
||||
cost_grid_import = gp.quicksum(grid_energy_import[t] * price[t] for t in range(period))
|
||||
cost_grid_export = gp.quicksum(grid_energy_export[t] * price[t] * env.sell_coefficient for t in range(period))
|
||||
|
||||
m.setObjective((cost_gen + cost_grid_import - cost_grid_export), GRB.MINIMIZE)
|
||||
m.optimize()
|
||||
|
||||
output_record = {'pv': [], 'temperature': [], 'irradiance': [], 'wind_speed': [], 'price': [], 'load': [],
|
||||
'netload': [], 'soc': [], 'battery_energy_change': [], 'grid_import': [], 'grid_export': [],
|
||||
'gen1': [], 'gen2': [], 'gen3': [], 'step_cost': []}
|
||||
for t in range(period):
|
||||
gen_cost = sum((on_off[g, t].x * (
|
||||
a_para[g] * gen_output[g, t].x * gen_output[g, t].x + b_para[g] * gen_output[g, t].x + c_para[g]))
|
||||
for g in range(NUM_GEN))
|
||||
grid_import_cost = grid_energy_import[t].x * price[t]
|
||||
grid_export_cost = grid_energy_export[t].x * price[t] * env.sell_coefficient
|
||||
output_record['pv'].append(pv[t].x)
|
||||
output_record['temperature'].append(temperature[t])
|
||||
output_record['irradiance'].append(irradiance[t])
|
||||
output_record['wind_speed'].append(wind[t])
|
||||
output_record['price'].append(price[t])
|
||||
output_record['load'].append(load[t])
|
||||
output_record['netload'].append(load[t] - pv[t].x)
|
||||
output_record['soc'].append(soc[t].x)
|
||||
output_record['battery_energy_change'].append(battery_energy_change[t].x)
|
||||
output_record['grid_import'].append(grid_energy_import[t].x)
|
||||
output_record['grid_export'].append(grid_energy_export[t].x)
|
||||
output_record['gen1'].append(gen_output[0, t].x)
|
||||
output_record['gen2'].append(gen_output[1, t].x)
|
||||
output_record['gen3'].append(gen_output[2, t].x)
|
||||
output_record['step_cost'].append(gen_cost + grid_import_cost - grid_export_cost)
|
||||
|
||||
output_record_df = pd.DataFrame.from_dict(output_record)
|
||||
return output_record_df
|
||||
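# Hypothetical usage sketch (not part of the original file): given an environment
# instance `env` that exposes the data_manager / solar / wind / battery / grid
# attributes used above, the day-ahead benchmark could be obtained as, e.g.,
#
#     base_df = optimization_base_result(env, month=6, day=11, initial_soc=0.4)
#     base_df.to_csv('./base_result.csv', index=False)
#
# The month, day and initial_soc values here are illustrative only.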
|
||||
|
||||
class Arguments:
|
||||
"""revise here for our own purpose"""
|
||||
|
||||
def __init__(self, agent=None, env=None):
|
||||
self.agent = agent # Deep Reinforcement Learning algorithm
|
||||
self.env = env # the environment for training
|
||||
# self.plot_shadow_on = False  # controls whether all shadow figures are plotted
|
||||
self.cwd = None # current work directory. None means set automatically
|
||||
self.if_remove = False # remove the cwd folder? (True, False, None:ask me)
|
||||
self.visible_gpu = '0,1,2,3' # for example: os.environ['CUDA_VISIBLE_DEVICES'] = '0, 2,'
|
||||
# self.worker_num = 2  # number of rollout workers per GPU (adjust it to get high GPU usage)
|
||||
self.num_threads = 32  # CPU threads used when evaluating the model, via torch.set_num_threads(self.num_threads)
|
||||
|
||||
'''Arguments for training'''
|
||||
self.num_episode = 1000
|
||||
self.gamma = 0.995 # discount factor of future rewards
|
||||
# self.reward_scale = 1  # an approximate target reward, usually close to 256
|
||||
self.learning_rate = 2 ** -14 # 2 ** -14 ~= 6e-5
|
||||
self.soft_update_tau = 2 ** -8 # 2 ** -8 ~= 5e-3
|
||||
self.net_dim = 256 # the network width 256
|
||||
self.batch_size = 4096 # num of transitions sampled from replay buffer.
|
||||
self.repeat_times = 2 ** 5 # repeatedly update network to keep critic's loss small
|
||||
self.target_step = 4096 # collect target_step experiences, then update network, 1024
|
||||
self.max_memo = 500000 # capacity of replay buffer
|
||||
self.if_per_or_gae = False # PER for off-policy sparse reward: Prioritized Experience Replay.
|
||||
|
||||
'''Arguments for evaluate'''
|
||||
# self.eval_gap = 2 ** 6 # evaluate the agent per eval_gap seconds
|
||||
# self.eval_times = 2  # number of episodes used to estimate the episode return
|
||||
self.random_seed = 0 # initialize random seed in self.init_before_training()
|
||||
# self.random_seed_list = [1234, 2234, 3234, 4234, 5234]
|
||||
self.random_seed_list = [1234]
|
||||
'''Arguments for save and plot issues'''
|
||||
self.train = True
|
||||
self.save_network = True
|
||||
self.test_network = True
|
||||
self.save_test_data = True
|
||||
self.compare_with_pyomo = True
|
||||
self.plot_on = True
|
||||
|
||||
def init_before_training(self, if_main):
|
||||
if self.cwd is None:
|
||||
agent_name = self.agent.__class__.__name__
|
||||
self.cwd = f'./{agent_name}'
|
||||
|
||||
if if_main:
|
||||
import shutil # remove history according to bool(if_remove)
|
||||
if self.if_remove is None:
|
||||
self.if_remove = bool(input(f"| PRESS 'y' to REMOVE: {self.cwd}? ") == 'y')
|
||||
elif self.if_remove:
|
||||
shutil.rmtree(self.cwd, ignore_errors=True)
|
||||
print(f"| Remove cwd: {self.cwd}")
|
||||
os.makedirs(self.cwd, exist_ok=True)
|
||||
|
||||
np.random.seed(self.random_seed)
|
||||
torch.manual_seed(self.random_seed)
|
||||
torch.set_num_threads(self.num_threads)
|
||||
torch.set_default_dtype(torch.float32)
|
||||
|
||||
os.environ['CUDA_VISIBLE_DEVICES'] = str(self.visible_gpu)  # control which GPUs are used
|
||||
|
||||
|
||||
def test_one_episode(env, act, device):
|
||||
"""to get evaluate information, here record the unbalance of after taking action"""
|
||||
record_state = []
|
||||
record_action = []
|
||||
record_reward = []
|
||||
record_output = []
|
||||
record_cost = []
|
||||
record_unbalance = []
|
||||
record_system_info = []  # [time, price, netload, action, real action, output*4, soc, unbalance (exchange + penalty)]
|
||||
record_init_info = []  # includes month, day, time, initial soc
|
||||
env.TRAIN = False
|
||||
state = env.reset()
|
||||
record_init_info.append([env.month, env.day, env.current_time, env.battery.current_capacity])
|
||||
print(f'current testing month is {env.month}, day is {env.day}, initial_soc is {env.battery.current_capacity}')
|
||||
for i in range(24):
|
||||
s_tensor = torch.as_tensor((state,), device=device)
|
||||
a_tensor = act(s_tensor)
|
||||
action = a_tensor.detach().cpu().numpy()[0]  # detach() is not strictly needed when running under torch.no_grad()
|
||||
real_action = action
|
||||
state, next_state, reward, done = env.step(action)
|
||||
|
||||
record_system_info.append([state[0], state[1], state[3], action, real_action, env.battery.SOC(),
|
||||
env.battery.energy_change, next_state[4], next_state[5], next_state[6],
|
||||
next_state[7], next_state[8], env.unbalance, env.operation_cost])
|
||||
record_state.append(state)
|
||||
record_action.append(real_action)
|
||||
record_reward.append(reward)
|
||||
record_output.append(env.current_output)
|
||||
record_unbalance.append(env.unbalance)
|
||||
state = next_state
|
||||
# add information of the last step: dg1, dg2, dg3, soc
|
||||
record_system_info[-1][7:10] = [env.final_step_outputs[0], env.final_step_outputs[1], env.final_step_outputs[2]]
|
||||
record_system_info[-1][5] = env.final_step_outputs[3]
|
||||
record = {'init_info': record_init_info, 'system_info': record_system_info, 'state': record_state,
|
||||
'action': record_action, 'reward': record_reward, 'cost': record_cost, 'unbalance': record_unbalance,
|
||||
'record_output': record_output}
|
||||
return record
|
||||
|
||||
|
||||
def get_episode_return(env, act, device):
|
||||
episode_return = 0.0 # sum of rewards in an episode
|
||||
episode_unbalance = 0.0
|
||||
state = env.reset()
|
||||
for i in range(24):
|
||||
s_tensor = torch.as_tensor((state,), device=device)
|
||||
a_tensor = act(s_tensor)
|
||||
action = a_tensor.detach().cpu().numpy()[0]  # detach() is not strictly needed when running under torch.no_grad()
|
||||
state, next_state, reward, done = env.step(action)
|
||||
state = next_state
|
||||
episode_return += reward
|
||||
episode_unbalance += env.real_unbalance
|
||||
if done:
|
||||
break
|
||||
return episode_return, episode_unbalance
|
||||
|
||||
|
||||
class ReplayBuffer:
|
||||
def __init__(self, max_len, state_dim, action_dim, gpu_id=0):
|
||||
self.now_len = 0
|
||||
self.next_idx = 0
|
||||
self.if_full = False
|
||||
self.max_len = max_len
|
||||
self.data_type = torch.float32
|
||||
self.action_dim = action_dim
|
||||
self.device = torch.device(f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu")
|
||||
|
||||
other_dim = 1 + 1 + self.action_dim
|
||||
self.buf_other = torch.empty(size=(max_len, other_dim), dtype=self.data_type, device=self.device)
|
||||
|
||||
if isinstance(state_dim, int):  # flat vector state (pixel observations use the tuple branch below)
|
||||
self.buf_state = torch.empty((max_len, state_dim), dtype=torch.float32, device=self.device)
|
||||
elif isinstance(state_dim, tuple):
|
||||
self.buf_state = torch.empty((max_len, *state_dim), dtype=torch.uint8, device=self.device)
|
||||
else:
|
||||
raise ValueError('state_dim')
|
||||
|
||||
def extend_buffer(self, state, other): # CPU array to CPU array
|
||||
size = len(other)
|
||||
next_idx = self.next_idx + size
|
||||
|
||||
if next_idx > self.max_len:
|
||||
self.buf_state[self.next_idx:self.max_len] = state[:self.max_len - self.next_idx]
|
||||
self.buf_other[self.next_idx:self.max_len] = other[:self.max_len - self.next_idx]
|
||||
self.if_full = True
|
||||
|
||||
next_idx = next_idx - self.max_len
|
||||
self.buf_state[0:next_idx] = state[-next_idx:]
|
||||
self.buf_other[0:next_idx] = other[-next_idx:]
|
||||
else:
|
||||
self.buf_state[self.next_idx:next_idx] = state
|
||||
self.buf_other[self.next_idx:next_idx] = other
|
||||
self.next_idx = next_idx
|
||||
|
||||
def sample_batch(self, batch_size) -> tuple:
|
||||
indices = rd.randint(self.now_len - 1, size=batch_size)
|
||||
r_m_a = self.buf_other[indices]
|
||||
return (r_m_a[:, 0:1],
|
||||
r_m_a[:, 1:2],
|
||||
r_m_a[:, 2:],
|
||||
self.buf_state[indices],
|
||||
self.buf_state[indices + 1])
|
||||
|
||||
def update_now_len(self):
|
||||
self.now_len = self.max_len if self.if_full else self.next_idx
|
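# Hedged usage sketch for ReplayBuffer (illustrative only; the dimensions and the
# random tensors below are assumptions, not values from this project):
#
#     buffer = ReplayBuffer(max_len=500000, state_dim=9, action_dim=4, gpu_id=-1)
#     state = torch.rand(128, 9)              # batch of states
#     other = torch.rand(128, 1 + 1 + 4)      # one row = [reward, mask, action]
#     buffer.extend_buffer(state, other)
#     buffer.update_now_len()
#     reward, mask, action, s, s_next = buffer.sample_batch(batch_size=64)
#
# Note that sample_batch reads buf_state[indices + 1] as the next state, so it
# relies on transitions being stored in time order.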