edit the layer normalization

parent ceff6e0ffe
commit 53d3ac9ca8
@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="rl-microgrid" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Remote Python 3.9.18 (sftp://chenxd@124.16.151.196:22121/home/chenxd/miniconda3/envs/grid/bin/python3.9)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyDocumentationSettings">
@@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="PublishConfigData" autoUpload="Always" serverName="chenxd@124.16.151.196:22121 password" remoteFilesAllowedToDisappearOnAutoupload="false">
+    <serverData>
+      <paths name="chenxd@124.16.151.196:22121 password">
+        <serverdata>
+          <mappings>
+            <mapping deploy="/tmp/pycharm_project_78" local="$PROJECT_DIR$" />
+          </mappings>
+        </serverdata>
+      </paths>
+      <paths name="root@124.16.151.196:10531 password">
+        <serverdata>
+          <mappings>
+            <mapping deploy="/mnt/chenxd/DRL-for-Energy-Systems" local="$PROJECT_DIR$" />
+          </mappings>
+        </serverdata>
+      </paths>
+    </serverData>
+    <option name="myAutoUpload" value="ALWAYS" />
+  </component>
+</project>
@@ -3,5 +3,5 @@
   <component name="Black">
     <option name="sdkName" value="rl-microgrid" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="rl-microgrid" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.9.18 (sftp://chenxd@124.16.151.196:22121/home/chenxd/miniconda3/envs/grid/bin/python3.9)" project-jdk-type="Python SDK" />
 </project>
PPO.py (64 changed lines)
@@ -12,33 +12,34 @@ os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
 script_name = os.path.basename(__file__)
 
 
-# after adding layer normalization, it doesn't work
 class ActorPPO(nn.Module):
     def __init__(self, mid_dim, state_dim, action_dim, layer_norm=False):
         super().__init__()
+        self.layer_norm = layer_norm
         self.net = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(),
                                  nn.Linear(mid_dim, mid_dim), nn.ReLU(),
                                  nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
-                                 nn.Linear(mid_dim, action_dim), )
+                                 nn.Linear(mid_dim, action_dim))
 
         # the logarithm (log) of the standard deviation (std) of the action; it is a trainable parameter
         self.a_logstd = nn.Parameter(torch.zeros((1, action_dim)) - 0.5, requires_grad=True)
         self.sqrt_2pi_log = np.log(np.sqrt(2 * np.pi))
-        if layer_norm:
-            self.layer_norm(self.net)
+        if self.layer_norm:
+            self.apply_layer_norm()
 
-    @staticmethod
-    def layer_norm(layer, std=1.0, bias_const=0.0):
-        for i in layer:
-            if hasattr(i, 'weight'):
-                torch.nn.init.orthogonal_(i.weight, std)
-                torch.nn.init.constant_(i.bias, bias_const)
+    def apply_layer_norm(self):
+        def init_weights(layer):
+            if isinstance(layer, nn.Linear):
+                nn.init.orthogonal_(layer.weight, 1.0)
+                nn.init.constant_(layer.bias, 0.0)
+
+        self.net.apply(init_weights)
 
     def forward(self, state):
-        return self.net(state).tanh()  # action.tanh() # in this way limit the data output of action
+        return self.net(state).tanh()  # action.tanh() limits the output range of the action
 
     def get_action(self, state):
-        a_avg = self.net(state)  # too big for the action
+        a_avg = self.forward(state)  # too big for the action
         a_std = self.a_logstd.exp()
 
         noise = torch.randn_like(a_avg)
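Note: despite the `layer_norm` flag and the `apply_layer_norm` name, what this refactor applies is orthogonal weight initialization (`nn.init.orthogonal_`), not an `nn.LayerNorm` module. A quick self-contained check of that effect; the dimensions below are made up for illustration:

```python
# Minimal check (illustrative dimensions): apply_layer_norm orthogonally
# initializes every nn.Linear in the actor, so the smaller dimension of each
# weight matrix comes out orthonormal.
import torch
import torch.nn as nn

actor = ActorPPO(mid_dim=128, state_dim=8, action_dim=2, layer_norm=True)
for m in actor.net:
    if isinstance(m, nn.Linear):
        w = m.weight
        q = w @ w.t() if w.shape[0] <= w.shape[1] else w.t() @ w
        assert torch.allclose(q, torch.eye(q.shape[0]), atol=1e-4)
```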
@@ -46,7 +47,7 @@ class ActorPPO(nn.Module):
         return action, noise
 
     def get_logprob_entropy(self, state, action):
-        a_avg = self.net(state)
+        a_avg = self.forward(state)
         a_std = self.a_logstd.exp()
 
         delta = ((a_avg - action) / a_std).pow(2) * 0.5
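For reference, `delta`, `a_logstd`, and `sqrt_2pi_log` are the pieces of a diagonal-Gaussian log-density, log N(a | mu, sigma) = -log(sqrt(2*pi)) - log(sigma) - ((mu - a)/sigma)^2 / 2, summed over action dimensions. A cross-check of that formula with made-up numbers (not part of the diff):

```python
# Cross-check the visible terms against torch.distributions.Normal.
import numpy as np
import torch

a_avg = torch.tensor([[0.3, -0.1]])      # made-up action mean
a_logstd = torch.tensor([[-0.5, -0.5]])  # matches the init in __init__
action = torch.tensor([[0.2, 0.0]])
a_std = a_logstd.exp()

delta = ((a_avg - action) / a_std).pow(2) * 0.5
sqrt_2pi_log = np.log(np.sqrt(2 * np.pi))
logprob = -(sqrt_2pi_log + a_logstd + delta).sum(1)

ref = torch.distributions.Normal(a_avg, a_std).log_prob(action).sum(1)
assert torch.allclose(logprob, ref)
```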
@@ -63,19 +64,21 @@ class ActorPPO(nn.Module):
 class CriticAdv(nn.Module):
     def __init__(self, mid_dim, state_dim, _action_dim, layer_norm=False):
         super().__init__()
+        self.layer_norm = layer_norm
         self.net = nn.Sequential(nn.Linear(state_dim, mid_dim), nn.ReLU(),
                                  nn.Linear(mid_dim, mid_dim), nn.ReLU(),
                                  nn.Linear(mid_dim, mid_dim), nn.Hardswish(),
                                  nn.Linear(mid_dim, 1))
-        if layer_norm:
-            self.layer_norm(self.net, std=1.0)
+        if self.layer_norm:
+            self.apply_layer_norm()
 
-    @staticmethod
-    def layer_norm(layer, std=1.0, bias_const=0.0):
-        for i in layer:
-            if hasattr(i, 'weight'):
-                torch.nn.init.orthogonal_(i.weight, std)
-                torch.nn.init.constant_(i.bias, bias_const)
+    def apply_layer_norm(self):
+        def init_weights(layer):
+            if isinstance(layer, nn.Linear):
+                nn.init.orthogonal_(layer.weight, 1.0)
+                nn.init.constant_(layer.bias, 0.0)
+
+        self.net.apply(init_weights)
 
     def forward(self, state):
         return self.net(state)  # Advantage value
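The commit title says "edit the layer normalization", and the deleted comment above admitted the first attempt "doesn't work" — yet after the refactor both helpers still only re-initialize weights. If genuine layer normalization were the goal, one option (an assumption, not what this commit implements) would be to interleave `nn.LayerNorm` with the hidden layers:

```python
# Sketch only: a critic body with real LayerNorm modules after each hidden
# Linear layer. Hypothetical alternative, not the committed code.
import torch.nn as nn

def build_critic_net(mid_dim, state_dim):
    return nn.Sequential(nn.Linear(state_dim, mid_dim), nn.LayerNorm(mid_dim), nn.ReLU(),
                         nn.Linear(mid_dim, mid_dim), nn.LayerNorm(mid_dim), nn.ReLU(),
                         nn.Linear(mid_dim, mid_dim), nn.LayerNorm(mid_dim), nn.Hardswish(),
                         nn.Linear(mid_dim, 1))
```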
@@ -116,7 +119,6 @@ class AgentPPO:
 
         self.cri_optim = torch.optim.Adam(self.cri.parameters(), learning_rate)
         self.act_optim = torch.optim.Adam(self.act.parameters(), learning_rate) if self.ClassAct else self.cri
-        del self.ClassCri, self.ClassAct  # why del self.ClassCri and self.ClassAct here, to save memory?
 
     def select_action(self, state):
         states = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
@@ -129,8 +131,8 @@ class AgentPPO:
         last_done = 0
         for i in range(target_step):
             action, noise = self.select_action(state)
-            state, next_state, reward, done, = env.step(
-                np.tanh(action))  # the step of cut action is finally organized into the environment.
+            # squashing the action with tanh is handled here, right before the environment step
+            state, next_state, reward, done, = env.step(np.tanh(action))
             trajectory_temp.append((state, reward, done, action, noise))
             if done:
                 state = env.reset()
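`np.tanh` squashes the unbounded Gaussian sample into (-1, 1) before the environment consumes it:

```python
# Illustrative only: tanh bounds arbitrary action samples.
import numpy as np

action = np.array([-3.0, 0.1, 4.2])
print(np.tanh(action))  # approx. [-0.995, 0.0997, 0.9996]
```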
@@ -140,8 +142,8 @@ class AgentPPO:
         self.state = state
 
         '''splice list'''
-        trajectory_list = self.trajectory_list + trajectory_temp[
-                          :last_done + 1]  # store 0 trajectory information to the list
+        # splice the steps up to the last completed episode into the stored list
+        trajectory_list = self.trajectory_list + trajectory_temp[:last_done + 1]
         self.trajectory_list = trajectory_temp[last_done:]
         return trajectory_list
 
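On toy data (made-up lists), the splice hands out the steps up to and including index `last_done` and carries the tail from `last_done` onward into the next rollout; note the slicing keeps the step at index `last_done` in both pieces:

```python
# Toy illustration of the splice above.
trajectory_list = ['a', 'b']       # previously stored steps
trajectory_temp = ['c', 'd', 'e']  # fresh rollout
last_done = 1                      # index of the last finished-episode step

spliced = trajectory_list + trajectory_temp[:last_done + 1]  # ['a', 'b', 'c', 'd']
carry = trajectory_temp[last_done:]                          # ['d', 'e']
```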
@@ -149,12 +151,12 @@ class AgentPPO:
         """put data extraction and the network update together"""
         with torch.no_grad():
             buf_len = buffer[0].shape[0]
-            buf_state, buf_action, buf_noise, buf_reward, buf_mask = [ten.to(self.device) for ten in
-                                                                      buffer]  # decompose buffer data
+            # decompose buffer data
+            buf_state, buf_action, buf_noise, buf_reward, buf_mask = [ten.to(self.device) for ten in buffer]
 
             '''get buf_r_sum, buf_logprob'''
             bs = 4096  # set a smaller 'BatchSize' such as 1024 when out of GPU memory
-            buf_value = [self.cri_target(buf_state[i:i + bs]) for i in range(0, buf_len, bs)]  #
+            buf_value = [self.cri_target(buf_state[i:i + bs]) for i in range(0, buf_len, bs)]
             buf_value = torch.cat(buf_value, dim=0)
             buf_logprob = self.act.get_old_logprob(buf_action, buf_noise)
 
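Evaluating the critic in chunks of `bs` rows bounds peak GPU memory; concatenating the chunks gives the same values as a single full-batch pass. A self-contained sketch with a stand-in critic and made-up sizes:

```python
# Chunked evaluation equals full-batch evaluation (up to float rounding).
import torch
import torch.nn as nn

critic = nn.Linear(4, 1)           # stand-in for self.cri_target
buf_state = torch.randn(10000, 4)
bs = 4096

with torch.no_grad():
    chunks = [critic(buf_state[i:i + bs]) for i in range(0, buf_state.shape[0], bs)]
    buf_value = torch.cat(chunks, dim=0)
    assert torch.allclose(buf_value, critic(buf_state), atol=1e-6)
```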
@@ -317,7 +319,7 @@ if __name__ == '__main__':
     agent = args.agent
     env = args.env
     agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
-               args.if_per_or_gae)
+               args.if_per_or_gae, layer_norm=True)
 
     cwd = args.cwd
     gamma = args.gamma
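The call now passes `layer_norm=True`; the body of `AgentPPO.init` is not shown in this diff, but given the new constructor arguments it presumably just forwards the flag. A sketch of the assumed wiring, not committed code:

```python
# Assumed wiring inside AgentPPO.init (inferred from the call above; the real
# method body is not part of this diff).
def init(self, net_dim, state_dim, action_dim, learning_rate, if_per_or_gae, layer_norm=False):
    self.act = self.ClassAct(net_dim, state_dim, action_dim, layer_norm=layer_norm)
    self.cri = self.ClassCri(net_dim, state_dim, action_dim, layer_norm=layer_norm)
```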
tools.py (1 changed line)
@@ -49,7 +49,6 @@ def optimization_base_result(env, month, day, initial_soc):
     # set up the system variables
     on_off = m.addVars(NUM_GEN, period, vtype=GRB.BINARY, name='on_off')
     gen_output = m.addVars(NUM_GEN, period, vtype=GRB.CONTINUOUS, name='output')
-    pv = m.addVars(period, vtype=GRB.CONTINUOUS, lb=0, name='pv')
     # set up the charge/discharge constraints
     battery_energy_change = m.addVars(period, vtype=GRB.CONTINUOUS, lb=env.battery.max_discharge,
                                       ub=env.battery.max_charge, name='battery_action')
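Dropping the `pv` variable presumably means PV generation now enters the model as fixed data rather than as a free decision variable. For reference, `Model.addVars` with two index sets returns a `tupledict` keyed by `(generator, period)` — a minimal sketch with made-up sizes:

```python
# gurobipy variable layout, illustrative sizes only.
import gurobipy as gp
from gurobipy import GRB

m = gp.Model('base_result_sketch')
NUM_GEN, period = 3, 24
on_off = m.addVars(NUM_GEN, period, vtype=GRB.BINARY, name='on_off')
gen_output = m.addVars(NUM_GEN, period, vtype=GRB.CONTINUOUS, name='output')

m.update()
print(on_off[0, 0].VarName)  # 'on_off[0,0]'
```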