Commit 0fd80456ac ("nothing"), parent 9d0b220b54.

Two large image files were updated in this commit (diffs suppressed by the viewer): one 142 KiB -> 141 KiB, one 127 KiB -> 125 KiB.
agent.py (13 changed lines)
@@ -19,7 +19,7 @@ class AgentBase:
         self.act = self.act_target = self.if_use_act_target = self.act_optim = self.ClassAct = None

     def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, _if_per_or_gae=False, gpu_id=0):
-        # explict call self.init() for multiprocessing
+        # 显式调用self.init()进行多进程
         self.device = torch.device(f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu")
         self.action_dim = action_dim
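A minimal usage sketch of the explicit init() call referred to in the comment above (the concrete dimensions are placeholders, not values from the repo): the agent object is constructed first, and init() is called afterwards in the process that will own the networks, so they land on that worker's device.

agent = AgentDDPG()                      # any agent class derived from AgentBase
agent.init(net_dim=256, state_dim=10, action_dim=4, learning_rate=1e-4, gpu_id=0)
print(agent.device)                      # cuda:0 if a GPU is visible, else cpu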
@@ -81,7 +81,7 @@ class AgentBase:
 class AgentDDPG(AgentBase):
     def __init__(self):
         super().__init__()
-        self.explore_noise = 0.1  # explore noise of action
+        self.explore_noise = 0.1
         self.if_use_cri_target = self.if_use_act_target = True
         self.ClassCri = Critic
         self.ClassAct = Actor
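For context, explore_noise above is the standard deviation of Gaussian exploration noise. A hedged sketch of how such noise is typically applied to a deterministic actor's output (the helper name and the clamping range are assumptions, not part of this diff):

import torch

def select_action_with_noise(act_net, state, explore_noise=0.1, device="cpu"):
    # act_net is assumed to map a batch of states to actions in [-1, 1]
    state = torch.as_tensor((state,), dtype=torch.float32, device=device)
    action = act_net(state)[0]
    noise = torch.randn_like(action) * explore_noise   # Gaussian exploration noise
    return (action + noise).clamp(-1.0, 1.0).detach().cpu().numpy()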
@@ -163,7 +163,7 @@ class AgentSAC(AgentBase):
         super().init(net_dim, state_dim, action_dim, learning_rate, _if_use_per, gpu_id)

         self.alpha_log = torch.tensor((-np.log(action_dim) * np.e,), dtype=torch.float32,
-                                      requires_grad=True, device=self.device)  # trainable parameter
+                                      requires_grad=True, device=self.device)
         self.alpha_optim = torch.optim.Adam((self.alpha_log,), lr=learning_rate)
         self.target_entropy = np.log(action_dim)
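As a hedged aside on the hunk above: alpha_log and target_entropy are the ingredients of SAC's automatic entropy tuning. The usual update (not shown in this diff) trains the log-temperature so the policy entropy tracks target_entropy; a sketch, assuming logprob holds log-probabilities of freshly sampled actions:

obj_alpha = (self.alpha_log * (logprob - self.target_entropy).detach()).mean()
self.alpha_optim.zero_grad()
obj_alpha.backward()
self.alpha_optim.step()
alpha = self.alpha_log.exp().detach()   # temperature weighting the entropy bonus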
@@ -250,10 +250,9 @@ class AgentPPO(AgentBase):
         with torch.no_grad():
             buf_len = buffer[0].shape[0]
             buf_state, buf_action, buf_noise, buf_reward, buf_mask = [ten.to(self.device) for ten in buffer]
-            # (ten_state, ten_action, ten_noise, ten_reward, ten_mask) = buffer

             '''get buf_r_sum, buf_logprob'''
-            bs = 2 ** 10  # set a smaller 'BatchSize' when out of GPU memory.
+            bs = 2 ** 10
             buf_value = [self.cri_target(buf_state[i:i + bs]) for i in range(0, buf_len, bs)]
             buf_value = torch.cat(buf_value, dim=0)
             buf_logprob = self.act.get_old_logprob(buf_action, buf_noise)
@@ -273,7 +272,7 @@ class AgentPPO(AgentBase):
             logprob = buf_logprob[indices]
             advantage = buf_advantage[indices]

-            new_logprob, obj_entropy = self.act.get_logprob_entropy(state, action)  # it is obj_actor
+            new_logprob, obj_entropy = self.act.get_logprob_entropy(state, action)  # it's obj_actor
             ratio = (new_logprob - logprob.detach()).exp()
             surrogate1 = advantage * ratio
             surrogate2 = advantage * ratio.clamp(1 - self.ratio_clip, 1 + self.ratio_clip)
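For reference (this step falls outside the hunk): surrogate1 and surrogate2 are normally combined with an element-wise minimum to form PPO's clipped objective, plus an entropy bonus. A hedged sketch using the names above and an assumed lambda_entropy coefficient:

obj_surrogate = -torch.min(surrogate1, surrogate2).mean()
obj_actor = obj_surrogate + obj_entropy * self.lambda_entropy   # lambda_entropy is assumed
self.act_optim.zero_grad()
obj_actor.backward()
self.act_optim.step()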
@@ -309,6 +308,6 @@ class AgentPPO(AgentBase):
         for i in range(buf_len - 1, -1, -1):
             buf_r_sum[i] = ten_reward[i] + ten_mask[i] * pre_r_sum
             pre_r_sum = buf_r_sum[i]
-            buf_advantage[i] = ten_reward[i] + ten_mask[i] * (pre_advantage - ten_value[i])  # fix a bug here
+            buf_advantage[i] = ten_reward[i] + ten_mask[i] * (pre_advantage - ten_value[i])
             pre_advantage = ten_value[i] + buf_advantage[i] * self.lambda_gae_adv
         return buf_r_sum, buf_advantage
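A self-contained restatement of the backward loop above, useful when reading the hunk in isolation (the meaning of ten_mask is an assumption spelled out in the comment; this is a sketch, not new repo code):

import torch

def reward_sum_and_gae(ten_reward, ten_mask, ten_value, lambda_gae_adv=0.98):
    # ten_mask is assumed to hold gamma * (1 - done), so it both discounts and
    # cuts the recursion at episode boundaries.
    buf_len = ten_reward.shape[0]
    buf_r_sum = torch.empty(buf_len, dtype=torch.float32)       # discounted return
    buf_advantage = torch.empty(buf_len, dtype=torch.float32)   # GAE-style advantage
    pre_r_sum = 0.0
    pre_advantage = 0.0
    for i in range(buf_len - 1, -1, -1):
        buf_r_sum[i] = ten_reward[i] + ten_mask[i] * pre_r_sum
        pre_r_sum = buf_r_sum[i]
        buf_advantage[i] = ten_reward[i] + ten_mask[i] * (pre_advantage - ten_value[i])
        pre_advantage = ten_value[i] + buf_advantage[i] * lambda_gae_adv
    return buf_r_sum, buf_advantage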
@@ -28,8 +28,8 @@ class ESSEnv(gym.Env):
         self.dg_parameters = kwargs.get('dg_parameters', dg_parameters)
         self.solar_parameters = kwargs.get('solar_parameters', solar_parameters)
         self.wind_parameters = kwargs.get('wind_parameters', wind_parameters)
-        self.penalty_coefficient = 50  # control soft penalty constrain
-        self.sell_coefficient = 0.5  # control sell benefits
+        self.penalty_coefficient = 50  # 约束惩罚系数
+        self.sell_coefficient = 0.5  # 售出利润系数

         self.grid = Grid()
         self.battery = Battery(self.battery_parameters)
@@ -81,12 +81,12 @@ class ESSEnv(gym.Env):
         return obs

     def step(self, action):  # state transition: current_obs->take_action->get_reward->get_finish->next_obs
-        # put action into each component
+        # 在每个组件中添加动作
        current_obs = self._build_state()
        temperature = current_obs[7]
        irradiance = current_obs[8]
        wind_speed = current_obs[9]
-        self.battery.step(action[0])  # execute the state-transition part, battery.current_capacity also changed
+        self.battery.step(action[0])  # 执行状态转换,电池当前容量也改变
        self.dg1.step(action[1])
        self.dg2.step(action[2])
        self.dg3.step(action[3])
@@ -106,16 +106,16 @@ class ESSEnv(gym.Env):
         buy_cost = 0
         self.excess = 0
         self.shedding = 0
-        if unbalance >= 0:  # now in excess condition
+        if unbalance >= 0:  # 现在过剩
             if unbalance <= self.grid.exchange_ability:
                 # sell money to grid is little [0.029,0.1]
                 sell_benefit = self.grid.get_cost(price, unbalance) * self.sell_coefficient
             else:
                 sell_benefit = self.grid.get_cost(price, self.grid.exchange_ability) * self.sell_coefficient
-                # real unbalance that grid could not meet
+                # real unbalance:电网也无法满足
                 self.excess = unbalance - self.grid.exchange_ability
                 excess_penalty = self.excess * self.penalty_coefficient
-        else:  # unbalance <0, its load shedding model, deficient penalty is used
+        else:  # unbalance <0, 采用缺少惩罚
             if abs(unbalance) <= self.grid.exchange_ability:
                 buy_cost = self.grid.get_cost(price, abs(unbalance))
             else:
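A small worked example of the surplus branch above. The numbers are purely illustrative and grid.get_cost is assumed to behave like price * energy for this illustration only:

price, unbalance = 0.05, 130.0                      # 130 kWh of surplus this step
exchange_ability, sell_coefficient, penalty_coefficient = 100.0, 0.5, 50.0

sell_benefit = price * exchange_ability * sell_coefficient   # only 100 kWh can be sold: 2.5
excess = unbalance - exchange_ability                        # 30 kWh the grid cannot absorb
excess_penalty = excess * penalty_coefficient                # 1500.0, dominating the reward
print(sell_benefit, excess, excess_penalty)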
@@ -164,7 +164,7 @@ class ESSEnv(gym.Env):
         temperature = temperature_df['t2m'].to_numpy(dtype=float)
         wind = wind_df['wind_speed'].to_numpy(dtype=float)

-        '''redesign the magnitude for price and amount of generation as well as demand'''
+        '''重新设计价格和发电量以及需求的大小'''
         def process_elements(elements, transform_function, add_function):
             for element in elements:
                 transformed_element = transform_function(element)
module.py (19 changed lines)
@@ -2,7 +2,7 @@ import numpy as np


 class DG:
-    """simulate a simple diesel generator"""
+    """simulate a diesel generator"""

     def __init__(self, parameters):
         self.current_output = None
@@ -37,7 +37,7 @@ class DG:


 class Battery:
-    """simulate a simple battery"""
+    """simulate a battery"""

     def __init__(self, parameters):
         self.current_capacity = None
@@ -71,6 +71,7 @@ class Battery:


 class Solar:
+    """simulate a solar panel"""
     def __init__(self, parameters):
         self.current_power = None
         self.base_voltage = parameters['V_b']
@@ -106,6 +107,7 @@ class Solar:


 class Wind:
+    """simulate a wind turbine"""
     def __init__(self, parameters):
         self.current_power = None
         self.cutin_speed = parameters['cutin_speed']
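Only cutin_speed is visible in this hunk; as a hedged illustration of why a wind model carries such a parameter, here is a generic piecewise power curve (rated_speed, cutoff_speed and rated_power are assumptions, not names from module.py):

def wind_power(wind_speed, cutin_speed=3.0, rated_speed=12.0, cutoff_speed=25.0, rated_power=200.0):
    if wind_speed < cutin_speed or wind_speed >= cutoff_speed:
        return 0.0                      # below cut-in or above cut-off: no output
    if wind_speed < rated_speed:
        # output rises roughly with the cube of wind speed up to rated speed
        return rated_power * (wind_speed ** 3 - cutin_speed ** 3) / (rated_speed ** 3 - cutin_speed ** 3)
    return rated_power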
@@ -137,6 +139,7 @@ class Wind:


 class Grid:
+    """simulate a grid"""
     def __init__(self):
         self.on = True
         self.delta = 1
@@ -158,15 +161,3 @@ class Grid:
         # current_day_price = self.price[24 * self.day:24 * self.day + self.time]
         # result.extend(current_day_price)
         return result
-
-    # def retrive_past_price(self):
-    #     result = []
-    #     if self.day < 1:
-    #         past_price = self.past_price
-    #     else:
-    #         past_price = self.price[24 * (self.day - 1):24 * self.day]
-    #     for item in past_price[(self.time - 24)::]:
-    #         result.append(item)
-    #     for item in self.price[24 * self.day:(24 * self.day + self.time)]:
-    #         result.append(item)
-    #     return result
@@ -17,6 +17,7 @@ def plot_optimization_result(datasource, directory):  # data source is dataframe
     plt.subplots_adjust(wspace=0.7, hspace=0.3)
     plt.autoscale(tight=True)
     T = np.array([i for i in range(24)])

     # plot step cost in ax[0]
     axs[0, 0].cla()
     axs[0, 0].set_ylabel('Costs')
tools.py (10 changed lines)
@@ -116,13 +116,11 @@ class Arguments:
     """revise here for our own purpose"""

     def __init__(self, agent=None, env=None):
-        self.agent = agent  # Deep Reinforcement Learning algorithm
-        self.env = env  # the environment for training
-        # self.plot_shadow_on = False  # control do we need to plot all shadow figures
-        self.cwd = None  # current work directory. None means set automatically
+        self.agent = agent
+        self.env = env
+        self.cwd = None  # current work directory, None means set automatically
         self.if_remove = False  # remove the cwd folder? (True, False, None:ask me)
         self.visible_gpu = '0,1,2,3'  # for example: os.environ['CUDA_VISIBLE_DEVICES'] = '0, 2,'
-        # self.worker_num = 2  # rollout workers number pre GPU (adjust it to get high GPU usage)
         self.num_threads = 32  # cpu_num for evaluate model, torch.set_num_threads(self.num_threads)

         '''Arguments for training'''
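A minimal usage sketch of the Arguments container above; AgentPPO and ESSEnv appear elsewhere in this commit, only fields visible in this hunk are set, and constructing ESSEnv() with its default parameters is an assumption:

args = Arguments(agent=AgentPPO(), env=ESSEnv())
args.visible_gpu = '0'        # expose a single GPU to this run
args.num_threads = 8          # later passed to torch.set_num_threads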
@@ -171,7 +169,7 @@ class Arguments:
         torch.set_num_threads(self.num_threads)
         torch.set_default_dtype(torch.float32)

-        os.environ['CUDA_VISIBLE_DEVICES'] = str(self.visible_gpu)  # control how many GPU is used
+        os.environ['CUDA_VISIBLE_DEVICES'] = str(self.visible_gpu)


 def test_one_episode(env, act, device):