This commit is contained in:
chenxiaodong 2024-06-25 14:18:29 +08:00
parent 9d0b220b54
commit 0fd80456ac
7 changed files with 4172 additions and 4191 deletions

File diff suppressed because it is too large

Image changed: 142 KiB before, 141 KiB after

File diff suppressed because it is too large

Image changed: 127 KiB before, 125 KiB after

View File

@@ -19,7 +19,7 @@ class AgentBase:
self.act = self.act_target = self.if_use_act_target = self.act_optim = self.ClassAct = None
def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, _if_per_or_gae=False, gpu_id=0):
# explict call self.init() for multiprocessing
# explicitly call self.init() when using multiprocessing
self.device = torch.device(f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu")
self.action_dim = action_dim
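The deferred-init pattern above matters because CUDA tensors should be created inside the worker process rather than in the parent. A minimal sketch of that idea, assuming PyTorch and the 'spawn' start method (the class and function names here are illustrative, not the repository's code):

import torch
import torch.multiprocessing as mp


class TinyAgent:
    """Sketch: construct cheaply, allocate CUDA tensors only in init()."""

    def __init__(self):
        self.device = None
        self.net = None  # nothing touches CUDA yet

    def init(self, state_dim, action_dim, gpu_id=0):
        # called inside the worker process, so the CUDA context belongs to that process
        self.device = torch.device(f"cuda:{gpu_id}" if (torch.cuda.is_available() and gpu_id >= 0) else "cpu")
        self.net = torch.nn.Linear(state_dim, action_dim).to(self.device)


def worker(gpu_id):
    agent = TinyAgent()
    agent.init(state_dim=8, action_dim=2, gpu_id=gpu_id)
    # ... training loop would go here ...


if __name__ == "__main__":
    mp.set_start_method("spawn")  # required when worker processes use CUDA
    p = mp.Process(target=worker, args=(0,))
    p.start()
    p.join()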
@@ -81,7 +81,7 @@ class AgentBase:
class AgentDDPG(AgentBase):
def __init__(self):
super().__init__()
self.explore_noise = 0.1 # explore noise of action
self.explore_noise = 0.1
self.if_use_cri_target = self.if_use_act_target = True
self.ClassCri = Critic
self.ClassAct = Actor
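The explore_noise field above is typically used to perturb the deterministic DDPG action with Gaussian noise during rollout. A minimal sketch under that assumption (the helper and its interface are illustrative, not the repository's exact API):

import torch

def select_action_with_noise(actor, state, explore_noise=0.1, low=-1.0, high=1.0):
    """Deterministic action plus Gaussian exploration noise, clipped to the action bounds."""
    with torch.no_grad():
        action = actor(state.unsqueeze(0)).squeeze(0)     # deterministic policy output
        noise = torch.randn_like(action) * explore_noise  # zero-mean Gaussian exploration noise
        return (action + noise).clamp(low, high)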
@@ -163,7 +163,7 @@ class AgentSAC(AgentBase):
super().init(net_dim, state_dim, action_dim, learning_rate, _if_use_per, gpu_id)
self.alpha_log = torch.tensor((-np.log(action_dim) * np.e,), dtype=torch.float32,
requires_grad=True, device=self.device) # trainable parameter
requires_grad=True, device=self.device)
self.alpha_optim = torch.optim.Adam((self.alpha_log,), lr=learning_rate)
self.target_entropy = np.log(action_dim)
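alpha_log above is SAC's trainable entropy temperature, kept in log space so that alpha = exp(alpha_log) stays positive, and target_entropy = log(action_dim) is the entropy level it is tuned toward. A minimal sketch of that temperature update in the style of this codebase family (sign conventions for logprob differ between SAC implementations, so treat this as illustrative rather than the repository's exact loss):

import numpy as np
import torch

action_dim, device = 4, torch.device("cpu")
alpha_log = torch.tensor((-np.log(action_dim) * np.e,), dtype=torch.float32,
                         requires_grad=True, device=device)
alpha_optim = torch.optim.Adam((alpha_log,), lr=1e-4)
target_entropy = np.log(action_dim)

def update_alpha(logprob):
    # logprob: entropy-like term per sample; alpha_log moves so that it tracks target_entropy
    obj_alpha = (alpha_log * (logprob - target_entropy).detach()).mean()
    alpha_optim.zero_grad()
    obj_alpha.backward()
    alpha_optim.step()
    return alpha_log.exp().detach()  # temperature used in the actor/critic objectives

alpha = update_alpha(torch.randn(32, 1))  # example batch of (fake) per-sample terms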
@@ -250,10 +250,9 @@ class AgentPPO(AgentBase):
with torch.no_grad():
buf_len = buffer[0].shape[0]
buf_state, buf_action, buf_noise, buf_reward, buf_mask = [ten.to(self.device) for ten in buffer]
# (ten_state, ten_action, ten_noise, ten_reward, ten_mask) = buffer
'''get buf_r_sum, buf_logprob'''
bs = 2 ** 10 # set a smaller 'BatchSize' when out of GPU memory.
bs = 2 ** 10
buf_value = [self.cri_target(buf_state[i:i + bs]) for i in range(0, buf_len, bs)]
buf_value = torch.cat(buf_value, dim=0)
buf_logprob = self.act.get_old_logprob(buf_action, buf_noise)
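bs = 2 ** 10 above bounds how many states pass through the critic per forward call, so the whole buffer never has to fit in GPU memory at once. A generic sketch of that chunking pattern (illustrative helper, not the repository's API):

import torch

def evaluate_in_chunks(net, data, chunk_size=2 ** 10):
    """Run net over data in fixed-size chunks and concatenate, to cap peak GPU memory."""
    with torch.no_grad():
        outputs = [net(data[i:i + chunk_size]) for i in range(0, data.shape[0], chunk_size)]
    return torch.cat(outputs, dim=0)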
@@ -273,7 +272,7 @@ class AgentPPO(AgentBase):
logprob = buf_logprob[indices]
advantage = buf_advantage[indices]
new_logprob, obj_entropy = self.act.get_logprob_entropy(state, action) # it is obj_actor
new_logprob, obj_entropy = self.act.get_logprob_entropy(state, action) # it's obj_actor
ratio = (new_logprob - logprob.detach()).exp()
surrogate1 = advantage * ratio
surrogate2 = advantage * ratio.clamp(1 - self.ratio_clip, 1 + self.ratio_clip)
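The lines above implement PPO's clipped surrogate: the probability ratio is clamped to [1 - clip, 1 + clip] and the pessimistic (minimum) surrogate is maximized. A self-contained sketch of that standard objective (hypothetical helper function):

import torch

def ppo_clip_objective(new_logprob, old_logprob, advantage, ratio_clip=0.2):
    """Return the scalar loss to minimize: the negative of the clipped surrogate."""
    ratio = (new_logprob - old_logprob.detach()).exp()
    surrogate1 = advantage * ratio
    surrogate2 = advantage * ratio.clamp(1.0 - ratio_clip, 1.0 + ratio_clip)
    return -torch.min(surrogate1, surrogate2).mean()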
@@ -309,6 +308,6 @@ class AgentPPO(AgentBase):
for i in range(buf_len - 1, -1, -1):
buf_r_sum[i] = ten_reward[i] + ten_mask[i] * pre_r_sum
pre_r_sum = buf_r_sum[i]
buf_advantage[i] = ten_reward[i] + ten_mask[i] * (pre_advantage - ten_value[i]) # fix a bug here
buf_advantage[i] = ten_reward[i] + ten_mask[i] * (pre_advantage - ten_value[i])
pre_advantage = ten_value[i] + buf_advantage[i] * self.lambda_gae_adv
return buf_r_sum, buf_advantage
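The backward loop above accumulates the discounted return (ten_mask already folds gamma and the done flag together) along with an advantage recursion. For comparison, a textbook GAE(lambda) sketch under the same masking convention (mask = gamma * (1 - done)); this is the standard formulation, not necessarily identical to the variant in the diff:

import torch

def compute_reward_sum_and_gae(reward, mask, value, lambda_gae=0.98):
    """reward, mask, value: 1-D tensors of equal length; mask = gamma * (1 - done)."""
    buf_len = reward.shape[0]
    r_sum = torch.empty(buf_len, dtype=torch.float32)     # discounted reward-to-go
    advantage = torch.empty(buf_len, dtype=torch.float32)  # GAE(lambda) advantage
    pre_r_sum = 0.0
    pre_adv = 0.0
    pre_value = 0.0                                         # V(s_{t+1}); zero past the last step
    for i in range(buf_len - 1, -1, -1):
        r_sum[i] = reward[i] + mask[i] * pre_r_sum
        pre_r_sum = r_sum[i]
        delta = reward[i] + mask[i] * pre_value - value[i]  # TD residual
        advantage[i] = delta + mask[i] * lambda_gae * pre_adv
        pre_adv = advantage[i]
        pre_value = value[i]
    return r_sum, advantage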

View File

@@ -28,8 +28,8 @@ class ESSEnv(gym.Env):
self.dg_parameters = kwargs.get('dg_parameters', dg_parameters)
self.solar_parameters = kwargs.get('solar_parameters', solar_parameters)
self.wind_parameters = kwargs.get('wind_parameters', wind_parameters)
self.penalty_coefficient = 50 # control soft penalty constrain
self.sell_coefficient = 0.5 # control sell benefits
self.penalty_coefficient = 50 # soft-constraint penalty coefficient
self.sell_coefficient = 0.5 # sell-benefit coefficient
self.grid = Grid()
self.battery = Battery(self.battery_parameters)
@@ -81,12 +81,12 @@ class ESSEnv(gym.Env):
return obs
def step(self, action): # state transition: current_obs->take_action->get_reward->get_finish->next_obs
# put action into each component
# apply the action to each component
current_obs = self._build_state()
temperature = current_obs[7]
irradiance = current_obs[8]
wind_speed = current_obs[9]
self.battery.step(action[0]) # execute the state-transition part, battery.current_capacity also changed
self.battery.step(action[0]) # execute the state transition; battery.current_capacity is updated as well
self.dg1.step(action[1])
self.dg2.step(action[2])
self.dg3.step(action[3])
@@ -106,16 +106,16 @@ class ESSEnv(gym.Env):
buy_cost = 0
self.excess = 0
self.shedding = 0
if unbalance >= 0: # now in excess condition
if unbalance >= 0: # surplus (excess) condition
if unbalance <= self.grid.exchange_ability:
# revenue from selling to the grid is small [0.029, 0.1]
sell_benefit = self.grid.get_cost(price, unbalance) * self.sell_coefficient
else:
sell_benefit = self.grid.get_cost(price, self.grid.exchange_ability) * self.sell_coefficient
# real unbalance that grid could not meet
# real unbalance: the part the grid cannot absorb either
self.excess = unbalance - self.grid.exchange_ability
excess_penalty = self.excess * self.penalty_coefficient
else: # unbalance <0, its load shedding model, deficient penalty is used
else: # unbalance < 0: load-shedding mode, apply the deficiency penalty
if abs(unbalance) <= self.grid.exchange_ability:
buy_cost = self.grid.get_cost(price, abs(unbalance))
else:
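The branch above (the hunk is truncated by the diff view) settles the step's power imbalance: a positive unbalance is sold to the grid up to exchange_ability, discounted by sell_coefficient, with any remainder penalized as excess; a negative unbalance is bought from the grid, with the uncovered part penalized as load shedding. A condensed sketch of that settlement logic (a standalone function with a simple linear price standing in for Grid.get_cost, so illustrative rather than the environment's exact code):

def settle_unbalance(unbalance, price, exchange_ability,
                     sell_coefficient=0.5, penalty_coefficient=50.0):
    """Return (sell_benefit, buy_cost, penalty) for one step's power imbalance.

    Positive unbalance = surplus generation, negative = unmet demand.
    """
    sell_benefit = buy_cost = penalty = 0.0
    if unbalance >= 0:                                 # surplus: sell what the grid can absorb
        tradable = min(unbalance, exchange_ability)
        sell_benefit = price * tradable * sell_coefficient
        penalty = (unbalance - tradable) * penalty_coefficient   # curtailed excess
    else:                                              # deficit: buy what the grid can supply
        needed = abs(unbalance)
        tradable = min(needed, exchange_ability)
        buy_cost = price * tradable
        penalty = (needed - tradable) * penalty_coefficient      # shed load
    return sell_benefit, buy_cost, penalty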
@@ -164,7 +164,7 @@ class ESSEnv(gym.Env):
temperature = temperature_df['t2m'].to_numpy(dtype=float)
wind = wind_df['wind_speed'].to_numpy(dtype=float)
'''redesign the magnitude for price and amount of generation as well as demand'''
'''rescale the magnitudes of price, generation, and demand'''
def process_elements(elements, transform_function, add_function):
for element in elements:
transformed_element = transform_function(element)
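process_elements above is a small apply-then-collect helper used to rescale the raw series. A sketch of how such a helper is typically used; the body completion and the scaling factors are assumptions for illustration, not the repository's actual values:

import numpy as np

def process_elements(elements, transform_function, add_function):
    # apply the transform to each element, then hand it to the collector
    for element in elements:
        add_function(transform_function(element))

price, netload = [], []
raw_price = np.array([28.0, 31.5, 45.2])
raw_load = np.array([3120.0, 2980.0, 3555.0])
process_elements(raw_price, lambda x: max(x / 10.0, 0.5), price.append)  # hypothetical rescaling
process_elements(raw_load, lambda x: x / 1000.0, netload.append)         # hypothetical rescaling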

View File

@@ -2,7 +2,7 @@ import numpy as np
class DG:
"""simulate a simple diesel generator"""
"""simulate a diesel generator"""
def __init__(self, parameters):
self.current_output = None
@@ -37,7 +37,7 @@ class DG:
class Battery:
"""simulate a simple battery"""
"""simulate a battery"""
def __init__(self, parameters):
self.current_capacity = None
@@ -71,6 +71,7 @@ class Battery:
class Solar:
"""simulate a solar panel"""
def __init__(self, parameters):
self.current_power = None
self.base_voltage = parameters['V_b']
@@ -106,6 +107,7 @@ class Solar:
class Wind:
"""simulate a wind turbine"""
def __init__(self, parameters):
self.current_power = None
self.cutin_speed = parameters['cutin_speed']
@@ -137,6 +139,7 @@ class Wind:
class Grid:
"""simulate a grid"""
def __init__(self):
self.on = True
self.delta = 1
@@ -158,15 +161,3 @@ class Grid:
# current_day_price = self.price[24 * self.day:24 * self.day + self.time]
# result.extend(current_day_price)
return result
# def retrive_past_price(self):
# result = []
# if self.day < 1:
# past_price = self.past_price
# else:
# past_price = self.price[24 * (self.day - 1):24 * self.day]
# for item in past_price[(self.time - 24)::]:
# result.append(item)
# for item in self.price[24 * self.day:(24 * self.day + self.time)]:
# result.append(item)
# return result
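The removed retrive_past_price above stitched together a rolling 24-hour price window: the tail of the previous day followed by the hours already elapsed today. A compact sketch of that windowing idea (a standalone function over a flat hourly price list, illustrative rather than the class's exact method):

def rolling_price_window(price, day, time, past_price):
    """Return the 24 hourly prices preceding (day, time).

    price: flat hourly price list; past_price: fallback history for day 0.
    """
    previous_day = past_price if day < 1 else price[24 * (day - 1):24 * day]
    window = list(previous_day[time - 24:])          # tail of the previous day
    window += list(price[24 * day:24 * day + time])  # hours elapsed today
    return window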

View File

@@ -17,6 +17,7 @@ def plot_optimization_result(datasource, directory): # datasource is a DataFrame
plt.subplots_adjust(wspace=0.7, hspace=0.3)
plt.autoscale(tight=True)
T = np.array([i for i in range(24)])
# plot step cost in ax[0]
axs[0, 0].cla()
axs[0, 0].set_ylabel('Costs')
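For completeness, a minimal sketch of the plotting pattern used above: a grid of subplots over a 24-hour horizon with the step cost drawn in the first panel (synthetic data, assumed grid shape, and a hypothetical save path, purely illustrative):

import numpy as np
import matplotlib.pyplot as plt

T = np.arange(24)                                # 24-hour horizon
step_cost = np.random.uniform(50, 150, size=24)  # synthetic stand-in for the real costs

fig, axs = plt.subplots(2, 2, figsize=(10, 6))
plt.subplots_adjust(wspace=0.7, hspace=0.3)
axs[0, 0].cla()
axs[0, 0].set_ylabel('Costs')
axs[0, 0].bar(T, step_cost)                      # step cost in the first panel
fig.savefig('optimization_result.png')           # hypothetical output path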

View File

@@ -116,13 +116,11 @@ class Arguments:
"""revise here for our own purpose"""
def __init__(self, agent=None, env=None):
self.agent = agent # Deep Reinforcement Learning algorithm
self.env = env # the environment for training
# self.plot_shadow_on = False # control do we need to plot all shadow figures
self.cwd = None # current work directory. None means set automatically
self.agent = agent
self.env = env
self.cwd = None # current work directory, None means set automatically
self.if_remove = False # remove the cwd folder? (True, False, None:ask me)
self.visible_gpu = '0,1,2,3' # for example: os.environ['CUDA_VISIBLE_DEVICES'] = '0, 2,'
# self.worker_num = 2 # rollout workers number pre GPU (adjust it to get high GPU usage)
self.num_threads = 32 # number of CPU threads for model evaluation; torch.set_num_threads(self.num_threads)
'''Arguments for training'''
@@ -171,7 +169,7 @@ class Arguments:
torch.set_num_threads(self.num_threads)
torch.set_default_dtype(torch.float32)
os.environ['CUDA_VISIBLE_DEVICES'] = str(self.visible_gpu) # control how many GPU is used  
os.environ['CUDA_VISIBLE_DEVICES'] = str(self.visible_gpu)
def test_one_episode(env, act, device):
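The body of test_one_episode is not part of this diff. For orientation, a hedged sketch of what such an evaluation helper typically looks like in this codebase family (gym-style env, deterministic actor on a given device); the name and details below are illustrative, not the author's implementation:

import torch

def run_one_episode(env, act, device):
    """Roll out one greedy episode and return the undiscounted episode return."""
    episode_return = 0.0
    state = env.reset()
    for _ in range(getattr(env, 'max_step', 1000)):
        s_tensor = torch.as_tensor((state,), dtype=torch.float32, device=device)
        with torch.no_grad():
            a_tensor = act(s_tensor)
        action = a_tensor.detach().cpu().numpy()[0]  # greedy action, no exploration noise
        state, reward, done, _ = env.step(action)
        episode_return += reward
        if done:
            break
    return episode_return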