nothing

commit 0fd80456ac
parent 9d0b220b54
[Two image files changed; diff suppressed. Sizes: 142 KiB → 141 KiB and 127 KiB → 125 KiB.]
agent.py (13 changed lines)
@@ -19,7 +19,7 @@ class AgentBase:
         self.act = self.act_target = self.if_use_act_target = self.act_optim = self.ClassAct = None
 
     def init(self, net_dim, state_dim, action_dim, learning_rate=1e-4, _if_per_or_gae=False, gpu_id=0):
-        # explict call self.init() for multiprocessing
+        # explicitly call self.init() for multiprocessing
         self.device = torch.device(f"cuda:{gpu_id}" if (torch.cuda.is_available() and (gpu_id >= 0)) else "cpu")
         self.action_dim = action_dim
 
@@ -81,7 +81,7 @@ class AgentBase:
 class AgentDDPG(AgentBase):
     def __init__(self):
         super().__init__()
-        self.explore_noise = 0.1 # explore noise of action
+        self.explore_noise = 0.1
         self.if_use_cri_target = self.if_use_act_target = True
         self.ClassCri = Critic
         self.ClassAct = Actor
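Note: explore_noise is the standard deviation of the Gaussian noise added to the deterministic actor output during exploration. A minimal sketch of how the field is typically used; the select_action body is not part of this hunk, so its exact form is an assumption:

    # sketch (assumed usage): perturb the actor output, then clamp to the normalized action range
    def select_action(self, state):
        state = torch.as_tensor((state,), dtype=torch.float32, device=self.device)
        action = self.act(state)[0]
        noise = torch.randn_like(action) * self.explore_noise
        return (action + noise).clamp(-1.0, 1.0).detach().cpu().numpy()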
@@ -163,7 +163,7 @@ class AgentSAC(AgentBase):
         super().init(net_dim, state_dim, action_dim, learning_rate, _if_use_per, gpu_id)
 
         self.alpha_log = torch.tensor((-np.log(action_dim) * np.e,), dtype=torch.float32,
-                                      requires_grad=True, device=self.device) # trainable parameter
+                                      requires_grad=True, device=self.device)
         self.alpha_optim = torch.optim.Adam((self.alpha_log,), lr=learning_rate)
         self.target_entropy = np.log(action_dim)
 
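Note: alpha_log holds the log of the SAC entropy temperature (kept in log space so alpha = alpha_log.exp() stays positive), and target_entropy = np.log(action_dim) is the entropy level the temperature is tuned toward. A sketch of the usual temperature update, assuming logprob is the log-probability of actions sampled from the current policy (this step lies outside the hunk):

    # sketch (assumed): adjust alpha so the policy entropy tracks target_entropy
    obj_alpha = (self.alpha_log * (logprob - self.target_entropy).detach()).mean()
    self.alpha_optim.zero_grad()
    obj_alpha.backward()
    self.alpha_optim.step()
    alpha = self.alpha_log.exp().detach()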
@@ -250,10 +250,9 @@ class AgentPPO(AgentBase):
         with torch.no_grad():
             buf_len = buffer[0].shape[0]
             buf_state, buf_action, buf_noise, buf_reward, buf_mask = [ten.to(self.device) for ten in buffer]
-            # (ten_state, ten_action, ten_noise, ten_reward, ten_mask) = buffer
 
             '''get buf_r_sum, buf_logprob'''
-            bs = 2 ** 10 # set a smaller 'BatchSize' when out of GPU memory.
+            bs = 2 ** 10
             buf_value = [self.cri_target(buf_state[i:i + bs]) for i in range(0, buf_len, bs)]
             buf_value = torch.cat(buf_value, dim=0)
             buf_logprob = self.act.get_old_logprob(buf_action, buf_noise)
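Note: evaluating cri_target in slices of bs = 2 ** 10 states bounds peak GPU memory; the dropped comment ("set a smaller 'BatchSize' when out of GPU memory") described exactly that, and the behavior is unchanged. The list comprehension is equivalent to this explicit loop:

    # same computation as the list comprehension above, written as a loop
    values = []
    for i in range(0, buf_len, bs):
        values.append(self.cri_target(buf_state[i:i + bs]))
    buf_value = torch.cat(values, dim=0)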
@@ -273,7 +272,7 @@ class AgentPPO(AgentBase):
             logprob = buf_logprob[indices]
             advantage = buf_advantage[indices]
 
-            new_logprob, obj_entropy = self.act.get_logprob_entropy(state, action) # it is obj_actor
+            new_logprob, obj_entropy = self.act.get_logprob_entropy(state, action) # it's obj_actor
             ratio = (new_logprob - logprob.detach()).exp()
             surrogate1 = advantage * ratio
             surrogate2 = advantage * ratio.clamp(1 - self.ratio_clip, 1 + self.ratio_clip)
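Note: ratio = exp(new_logprob - old_logprob) is the importance ratio between the updated policy and the data-collecting policy, and ratio_clip bounds it to [1 - clip, 1 + clip]. PPO then ascends the pessimistic (elementwise minimum) surrogate; a sketch of that follow-up step, with lambda_entropy as an assumed entropy-bonus weight:

    # sketch (assumed continuation): clipped, pessimistic surrogate plus an entropy bonus
    obj_surrogate = torch.min(surrogate1, surrogate2).mean()
    obj_actor = -(obj_surrogate + obj_entropy * lambda_entropy)  # lambda_entropy: assumed hyperparameter
    obj_actor.backward()  # followed by an optimizer step on self.act_optim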
@@ -309,6 +308,6 @@ class AgentPPO(AgentBase):
         for i in range(buf_len - 1, -1, -1):
             buf_r_sum[i] = ten_reward[i] + ten_mask[i] * pre_r_sum
             pre_r_sum = buf_r_sum[i]
-            buf_advantage[i] = ten_reward[i] + ten_mask[i] * (pre_advantage - ten_value[i]) # fix a bug here
+            buf_advantage[i] = ten_reward[i] + ten_mask[i] * (pre_advantage - ten_value[i])
             pre_advantage = ten_value[i] + buf_advantage[i] * self.lambda_gae_adv
         return buf_r_sum, buf_advantage
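Note: this backward sweep fills the discounted-return and advantage buffers in one pass; with ten_mask[i] typically equal to gamma * (1 - done[i]), the return recursion is buf_r_sum[i] = reward[i] + mask[i] * r_sum[i+1]. The removed "# fix a bug here" marker was only a comment. For comparison, textbook GAE(lambda) looks like the sketch below, with names assumed to match the hunk:

    # sketch: textbook GAE(lambda) with gamma folded into the mask, for comparison
    pre_advantage = 0.0
    next_value = 0.0  # or V(s_T) when the rollout is truncated rather than terminated
    for i in range(buf_len - 1, -1, -1):
        delta = ten_reward[i] + ten_mask[i] * next_value - ten_value[i]
        buf_advantage[i] = delta + ten_mask[i] * self.lambda_gae_adv * pre_advantage
        pre_advantage = buf_advantage[i]
        next_value = ten_value[i]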
@@ -28,8 +28,8 @@ class ESSEnv(gym.Env):
         self.dg_parameters = kwargs.get('dg_parameters', dg_parameters)
         self.solar_parameters = kwargs.get('solar_parameters', solar_parameters)
         self.wind_parameters = kwargs.get('wind_parameters', wind_parameters)
-        self.penalty_coefficient = 50 # control soft penalty constrain
-        self.sell_coefficient = 0.5 # control sell benefits
+        self.penalty_coefficient = 50 # constraint penalty coefficient
+        self.sell_coefficient = 0.5 # sale profit coefficient
 
         self.grid = Grid()
         self.battery = Battery(self.battery_parameters)
@@ -81,12 +81,12 @@ class ESSEnv(gym.Env):
         return obs
 
     def step(self, action): # state transition: current_obs->take_action->get_reward->get_finish->next_obs
-        # put action into each component
+        # apply the action to each component
         current_obs = self._build_state()
         temperature = current_obs[7]
         irradiance = current_obs[8]
         wind_speed = current_obs[9]
-        self.battery.step(action[0]) # execute the state-transition part, battery.current_capacity also changed
+        self.battery.step(action[0]) # execute the state transition; battery.current_capacity also changes
         self.dg1.step(action[1])
         self.dg2.step(action[2])
         self.dg3.step(action[3])
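Note: the action vector is split across the controllable components before the energy balance is settled; the layout implied by this hunk is:

    # action layout implied by step()
    # action[0] -> battery charge/discharge command (updates battery.current_capacity)
    # action[1], action[2], action[3] -> output commands for dg1, dg2, dg3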
@@ -106,16 +106,16 @@ class ESSEnv(gym.Env):
         buy_cost = 0
         self.excess = 0
         self.shedding = 0
-        if unbalance >= 0: # now in excess condition
+        if unbalance >= 0: # now in surplus
             if unbalance <= self.grid.exchange_ability:
                 # sell money to grid is little [0.029,0.1]
                 sell_benefit = self.grid.get_cost(price, unbalance) * self.sell_coefficient
             else:
                 sell_benefit = self.grid.get_cost(price, self.grid.exchange_ability) * self.sell_coefficient
-                # real unbalance that grid could not meet
+                # real unbalance: the grid cannot meet it either
                 self.excess = unbalance - self.grid.exchange_ability
                 excess_penalty = self.excess * self.penalty_coefficient
-        else: # unbalance <0, its load shedding model, deficient penalty is used
+        else: # unbalance < 0, a shortage penalty is applied
             if abs(unbalance) <= self.grid.exchange_ability:
                 buy_cost = self.grid.get_cost(price, abs(unbalance))
             else:
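Note: unbalance (net generation minus demand) drives the settlement: a surplus is sold back at a discount (sell_coefficient), anything beyond grid.exchange_ability becomes excess and is penalized, while a deficit is bought from the grid and any unserved remainder becomes load shedding. A condensed sketch of this branch; the shedding side is an assumption because its tail is cut off by the hunk:

    # condensed settlement logic (shedding branch assumed symmetric to the excess branch)
    tradable = min(abs(unbalance), self.grid.exchange_ability)
    if unbalance >= 0:
        sell_benefit = self.grid.get_cost(price, tradable) * self.sell_coefficient
        self.excess = max(unbalance - self.grid.exchange_ability, 0)
        excess_penalty = self.excess * self.penalty_coefficient
    else:
        buy_cost = self.grid.get_cost(price, tradable)
        self.shedding = max(abs(unbalance) - self.grid.exchange_ability, 0)
        shedding_penalty = self.shedding * self.penalty_coefficient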
@@ -164,7 +164,7 @@ class ESSEnv(gym.Env):
         temperature = temperature_df['t2m'].to_numpy(dtype=float)
         wind = wind_df['wind_speed'].to_numpy(dtype=float)
 
-        '''redesign the magnitude for price and amount of generation as well as demand'''
+        '''rescale the magnitudes of price, generation, and demand'''
         def process_elements(elements, transform_function, add_function):
             for element in elements:
                 transformed_element = transform_function(element)
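Note: process_elements walks a series, transforms each element, and hands the result to an accumulator; the loop body is truncated by this hunk, so the completion and the call below are assumptions for illustration:

    # assumed completion of the helper
    def process_elements(elements, transform_function, add_function):
        for element in elements:
            add_function(transform_function(element))

    # hypothetical call: collect a rescaled copy of the wind-speed series
    rescaled_wind = []
    process_elements(wind, lambda w: w / 10.0, rescaled_wind.append)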
module.py (19 changed lines)
@@ -2,7 +2,7 @@ import numpy as np
 
 
 class DG:
-    """simulate a simple diesel generator"""
+    """simulate a diesel generator"""
 
     def __init__(self, parameters):
         self.current_output = None
@@ -37,7 +37,7 @@ class DG:
 
 
 class Battery:
-    """simulate a simple battery"""
+    """simulate a battery"""
 
     def __init__(self, parameters):
         self.current_capacity = None
@@ -71,6 +71,7 @@ class Battery:
 
 
 class Solar:
     """simulate a solar panel"""
     def __init__(self, parameters):
         self.current_power = None
         self.base_voltage = parameters['V_b']
@@ -106,6 +107,7 @@ class Solar:
 
 
 class Wind:
     """simulate a wind turbine"""
     def __init__(self, parameters):
         self.current_power = None
         self.cutin_speed = parameters['cutin_speed']
@@ -137,6 +139,7 @@ class Wind:
 
 
 class Grid:
     """simulate a grid"""
     def __init__(self):
         self.on = True
         self.delta = 1
@@ -158,15 +161,3 @@
         # current_day_price = self.price[24 * self.day:24 * self.day + self.time]
         # result.extend(current_day_price)
         return result
-
-    # def retrive_past_price(self):
-    #     result = []
-    #     if self.day < 1:
-    #         past_price = self.past_price
-    #     else:
-    #         past_price = self.price[24 * (self.day - 1):24 * self.day]
-    #     for item in past_price[(self.time - 24)::]:
-    #         result.append(item)
-    #     for item in self.price[24 * self.day:(24 * self.day + self.time)]:
-    #         result.append(item)
-    #     return result
@@ -17,6 +17,7 @@ def plot_optimization_result(datasource, directory): # data source is dataframe
     plt.subplots_adjust(wspace=0.7, hspace=0.3)
     plt.autoscale(tight=True)
     T = np.array([i for i in range(24)])
 
     # plot step cost in ax[0]
     axs[0, 0].cla()
     axs[0, 0].set_ylabel('Costs')
tools.py (10 changed lines)
@@ -116,13 +116,11 @@ class Arguments:
     """revise here for our own purpose"""
 
     def __init__(self, agent=None, env=None):
-        self.agent = agent # Deep Reinforcement Learning algorithm
-        self.env = env # the environment for training
-        # self.plot_shadow_on = False # control do we need to plot all shadow figures
-        self.cwd = None # current work directory. None means set automatically
+        self.agent = agent
+        self.env = env
+        self.cwd = None # current work directory, None means set automatically
         self.if_remove = False # remove the cwd folder? (True, False, None:ask me)
         self.visible_gpu = '0,1,2,3' # for example: os.environ['CUDA_VISIBLE_DEVICES'] = '0, 2,'
-        # self.worker_num = 2 # rollout workers number pre GPU (adjust it to get high GPU usage)
         self.num_threads = 32 # cpu_num for evaluate model, torch.set_num_threads(self.num_threads)
 
         '''Arguments for training'''
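Note: Arguments bundles the agent, the environment, and the run-time settings shown above. A minimal usage sketch; pairing AgentPPO with ESSEnv is an assumption based on the classes touched by this commit:

    # usage sketch (assumed pairing)
    args = Arguments(agent=AgentPPO(), env=ESSEnv())
    args.visible_gpu = '0'   # restrict CUDA_VISIBLE_DEVICES to one card
    args.cwd = None          # let the working directory be set automatically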
@@ -171,7 +169,7 @@ class Arguments:
         torch.set_num_threads(self.num_threads)
         torch.set_default_dtype(torch.float32)
 
-        os.environ['CUDA_VISIBLE_DEVICES'] = str(self.visible_gpu) # control how many GPU is used
+        os.environ['CUDA_VISIBLE_DEVICES'] = str(self.visible_gpu)
 
 
 def test_one_episode(env, act, device):