Merge pull request 'Merge inference code into main branch' (#2) from master into main
Reviewed-on: #2
This commit is contained in:
commit c68482a939
@@ -0,0 +1,9 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
/.idea/
3 binary files not shown.
@@ -0,0 +1,5 @@
time,action
1,[-0.85844654 -0.913628  ]
1,[-0.97137856 -0.9997079 ]
1,[-0.97137856 -0.9997079 ]
1,[-0.97137856 -0.9997079 ]
@@ -0,0 +1,4 @@
reward,unbalance
-0.09503999999999999,0.03
-0.09503999999999999,0.03
-0.09503999999999999,0.03
inference.py (+119 lines)
@@ -0,0 +1,119 @@
import queue
import threading
import time
import torch

from train import *


def test_one_step(env, act, device, data, action_path):
    env.rec_data = data
    state = env.reset()
    s_tensor = torch.as_tensor((state,), device=device)
    a_tensor = act(s_tensor)
    action = a_tensor.detach().cpu().numpy()[0]
    state, next_state, reward, done = env.step(action)
    print(f'The action of {env.current_time} is {action}')

    with open(action_path, 'a') as af:
        af.write(f'{env.current_time},{action}\n')
    return reward, env.unbalance


def run_service_test(env, agent, data):
    service_result_path = 'data/service_result.csv'
    action_path = 'data/service_actions.csv'

    if not os.path.exists(service_result_path):
        with open(service_result_path, 'w') as f:
            f.write('reward,unbalance\n')

    if not os.path.exists(action_path):
        with open(action_path, 'w') as af:
            af.write('time,action\n')

    service_rewards = []
    service_unbalances = []

    service_reward, service_unbalance = test_one_step(env, agent.act, agent.device, data, action_path)
    service_rewards.append(service_reward)
    service_unbalances.append(service_unbalance)

    if service_rewards:
        avg_reward = sum(service_rewards) / len(service_rewards)
        avg_unbalance = sum(service_unbalances) / len(service_unbalances)

        with open(service_result_path, 'a') as f:
            f.write(f'{avg_reward},{avg_unbalance}\n')


# Receiving side
def listener_thread(env, agent, data_queue):
    while True:
        time.sleep(0.1)  # wait for new data
        if not data_queue.empty():
            new_data = data_queue.get()
            print(f"Data received: {new_data}")
            run_service_test(env, agent, new_data)
            data_queue.task_done()


# Sending side
def sender_thread(data_queue):
    while True:
        try:
            time.sleep(0.5)
            user_input = input("Enter price, temper, solar, load, heat, people for the current step (comma-separated): \n")

            # Split the input string and convert it to a list of floats
            input_data = list(map(float, user_input.split(',')))

            # Check that the input contains exactly six values
            if len(input_data) != 6:
                print("Invalid input format: please enter six numeric values.")
                continue

            # Put the data on the queue
            print(f"Sending data: {input_data}")
            data_queue.put(input_data)

        except ValueError:
            print("Invalid input format: please enter numeric values.")


def main():
    args = Arguments()
    args.visible_gpu = '0'
    for seed in args.random_seed_list:
        args.random_seed = seed
        args.agent = AgentPPO()
        args.agent.cri_target = True
        args.env = WgzGym()
        args.init_before_training()

    agent = args.agent
    env = args.env
    env.TRAIN = False
    agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate)

    act_save_path = './data/actor.pth'
    agent.act.load_state_dict(torch.load(act_save_path))

    # Create a queue for inter-thread communication
    data_queue = queue.Queue()

    listener = threading.Thread(target=listener_thread, args=(env, agent, data_queue))
    listener.daemon = True
    listener.start()

    sender = threading.Thread(target=sender_thread, args=(data_queue,))
    sender.daemon = True
    sender.start()

    # Keep the main thread alive and wait for data to be passed
    while True:
        time.sleep(10)


if __name__ == "__main__":
    main()
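Note on usage: the sender/listener pair above exchanges one list of six floats per time step. A minimal standalone sketch of that hand-off follows; the numeric values are illustrative only, not taken from the repository.

import queue

data_queue = queue.Queue()
line = "0.52,24.0,120.0,80.0,60.0,3"            # price, temper, solar, load, heat, people
input_data = list(map(float, line.split(',')))  # same parsing as sender_thread above
assert len(input_data) == 6                     # sender_thread rejects anything else
data_queue.put(input_data)                      # listener_thread would consume this
print(data_queue.get())                         # [0.52, 24.0, 120.0, 80.0, 60.0, 3.0]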
models/env.py (158 changed lines)
@@ -1,19 +1,18 @@
 import gym
+import numpy as np
 import pandas as pd

-from data_manager import *
-from module import *
-from parameters import *
+from models.data_manager import *
+from models.module import *
+from models.parameters import *


 class WgzGym(gym.Env):
     def __init__(self, **kwargs):
         super(WgzGym, self).__init__()
-        self.excess = None
-        self.shedding = None
+        self.rec_data = None
         self.unbalance = None
-        self.real_unbalance = None
-        self.operation_cost = None
+        self.reward = None
         self.current_output = None
         self.final_step_outputs = None
         self.data_manager = DataManager()
@@ -23,21 +22,26 @@ class WgzGym(gym.Env):
         self.TRAIN = True
         self.current_time = None
         self.episode_length = 24
-        self.penalty_coefficient = 50  # constraint penalty coefficient
-        self.sell_coefficient = 0.1  # selling profit coefficient
+        self.penalty_coefficient = 10  # constraint penalty coefficient
+        self.sell_coefficient = 0.5  # selling profit coefficient
+        self.a = 0.5
+        self.b = 0.3
+        self.c = 0.2
+        self.heat_a = 0.6
+        self.power_a = 0.4
         self.EC_parameters = kwargs.get('EC_parameters', EC_parameters)  # water electrolyzer (hydrogen production)
-        self.HST_parameters = kwargs.get('dg_parameters', dg_parameters)  # hydrogen storage tank
+        self.HST_parameters = kwargs.get('HST_parameters', HST_parameters)  # hydrogen storage tank

         self.grid = Grid()
         self.EC = EC(self.EC_parameters)
         self.HST = HST(self.HST_parameters)

-        self.action_space = gym.spaces.Box(low=-1, high=1, shape=(3,), dtype=np.float32)
+        self.action_space = gym.spaces.Box(low=-1, high=1, shape=(2,), dtype=np.float32)
         '''
         State (7): time, solar, temperature (humidity not considered yet), electricity demand, heat demand (converted to the equivalent hot-water wattage), people count, price
-        Action (3): electrolyzer power, grid power, HST capacity ratio
+        Action (2): electrolyzer power, HST capacity ratio; grid power (note normalization) is dropped and left to the supply-demand balance
         '''
-        self.state_space = gym.spaces.Box(low=0, high=1, shape=(10,), dtype=np.float32)
+        self.state_space = gym.spaces.Box(low=0, high=1, shape=(9,), dtype=np.float32)

     def reset(self, *args):
         self.month = np.random.randint(1, 13)  # choose one of the 12 months
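For orientation, the docstring above describes a 9-dimensional observation and a 2-dimensional action. A small sketch of those shapes; the observation values are placeholders, since in the environment they come from DataManager during training or from rec_data in service mode.

import gym
import numpy as np

state_space = gym.spaces.Box(low=0, high=1, shape=(9,), dtype=np.float32)
action_space = gym.spaces.Box(low=-1, high=1, shape=(2,), dtype=np.float32)

# [time, price, temper, solar, load, heat, people, ec_out, hst_soc]
obs = np.array([1, 0.52, 24.0, 120.0, 80.0, 60.0, 3, 60.0, 0.12], dtype=np.float32)
assert obs.shape == state_space.shape
print(action_space.sample())  # a random 2-dimensional action in [-1, 1]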
@@ -51,74 +55,76 @@ class WgzGym(gym.Env):
         return self._build_state()

     def _build_state(self):
-        soc = self.HST.SOC()
-        ec_output = self.EC.current_output
+        hst_soc = self.HST.current_soc
+        ec_out = self.EC.get_hydrogen()
+        # grid_ex = self.grid.trade_energy
         time_step = self.current_time

-        price = self.data_manager.get_price_data(self.month, self.day, self.current_time)
-        temper = self.data_manager.get_temperature_data(self.month, self.day, self.current_time)
-        solar = self.data_manager.get_solar_data(self.month, self.day, self.current_time)
-        load = self.data_manager.get_load_data(self.month, self.day, self.current_time)
-        heat = self.data_manager.get_heat_data(self.month, self.day, self.current_time)
-        people = self.data_manager.get_people_data(self.month, self.day, self.current_time)
+        if self.TRAIN:
+            price = self.data_manager.get_price_data(self.month, self.day, self.current_time)
+            temper = self.data_manager.get_temper_data(self.month, self.day, self.current_time)
+            solar = self.data_manager.get_solar_data(self.month, self.day, self.current_time)
+            load = self.data_manager.get_load_data(self.month, self.day, self.current_time)
+            heat = self.data_manager.get_heat_data(self.month, self.day, self.current_time)
+            people = self.data_manager.get_people_data(self.month, self.day, self.current_time)
+        else:
+            price = self.rec_data[0]
+            temper = self.rec_data[1]
+            solar = self.rec_data[2]
+            load = self.rec_data[3]
+            heat = self.rec_data[4]
+            people = self.rec_data[5]

-        obs = np.concatenate((np.float32(time_step), np.float32(soc), np.float32(price), np.float32(netload),
-                              np.float32(dg1_output), np.float32(dg2_output), np.float32(dg3_output),
-                              np.float32(temperature), np.float32(irradiance), np.float32(windspeed)), axis=None)
+        obs = np.concatenate((np.float32(time_step), np.float32(price), np.float32(temper),
+                              np.float32(solar), np.float32(load), np.float32(heat),
+                              np.float32(people), np.float32(ec_out), np.float32(hst_soc)), axis=None)
         return obs

-    def step(self, action):  # state transition: current_obs->take_action->get_reward->get_finish->next_obs
-        # apply the action to each component
+    def step(self, action):
+        # each component executes its action for one step
         current_obs = self._build_state()
-        temperature = current_obs[7]
-        irradiance = current_obs[8]
-        self.wind.current_power = current_obs[9]
-        self.battery.step(action[0])  # perform the state transition; the battery's current capacity also changes
-        self.dg1.step(action[1])
-        self.dg2.step(action[2])
-        self.dg3.step(action[3])
-        self.solar.step(temperature, irradiance, action[4])
-        self.current_output = np.array((self.dg1.current_output, self.dg2.current_output, self.dg3.current_output,
-                                        -self.battery.energy_change, self.solar.current_power, self.wind.current_power))
-        actual_production = sum(self.current_output)
+        self.EC.step(action[0])
+        self.HST.step(action[1])
+        # self.grid.step(action[2], self.EC.power_max)
         price = current_obs[1]
-        netload = current_obs[3] - self.solar.output_change
-        unbalance = actual_production - netload
+        temper = current_obs[2]  # usage to be added
+        solar = current_obs[3]
+        load = current_obs[4]
+        heat = current_obs[5]
+        people = current_obs[6]  # usage to be added

+        power_gap = solar + self.HST.get_power() - self.EC.current_power - load
+        heat_gap = self.HST.get_heat() + self.EC.get_heat() - heat

         # reward = 0.0
-        excess_penalty = 0
-        deficient_penalty = 0
         sell_benefit, buy_cost = 0, 0
-        self.excess, self.shedding = 0, 0
-        if unbalance >= 0:  # surplus
-            if unbalance <= self.grid.exchange_ability:
-                sell_benefit = self.grid.get_cost(price, unbalance) * self.sell_coefficient
-            else:
-                sell_benefit = self.grid.get_cost(price, self.grid.exchange_ability) * self.sell_coefficient
-                # real unbalance: the part exceeding the grid limit
-                self.excess = unbalance - self.grid.exchange_ability
-                excess_penalty = self.excess * self.penalty_coefficient
-        else:  # unbalance < 0, shortage penalty
-            if abs(unbalance) <= self.grid.exchange_ability:
-                buy_cost = self.grid.get_cost(price, abs(unbalance))
-            else:
-                buy_cost = self.grid.get_cost(price, self.grid.exchange_ability)
-                self.shedding = abs(unbalance) - self.grid.exchange_ability
-                deficient_penalty = self.shedding * self.penalty_coefficient
-        battery_cost = self.battery.get_cost(self.battery.energy_change)
-        dg1_cost = self.dg1.get_cost(self.dg1.current_output)
-        dg2_cost = self.dg2.get_cost(self.dg2.current_output)
-        dg3_cost = self.dg3.get_cost(self.dg3.current_output)
-        solar_cost = self.solar.get_cost(self.solar.current_power)
-        wind_cost = self.wind.gen_cost(self.wind.current_power)
+        if power_gap >= 0:  # surplus
+            sell_benefit = self.grid.get_cost(price, power_gap) * self.sell_coefficient
+            power_gap = 0
+            power_penalty = 0
+        else:  # shortage
+            power_gap = abs(power_gap)
+            buy_cost = self.grid.get_cost(price, power_gap)
+            power_penalty = power_gap * self.penalty_coefficient

-        self.operation_cost = (battery_cost + dg1_cost + dg2_cost + dg3_cost + solar_cost + wind_cost
-                               + excess_penalty + deficient_penalty - sell_benefit + buy_cost)
-        reward = - self.operation_cost / 1e3
-        self.unbalance = unbalance
-        self.real_unbalance = self.shedding + self.excess
-        final_step_outputs = [self.dg1.current_output, self.dg2.current_output, self.dg3.current_output,
-                              self.battery.current_capacity, self.solar.current_power, self.wind.current_power]
+        if heat_gap >= 0:
+            heat_gap = 0
+            heat_penalty = 0
+        else:
+            heat_gap = abs(heat_gap)
+            heat_penalty = heat_gap * self.penalty_coefficient

+        hst_cost = self.HST.get_cost()
+        ec_cost = self.EC.get_cost(price)
+        solar_cost = solar  # to be refined

+        economic_cost = hst_cost + ec_cost + solar_cost - sell_benefit + buy_cost
+        demand_cost = self.heat_a * heat_penalty + self.power_a * power_penalty
+        eco_benifit = self.EC.less_carbon() - self.grid.get_carbon(power_gap)
+        reward = (- self.a * demand_cost - self.b * economic_cost + self.c * eco_benifit) / 1e3

+        self.unbalance = (power_gap + heat_gap) / 1e3
+        final_step_outputs = [self.HST.current_soc, self.HST.get_power(), self.EC.current_power]
         self.current_time += 1
         finish = (self.current_time == self.episode_length)
         if finish:
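To make the new reward shaping above concrete, here is a standalone sketch of how the three terms combine for one deficit step; every numeric input is an assumed, illustrative value, not one produced by the environment.

a, b, c = 0.5, 0.3, 0.2            # reward weights set in __init__
heat_a, power_a = 0.6, 0.4         # demand weights set in __init__
penalty_coefficient = 10

power_gap, heat_gap = 20.0, 5.0    # assumed unmet power / heat for a deficit step
power_penalty = power_gap * penalty_coefficient
heat_penalty = heat_gap * penalty_coefficient

economic_cost = 12.0               # assumed hst_cost + ec_cost + solar_cost - sell_benefit + buy_cost
eco_benefit = 8.0                  # assumed EC.less_carbon() - grid.get_carbon(power_gap)

demand_cost = heat_a * heat_penalty + power_a * power_penalty
reward = (- a * demand_cost - b * economic_cost + c * eco_benefit) / 1e3
print(reward)                      # approximately -0.057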
@@ -134,7 +140,7 @@ class WgzGym(gym.Env):
         solar = data_df['solar_power'].to_numpy(dtype=float)
         temper = data_df['temper'].to_numpy(dtype=float)
         energy = data_df['energy_demand'].to_numpy(dtype=float)
-        water = data_df['water_demand'].to_numpy(dtype=float)
+        heat = data_df['water_demand'].to_numpy(dtype=float)
         people = data_df['people_count'].to_numpy(dtype=float)
         price = data_df['price'].to_numpy(dtype=float)

@@ -145,9 +151,9 @@ class WgzGym(gym.Env):
             transformed_e = transform_function(e)
             add_function(transformed_e)

-        process_elements(solar, lambda x: x, self.data_manager.add_load_element)
-        process_elements(temper, lambda x: x, self.data_manager.add_load_element)
-        process_elements(energy, lambda x: x, self.data_manager.add_irradiance_element)
-        process_elements(water, lambda x: x, self.data_manager.add_temperature_element)
-        process_elements(people, lambda x: x, self.data_manager.add_wind_element)
+        process_elements(solar, lambda x: x, self.data_manager.add_solar_element)
+        process_elements(temper, lambda x: x, self.data_manager.add_temper_element)
+        process_elements(energy, lambda x: x, self.data_manager.add_electricity_element)
+        process_elements(heat, lambda x: x, self.data_manager.add_heat_element)
+        process_elements(people, lambda x: x, self.data_manager.add_people_element)
         process_elements(price, lambda x: x, self.data_manager.add_price_element)
@@ -1,70 +1,96 @@
 class EC:
     def __init__(self, params):
-        self.current_output = None
-        self.electricity_efficiency = params['electricity_efficiency']
+        self.current_power = None
         self.hydrogen_produce = params['hydrogen_produce']
         self.power_max = params['power_max']
         self.power_min = params['power_min']
         self.ramp = params['ramp']
         self.lifetime = params['lifetime']
         self.equipment_cost = params['equipment_cost']
+        self.electrolysis_efficiency = params['electrolysis_efficiency']
         self.carbon_reduce = params['carbon_reduce']

     def step(self, action_ec):
-        output = self.current_output + action_ec * self.ramp
+        output = self.current_power + action_ec * self.ramp
         output = max(self.power_min, min(self.power_max, output)) if output > 0 else 0
-        self.current_output = output
+        self.current_power = output

     def get_cost(self, price):
-        return self.equipment_cost / self.lifetime + price * self.current_output
+        # cost = equipment cost / lifetime * price * (power used / max power)
+        return self.equipment_cost / self.lifetime * price * self.current_power / self.power_max

     def get_hydrogen(self):
-        return self.current_output * self.electricity_efficiency * self.hydrogen_produce
+        return self.current_power * self.electrolysis_efficiency * self.hydrogen_produce

+    def get_heat(self):
+        return self.current_power * (1 - self.electrolysis_efficiency)

+    def less_carbon(self):
+        return self.current_power * self.carbon_reduce

     def reset(self):
-        self.current_output = 0
+        self.current_power = 0


 class HST:
     def __init__(self, params):
-        self.current_capacity = None
-        self.hydrogen_change = None
+        self.current_soc = None
+        self.hydrogen_charge = None
         self.capacity = params['capacity']
         self.min_soc = params['min_soc']
         self.max_soc = params['max_soc']
-        self.degradation = params['degradation']
-        self.holding = params['holding']
-        self.ramp = params['ramp']
-        self.efficiency = params['efficiency']
+        self.lifetime = params['lifetime']
+        self.equipment_cost = params['equipment_cost']
+        self.charge_efficiency = params['charge_efficiency']
+        self.generate_efficiency = params['generate_efficiency']
+        self.lower_heating_value = params['lower_heating_value']

         '''
-        HST charging rate = electrolyzer hydrogen production rate (does electrolysis also cover the hot-water demand?)
+        HST charging rate = electrolyzer hydrogen production rate (the heat released by electrolysis covers the hot-water demand?)
+        How to control the above is still to be added.
+        HST discharging rate = power supply (electrolyze more when the price is low, release when the price is high)
         '''

     def step(self, action_hst):
-        energy = action_hst * self.ramp
-        current_energy = self.current_capacity * self.capacity
-        updated_capacity = max(self.min_soc, min(self.max_soc, (current_energy + energy) / self.capacity))
-        self.hydrogen_change = (updated_capacity - self.current_capacity) * self.capacity
-        self.current_capacity = updated_capacity  # update capacity to current state
+        energy = action_hst * self.capacity
+        updated_soc = max(self.min_soc, min(self.max_soc, (self.current_soc * self.capacity + energy) / self.capacity))
+        self.hydrogen_charge = (updated_soc - self.current_soc) * self.capacity
+        self.current_soc = updated_soc

-    def get_cost(self, energy_change):
-        cost = abs(energy_change) * self.degradation
+    def get_power(self):
+        if self.hydrogen_charge > 0:
+            return self.hydrogen_charge * self.charge_efficiency * self.lower_heating_value * self.generate_efficiency
+        else:
+            return 0

+    def get_heat(self):
+        if self.hydrogen_charge < 0:
+            return self.hydrogen_charge * self.charge_efficiency * (1 - self.generate_efficiency)
+        else:
+            return 0

+    def get_cost(self):
+        cost = self.equipment_cost / self.lifetime * abs(self.hydrogen_charge)
         return cost

-    def SOC(self):
-        return self.current_capacity

     def reset(self):
-        self.current_capacity = 0.2
+        self.current_soc = 0.1


 class Grid:
     def __init__(self):
         self.delta = 1
-        self.exchange_ability = 100
+        self.carbon_increace = 0.9
+        # self.trade_energy = None

-    def get_cost(self, current_price, energy_exchange):
-        return current_price * energy_exchange * self.delta
+    def get_cost(self, price, trade_energy):
+        return price * trade_energy * self.delta

+    def get_carbon(self, trade_energy):
+        return trade_energy * self.carbon_increace

+    # def step(self, action_grid, ec_power_max):
+    #     self.trade_energy = (action_grid + 1) / 2 * ec_power_max  # de-normalize

     def retrieve_past_price(self):
         result = []
@@ -1,17 +1,21 @@
 EC_parameters = {
-    'electrolysis_efficiency': 0.8,
     'hydrogen_produce': 0.5,
     'power_max': 200,
     'power_min': 0,
     'ramp': 100,
     'lifetime': 6000,  # hour
     'equipment_cost': 10000,  # yuan
-    'carbon_reduce': 1,
+    'electrolysis_efficiency': 0.8,
+    'carbon_reduce': 0.9,
 }

 HST_parameters = {
     'capacity': 1000,
     'min_soc': 0.1,
     'max_soc': 0.9,
-    'efficiency': 0.95,
+    'lifetime': 6000,  # hour
+    'equipment_cost': 10000,  # yuan
+    'charge_efficiency': 0.95,
+    'generate_efficiency': 0.6,
+    'lower_heating_value': 33.33,
 }
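As a rough sanity check of the EC cost, hydrogen and heat formulas above, a standalone sketch using the EC_parameters values; the price and current_power inputs are assumed for illustration only.

equipment_cost = 10000          # yuan, from EC_parameters
lifetime = 6000                 # hours, from EC_parameters
power_max = 200                 # from EC_parameters
electrolysis_efficiency = 0.8   # from EC_parameters
hydrogen_produce = 0.5          # from EC_parameters

price = 0.6                     # assumed electricity price
current_power = 150             # assumed electrolyzer power

cost = equipment_cost / lifetime * price * current_power / power_max
hydrogen = current_power * electrolysis_efficiency * hydrogen_produce
heat = current_power * (1 - electrolysis_efficiency)
print(cost, hydrogen, heat)     # approximately 0.75 60.0 30.0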
@@ -2,39 +2,24 @@ import torch


 def test_one_episode(env, act, device):
-    """to get evaluate information, here record the unbalance of after taking action"""
-    record_state = []
-    record_action = []
-    record_reward = []
-    record_unbalance = []
-    record_system_info = []  # [time,price,netload,action,real action,soc,output*4,unbalance(exchange+penalty),cost]
-    record_init_info = []  # include month,day,time,initial soc
+    """get evaluation information; record the unbalance after taking each action"""
+    record_system_info = []  # same as observation
+    record_init_info = []  # include month, day, time
     env.TRAIN = False
     state = env.reset()
-    record_init_info.append([env.month, env.day, env.current_time, env.battery.current_capacity])
-    print(f'current testing month is {env.month}, day is {env.day}, initial_soc is {env.battery.current_capacity}')
+    record_init_info.append([env.month, env.day, env.current_time])
+    print(f'current testing month is {env.month}, day is {env.day}')
     for i in range(24):
         s_tensor = torch.as_tensor((state,), device=device)
         a_tensor = act(s_tensor)
-        action = a_tensor.detach().cpu().numpy()[0]  # no need for detach(), because torch.no_grad() is used outside
-        real_action = action
+        action = a_tensor.detach().cpu().numpy()[0]
         state, next_state, reward, done = env.step(action)
-        record_system_info.append([state[0], state[1], state[3] + env.wind.current_power, action, real_action,
-                                   env.battery.SOC(), env.battery.energy_change, next_state[4], next_state[5],
-                                   next_state[6], env.solar.current_power, env.wind.current_power, env.unbalance,
-                                   env.operation_cost, reward])
-        record_state.append(state)
-        record_action.append(real_action)
-        record_reward.append(reward)
-        record_unbalance.append(env.unbalance)
+        record_system_info.append([state[1], state[2], env.HST.current_soc(), env.HST.get_power(),
+                                   env.EC.current_power, action, reward])
         state = next_state
-    # add information of the last step: dg1, dg2, dg3, soc, tem, irr
-    record_system_info[-1][7:12] = [env.final_step_outputs[0], env.final_step_outputs[1], env.final_step_outputs[2],
-                                    env.final_step_outputs[4], env.final_step_outputs[5]]
-    record_system_info[-1][5] = env.final_step_outputs[3]
-    record = {'init_info': record_init_info, 'system_info': record_system_info, 'state': record_state,
-              'action': record_action, 'reward': record_reward, 'unbalance': record_unbalance}
+    # add information of the last step: EC, HST.current_soc, HST.power, grid
+    record_system_info[-1][2:5] = [env.final_step_outputs[0], env.final_step_outputs[1], env.final_step_outputs[2]]
+    record = {'init_info': record_init_info, 'system_info': record_system_info}
     return record


@@ -49,7 +34,7 @@ def get_episode_return(env, act, device):
         state, next_state, reward, done, = env.step(action)
         state = next_state
         episode_reward += reward
-        episode_unbalance += env.real_unbalance
+        episode_unbalance += env.unbalance
         if done:
             break
     return episode_reward, episode_unbalance
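For reference, each row of record['system_info'] above now holds seven values per step. A self-contained sketch of turning such a record into a table; the column labels are hypothetical names chosen here, not defined anywhere in the repository.

import pandas as pd

# One illustrative row mirroring what test_one_episode appends per step:
# [price, temper, HST soc, HST power, EC power, action, reward]
system_info = [[0.52, 24.0, 0.12, 0.0, 150.0, [-0.9, -0.99], -0.06]]
eval_data = pd.DataFrame(system_info, columns=['price', 'temper', 'hst_soc', 'hst_power',
                                               'ec_power', 'action', 'reward'])
print(eval_data)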
train.py (41 changed lines)
@@ -3,11 +3,12 @@ import pickle

 os.environ['OMP_WAIT_POLICY'] = 'PASSIVE'  # make sure this is set before importing pytorch
 from copy import deepcopy
-import pandas as pd
+import numpy as np
+import torch
 import torch.nn.functional as F
 from models.env import WgzGym
 from models.net import ActorPPO, CriticAdv
-from models.tools import get_episode_return, test_one_episode
+from models.tools import get_episode_return


 def smooth_rewards(rewards, window=10):

@@ -170,8 +171,6 @@ class Arguments:
     def __init__(self, agent=None, env=None):
         self.agent = agent
         self.env = env
-        self.cwd = None  # current work directory. None means set automatically
-        self.if_remove = False  # remove the cwd folder? (True, False, None: ask me)
         self.visible_gpu = '0'  # os.environ['CUDA_VISIBLE_DEVICES'] = '0, 2,'
         self.num_threads = 32  # cpu_num for evaluate model

@@ -193,14 +192,8 @@ class Arguments:
         self.random_seed_list = [1234]
         self.train = True
         self.save_network = True
-        self.test_network = True
-        self.save_test_data = True

     def init_before_training(self):
-        if self.cwd is None:
-            agent_name = self.agent.__class__.__name__
-            self.cwd = f'./{agent_name}'
-
         np.random.seed(self.random_seed)
         torch.manual_seed(self.random_seed)
         torch.set_num_threads(self.num_threads)

@@ -216,7 +209,6 @@ if __name__ == '__main__':
     for seed in args.random_seed_list:
         args.random_seed = seed
         args.agent = AgentPPO()
-        agent_name = f'{args.agent.__class__.__name__}'
         args.agent.cri_target = True
         args.env = WgzGym()
         args.init_before_training()

@@ -225,9 +217,9 @@ if __name__ == '__main__':
     env = args.env
     agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate)
     gamma = args.gamma
-    batch_size = args.batch_size  # data used to update net
-    target_step = args.target_step  # steps where one episode should stop
-    repeat_times = args.repeat_times  # times the net should update for one batch of data
+    batch_size = args.batch_size
+    target_step = args.target_step
+    repeat_times = args.repeat_times
     soft_update_tau = args.soft_update_tau
     num_episode = args.num_episode
     agent.state = env.reset()

@@ -236,8 +228,6 @@ if __name__ == '__main__':
     '''init training params'''
     # args.train = False
    # args.save_network = False
-    # args.test_network = False
-    # args.save_test_data = False
     if args.train:
         for i_episode in range(num_episode):
             with torch.no_grad():

@@ -255,9 +245,9 @@ if __name__ == '__main__':
                 reward_record['unbalance'].append(episode_unbalance)
                 print(f'episode: {i_episode}, reward: {episode_reward}, unbalance: {episode_unbalance}')

-    act_save_path = f'{args.cwd}/actor.pth'
-    loss_record_path = f'{args.cwd}/loss.pkl'
-    reward_record_path = f'{args.cwd}/reward.pkl'
+    act_save_path = './data/actor.pth'
+    loss_record_path = './data/loss.pkl'
+    reward_record_path = './data/reward.pkl'

     if args.save_network:
         with open(loss_record_path, 'wb') as tf:

@@ -266,16 +256,3 @@ if __name__ == '__main__':
             pickle.dump(reward_record, tf)
         torch.save(agent.act.state_dict(), act_save_path)
         print('actor params have been saved')
-
-    if args.test_network:
-        args.cwd = agent_name
-        agent.act.load_state_dict(torch.load(act_save_path))
-        print('params have been reload and test')
-        record = test_one_episode(env, agent.act, agent.device)
-        eval_data = pd.DataFrame(record['system_info'])
-        eval_data.columns = ['time_step', 'price', 'load', 'action', 'real_action', 'soc', 'battery',
-                             'gen1', 'gen2', 'gen3', 'pv', 'wind', 'unbalance', 'operation_cost', 'reward']
-    if args.save_test_data:
-        test_data_save_path = f'{args.cwd}/test.pkl'
-        with open(test_data_save_path, 'wb') as tf:
-            pickle.dump(record, tf)
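After training, the run leaves its artifacts under ./data/ as set above. A minimal sketch for inspecting them, assuming a completed run has already produced these files:

import pickle
import torch

with open('./data/reward.pkl', 'rb') as f:
    reward_record = pickle.load(f)         # contains at least the 'unbalance' list appended above
print(len(reward_record['unbalance']))     # number of logged training episodes

state_dict = torch.load('./data/actor.pth', map_location='cpu')
print(list(state_dict.keys())[:3])         # first few actor parameter names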