diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..a93d29e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,9 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
+/.idea/
diff --git a/data/actor.pth b/data/actor.pth
new file mode 100644
index 0000000..c58c212
Binary files /dev/null and b/data/actor.pth differ
diff --git a/data/loss.pkl b/data/loss.pkl
new file mode 100644
index 0000000..3723816
Binary files /dev/null and b/data/loss.pkl differ
diff --git a/data/reward.pkl b/data/reward.pkl
new file mode 100644
index 0000000..b65f4d9
Binary files /dev/null and b/data/reward.pkl differ
diff --git a/data/service_actions.csv b/data/service_actions.csv
new file mode 100644
index 0000000..f6fd2a1
--- /dev/null
+++ b/data/service_actions.csv
@@ -0,0 +1,5 @@
+time,action
+1,[-0.85844654 -0.913628 ]
+1,[-0.97137856 -0.9997079 ]
+1,[-0.97137856 -0.9997079 ]
+1,[-0.97137856 -0.9997079 ]
diff --git a/data/service_result.csv b/data/service_result.csv
new file mode 100644
index 0000000..2659b21
--- /dev/null
+++ b/data/service_result.csv
@@ -0,0 +1,4 @@
+reward,unbalance
+-0.09503999999999999,0.03
+-0.09503999999999999,0.03
+-0.09503999999999999,0.03
diff --git a/inference.py b/inference.py
index e69de29..f14afcd 100644
--- a/inference.py
+++ b/inference.py
@@ -0,0 +1,119 @@
+import queue
+import threading
+import time
+import torch
+
+from train import *
+
+
+def test_one_step(env, act, device, data, action_path):
+    env.rec_data = data
+    state = env.reset()
+    s_tensor = torch.as_tensor((state,), device=device)
+    a_tensor = act(s_tensor)
+    action = a_tensor.detach().cpu().numpy()[0]
+    state, next_state, reward, done = env.step(action)
+    print(f'The action of {env.current_time} is {action}')
+
+    with open(action_path, 'a') as af:
+        af.write(f'{env.current_time},{action}\n')
+    return reward, env.unbalance
+
+
+def run_service_test(env, agent, data):
+    service_result_path = 'data/service_result.csv'
+    action_path = 'data/service_actions.csv'
+
+    if not os.path.exists(service_result_path):
+        with open(service_result_path, 'w') as f:
+            f.write('reward,unbalance\n')
+
+    if not os.path.exists(action_path):
+        with open(action_path, 'w') as af:
+            af.write('time,action\n')
+
+    service_rewards = []
+    service_unbalances = []
+
+    service_reward, service_unbalance = test_one_step(env, agent.act, agent.device, data, action_path)
+    service_rewards.append(service_reward)
+    service_unbalances.append(service_unbalance)
+
+    if service_rewards:
+        avg_reward = sum(service_rewards) / len(service_rewards)
+        avg_unbalance = sum(service_unbalances) / len(service_unbalances)
+
+        with open(service_result_path, 'a') as f:
+            f.write(f'{avg_reward},{avg_unbalance}\n')
+
+
+# listener side
+def listener_thread(env, agent, data_queue):
+    while True:
+        time.sleep(0.1)  # wait
+        if not data_queue.empty():
+            new_data = data_queue.get()
+            print(f"Data received: {new_data}")
+            run_service_test(env, agent, new_data)
+            data_queue.task_done()
+
+
+# sender side
+def sender_thread(data_queue):
+    while True:
+        try:
+            time.sleep(0.5)
+            user_input = input("Enter the current price, temper, solar, load, heat, people (comma separated): \n")
+
+            # split the input string and convert it into a list of floats
+            input_data = list(map(float, user_input.split(',')))
+
+            # check that exactly six values were given
+            if len(input_data) != 6:
+                print("Invalid input, please enter six numeric values.")
+                continue
+
+            # put the data into the queue
+            print(f"Sending data: {input_data}")
+            data_queue.put(input_data)
+
+        except ValueError:
+            print("Invalid input, please enter numeric values.")
+
+
+def main():
+    args = Arguments()
+    args.visible_gpu = '0'
+    for seed in args.random_seed_list:
+        args.random_seed = seed
+        args.agent = AgentPPO()
+        args.agent.cri_target = True
+        args.env = WgzGym()
+        args.init_before_training()
+
+        agent = args.agent
+        env = args.env
+        env.TRAIN = False
+        agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate)
+
+        act_save_path = './data/actor.pth'
+        agent.act.load_state_dict(torch.load(act_save_path))
+
+        # create a queue for inter-thread communication
+        data_queue = queue.Queue()
+
+        listener = threading.Thread(target=listener_thread, args=(env, agent, data_queue))
+        listener.daemon = True
+        listener.start()
+
+        sender = threading.Thread(target=sender_thread, args=(data_queue,))
+        sender.daemon = True
+        sender.start()
+
+        # keep the main thread running and wait for incoming data
+        while True:
+            time.sleep(10)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/models/env.py b/models/env.py
index ebb826c..c00789b 100644
--- a/models/env.py
+++ b/models/env.py
@@ -1,19 +1,18 @@
 import gym
+import numpy as np
 import pandas as pd
-from data_manager import *
-from module import *
-from parameters import *
+from models.data_manager import *
+from models.module import *
+from models.parameters import *
 
 
 class WgzGym(gym.Env):
     def __init__(self, **kwargs):
         super(WgzGym, self).__init__()
-        self.excess = None
-        self.shedding = None
+        self.rec_data = None
         self.unbalance = None
-        self.real_unbalance = None
-        self.operation_cost = None
+        self.reward = None
         self.current_output = None
         self.final_step_outputs = None
         self.data_manager = DataManager()
@@ -23,21 +22,26 @@ class WgzGym(gym.Env):
         self.TRAIN = True
         self.current_time = None
         self.episode_length = 24
-        self.penalty_coefficient = 50  # constraint violation penalty coefficient
-        self.sell_coefficient = 0.1  # profit coefficient for selling energy
+        self.penalty_coefficient = 10  # constraint violation penalty coefficient
+        self.sell_coefficient = 0.5  # profit coefficient for selling energy
+        self.a = 0.5
+        self.b = 0.3
+        self.c = 0.2
+        self.heat_a = 0.6
+        self.power_a = 0.4
         self.EC_parameters = kwargs.get('EC_parameters', EC_parameters)  # electrolyzer (hydrogen production)
-        self.HST_parameters = kwargs.get('dg_parameters', dg_parameters)  # hydrogen storage tank
+        self.HST_parameters = kwargs.get('HST_parameters', HST_parameters)  # hydrogen storage tank
         self.grid = Grid()
         self.EC = EC(self.EC_parameters)
         self.HST = HST(self.HST_parameters)
-        self.action_space = gym.spaces.Box(low=-1, high=1, shape=(3,), dtype=np.float32)
+        self.action_space = gym.spaces.Box(low=-1, high=1, shape=(2,), dtype=np.float32)
         '''
         time  PV  temperature (humidity not yet considered)  electricity demand  heat demand (converted to hot-water wattage)  people count  price -> 7
-        electrolyzer power  grid power  hydrogen tank SOC -> 3
+        electrolyzer power  hydrogen tank SOC -> 2
+        grid power (mind the normalization) -> dropped (driven by the supply/demand balance)
         '''
-        self.state_space = gym.spaces.Box(low=0, high=1, shape=(10,), dtype=np.float32)
+        self.state_space = gym.spaces.Box(low=0, high=1, shape=(9,), dtype=np.float32)
 
     def reset(self, *args):
         self.month = np.random.randint(1, 13)  # choose 12 month
@@ -51,74 +55,76 @@ class WgzGym(gym.Env):
         return self._build_state()
 
     def _build_state(self):
-        soc = self.HST.SOC()
-        ec_output = self.EC.current_output
+        hst_soc = self.HST.current_soc
+        ec_out = self.EC.get_hydrogen()
+        # grid_ex = self.grid.trade_energy
         time_step = self.current_time
-        price = self.data_manager.get_price_data(self.month, self.day, self.current_time)
-        temper = self.data_manager.get_temperature_data(self.month, self.day, self.current_time)
-        solar = self.data_manager.get_solar_data(self.month, self.day, self.current_time)
-        load = self.data_manager.get_load_data(self.month, self.day, self.current_time)
-        heat = self.data_manager.get_heat_data(self.month, self.day, self.current_time)
-        people = self.data_manager.get_people_data(self.month, self.day, self.current_time)
+        if self.TRAIN:
+            price = self.data_manager.get_price_data(self.month, self.day, self.current_time)
+            temper = self.data_manager.get_temper_data(self.month, self.day, self.current_time)
+            solar = self.data_manager.get_solar_data(self.month, self.day, self.current_time)
+            load = self.data_manager.get_load_data(self.month, self.day, self.current_time)
+            heat = self.data_manager.get_heat_data(self.month, self.day, self.current_time)
+            people = self.data_manager.get_people_data(self.month, self.day, self.current_time)
+        else:
+            price = self.rec_data[0]
+            temper = self.rec_data[1]
+            solar = self.rec_data[2]
+            load = self.rec_data[3]
+            heat = self.rec_data[4]
+            people = self.rec_data[5]
 
-        obs = np.concatenate((np.float32(time_step), np.float32(soc), np.float32(price), np.float32(netload),
-                              np.float32(dg1_output), np.float32(dg2_output), np.float32(dg3_output),
-                              np.float32(temperature), np.float32(irradiance), np.float32(windspeed)), axis=None)
+        obs = np.concatenate((np.float32(time_step), np.float32(price), np.float32(temper),
+                              np.float32(solar), np.float32(load), np.float32(heat),
+                              np.float32(people), np.float32(ec_out), np.float32(hst_soc)), axis=None)
         return obs
 
-    def step(self, action):  # state transition: current_obs->take_action->get_reward->get_finish->next_obs
-        # add the action to each component
+    def step(self, action):
+        # each component executes its action for one step
         current_obs = self._build_state()
-        temperature = current_obs[7]
-        irradiance = current_obs[8]
-        self.wind.current_power = current_obs[9]
-        self.battery.step(action[0])  # perform the state transition; the battery capacity also changes
-        self.dg1.step(action[1])
-        self.dg2.step(action[2])
-        self.dg3.step(action[3])
-        self.solar.step(temperature, irradiance, action[4])
-        self.current_output = np.array((self.dg1.current_output, self.dg2.current_output, self.dg3.current_output,
-                                        -self.battery.energy_change, self.solar.current_power, self.wind.current_power))
-        actual_production = sum(self.current_output)
+        self.EC.step(action[0])
+        self.HST.step(action[1])
+        # self.grid.step(action[2], self.EC.power_max)
         price = current_obs[1]
-        netload = current_obs[3] - self.solar.output_change
-        unbalance = actual_production - netload
+        temper = current_obs[2]  # usage to be determined
+        solar = current_obs[3]
+        load = current_obs[4]
+        heat = current_obs[5]
+        people = current_obs[6]  # usage to be determined
+
+        power_gap = solar + self.HST.get_power() - self.EC.current_power - load
+        heat_gap = self.HST.get_heat() + self.EC.get_heat() - heat
 
         # reward = 0.0
-        excess_penalty = 0
-        deficient_penalty = 0
         sell_benefit, buy_cost = 0, 0
-        self.excess, self.shedding = 0, 0
-        if unbalance >= 0:  # surplus
-            if unbalance <= self.grid.exchange_ability:
-                sell_benefit = self.grid.get_cost(price, unbalance) * self.sell_coefficient
-            else:
-                sell_benefit = self.grid.get_cost(price, self.grid.exchange_ability) * self.sell_coefficient
-                # real unbalance: the part exceeding the grid exchange limit
-                self.excess = unbalance - self.grid.exchange_ability
-                excess_penalty = self.excess * self.penalty_coefficient
-        else:  # unbalance < 0, shortage penalty
-            if abs(unbalance) <= self.grid.exchange_ability:
-                buy_cost = self.grid.get_cost(price, abs(unbalance))
-            else:
-                buy_cost = self.grid.get_cost(price, self.grid.exchange_ability)
-                self.shedding = abs(unbalance) - self.grid.exchange_ability
-                deficient_penalty = self.shedding * self.penalty_coefficient
-        battery_cost = self.battery.get_cost(self.battery.energy_change)
-        dg1_cost = self.dg1.get_cost(self.dg1.current_output)
-        dg2_cost = self.dg2.get_cost(self.dg2.current_output)
-        dg3_cost = self.dg3.get_cost(self.dg3.current_output)
-        solar_cost = self.solar.get_cost(self.solar.current_power)
-        wind_cost = self.wind.gen_cost(self.wind.current_power)
+        if power_gap >= 0:  # surplus power
+            sell_benefit = self.grid.get_cost(price, power_gap) * self.sell_coefficient
+            power_gap = 0
+            power_penalty = 0
+        else:  # power shortage
+            power_gap = abs(power_gap)
+            buy_cost = self.grid.get_cost(price, power_gap)
+            power_penalty = power_gap * self.penalty_coefficient
-        self.operation_cost = (battery_cost + dg1_cost + dg2_cost + dg3_cost + solar_cost + wind_cost
-                               + excess_penalty + deficient_penalty - sell_benefit + buy_cost)
-        reward = - self.operation_cost / 1e3
-        self.unbalance = unbalance
-        self.real_unbalance = self.shedding + self.excess
-        final_step_outputs = [self.dg1.current_output, self.dg2.current_output, self.dg3.current_output,
-                              self.battery.current_capacity, self.solar.current_power, self.wind.current_power]
+        if heat_gap >= 0:
+            heat_gap = 0
+            heat_penalty = 0
+        else:
+            heat_gap = abs(heat_gap)
+            heat_penalty = heat_gap * self.penalty_coefficient
+
+        hst_cost = self.HST.get_cost()
+        ec_cost = self.EC.get_cost(price)
+        solar_cost = solar  # to be refined
+
+        economic_cost = hst_cost + ec_cost + solar_cost - sell_benefit + buy_cost
+        demand_cost = self.heat_a * heat_penalty + self.power_a * power_penalty
+        eco_benefit = self.EC.less_carbon() - self.grid.get_carbon(power_gap)
+        reward = (- self.a * demand_cost - self.b * economic_cost + self.c * eco_benefit) / 1e3
+
+        self.unbalance = (power_gap + heat_gap) / 1e3
+        final_step_outputs = [self.HST.current_soc, self.HST.get_power(), self.EC.current_power]
         self.current_time += 1
         finish = (self.current_time == self.episode_length)
         if finish:
@@ -134,7 +140,7 @@ class WgzGym(gym.Env):
         solar = data_df['solar_power'].to_numpy(dtype=float)
         temper = data_df['temper'].to_numpy(dtype=float)
         energy = data_df['energy_demand'].to_numpy(dtype=float)
-        water = data_df['water_demand'].to_numpy(dtype=float)
+        heat = data_df['water_demand'].to_numpy(dtype=float)
         people = data_df['people_count'].to_numpy(dtype=float)
         price = data_df['price'].to_numpy(dtype=float)
 
@@ -145,9 +151,9 @@ class WgzGym(gym.Env):
             transformed_e = transform_function(e)
             add_function(transformed_e)
 
-        process_elements(solar, lambda x: x, self.data_manager.add_load_element)
-        process_elements(temper, lambda x: x, self.data_manager.add_load_element)
-        process_elements(energy, lambda x: x, self.data_manager.add_irradiance_element)
-        process_elements(water, lambda x: x, self.data_manager.add_temperature_element)
-        process_elements(people, lambda x: x, self.data_manager.add_wind_element)
+        process_elements(solar, lambda x: x, self.data_manager.add_solar_element)
+        process_elements(temper, lambda x: x, self.data_manager.add_temper_element)
+        process_elements(energy, lambda x: x, self.data_manager.add_electricity_element)
+        process_elements(heat, lambda x: x, self.data_manager.add_heat_element)
+        process_elements(people, lambda x: x, self.data_manager.add_people_element)
         process_elements(price, lambda x: x, self.data_manager.add_price_element)
diff --git a/models/module.py b/models/module.py
index 3154fe4..08ef162 100644
--- a/models/module.py
+++ b/models/module.py
@@ -1,70 +1,96 @@
 class EC:
     def __init__(self, params):
-        self.current_output = None
-        self.electricity_efficiency = params['electricity_efficiency']
+        self.current_power = None
         self.hydrogen_produce = params['hydrogen_produce']
         self.power_max = params['power_max']
         self.power_min = params['power_min']
         self.ramp = params['ramp']
         self.lifetime = params['lifetime']
         self.equipment_cost = params['equipment_cost']
+        self.electrolysis_efficiency = params['electrolysis_efficiency']
+        self.carbon_reduce = params['carbon_reduce']
 
     def step(self, action_ec):
-        output = self.current_output + action_ec * self.ramp
+        output = self.current_power + action_ec * self.ramp
         output = max(self.power_min, min(self.power_max, output)) if output > 0 else 0
-        self.current_output = output
+        self.current_power = output
 
     def get_cost(self, price):
-        return self.equipment_cost / self.lifetime + price * self.current_output
+        # cost = equipment cost / lifetime * electricity price * (power used / max power)
+        return self.equipment_cost / self.lifetime * price * self.current_power / self.power_max
 
     def get_hydrogen(self):
-        return self.current_output * self.electricity_efficiency * self.hydrogen_produce
+        return self.current_power * self.electrolysis_efficiency * self.hydrogen_produce
+
+    def get_heat(self):
+        return self.current_power * (1 - self.electrolysis_efficiency)
+
+    def less_carbon(self):
+        return self.current_power * self.carbon_reduce
 
     def reset(self):
-        self.current_output = 0
+        self.current_power = 0
 
 
 class HST:
     def __init__(self, params):
-        self.current_capacity = None
-        self.hydrogen_change = None
+        self.current_soc = None
+        self.hydrogen_charge = None
         self.capacity = params['capacity']
         self.min_soc = params['min_soc']
         self.max_soc = params['max_soc']
-        self.degradation = params['degradation']
-        self.holding = params['holding']
-        self.ramp = params['ramp']
-        self.efficiency = params['efficiency']
+        self.lifetime = params['lifetime']
+        self.equipment_cost = params['equipment_cost']
+        self.charge_efficiency = params['charge_efficiency']
+        self.generate_efficiency = params['generate_efficiency']
+        self.lower_heating_value = params['lower_heating_value']
        '''
-        charging rate of the tank = hydrogen production rate of the electrolyzer (will electrolysis cover the hot-water demand?)
+        charging rate of the tank = hydrogen production rate of the electrolyzer (does the heat released by electrolysis cover the hot-water demand?)
+        how to control the above is still to be worked out
+        discharging rate of the tank = power supply (electrolyze more when the price is low, release when the price is high)
        '''
+
     def step(self, action_hst):
-        energy = action_hst * self.ramp
-        current_energy = self.current_capacity * self.capacity
-        updated_capacity = max(self.min_soc, min(self.max_soc, (current_energy + energy) / self.capacity))
-        self.hydrogen_change = (updated_capacity - self.current_capacity) * self.capacity
-        self.current_capacity = updated_capacity  # update capacity to current state
+        energy = action_hst * self.capacity
+        updated_soc = max(self.min_soc, min(self.max_soc, (self.current_soc * self.capacity + energy) / self.capacity))
+        self.hydrogen_charge = (updated_soc - self.current_soc) * self.capacity
+        self.current_soc = updated_soc
 
-    def get_cost(self, energy_change):
-        cost = abs(energy_change) * self.degradation
+    def get_power(self):
+        if self.hydrogen_charge > 0:
+            return self.hydrogen_charge * self.charge_efficiency * self.lower_heating_value * self.generate_efficiency
+        else:
+            return 0
+
+    def get_heat(self):
+        if self.hydrogen_charge < 0:
+            return self.hydrogen_charge * self.charge_efficiency * (1 - self.generate_efficiency)
+        else:
+            return 0
+
+    def get_cost(self):
+        cost = self.equipment_cost / self.lifetime * abs(self.hydrogen_charge)
         return cost
 
-    def SOC(self):
-        return self.current_capacity
-
     def reset(self):
-        self.current_capacity = 0.2
+        self.current_soc = 0.1
 
 
 class Grid:
     def __init__(self):
         self.delta = 1
-        self.exchange_ability = 100
+        self.carbon_increase = 0.9
+        # self.trade_energy = None
 
-    def get_cost(self, current_price, energy_exchange):
-        return current_price * energy_exchange * self.delta
+    def get_cost(self, price, trade_energy):
+        return price * trade_energy * self.delta
+
+    def get_carbon(self, trade_energy):
+        return trade_energy * self.carbon_increase
+
+    # def step(self, action_grid, ec_power_max):
+    #     self.trade_energy = (action_grid + 1) / 2 * ec_power_max  # de-normalize
 
     def retrieve_past_price(self):
         result = []
diff --git a/models/parameters.py b/models/parameters.py
index 9ab1cc8..7321124 100644
--- a/models/parameters.py
+++ b/models/parameters.py
@@ -1,17 +1,21 @@
 EC_parameters = {
-    'electrolysis_efficiency': 0.8,
     'hydrogen_produce': 0.5,
     'power_max': 200,
     'power_min': 0,
     'ramp': 100,
     'lifetime': 6000,  # hour
     'equipment_cost': 10000,  # yuan
-    'carbon_reduce': 1,
+    'electrolysis_efficiency': 0.8,
+    'carbon_reduce': 0.9,
 }
 
 HST_parameters = {
     'capacity': 1000,
     'min_soc': 0.1,
     'max_soc': 0.9,
-    'efficiency': 0.95,
+    'lifetime': 6000,  # hour
+    'equipment_cost': 10000,  # yuan
+    'charge_efficiency': 0.95,
+    'generate_efficiency': 0.6,
+    'lower_heating_value': 33.33,
 }
diff --git a/models/tools.py b/models/tools.py
index 1432d1f..703e011 100644
--- a/models/tools.py
+++ b/models/tools.py
@@ -2,39 +2,24 @@ import torch
 
 
 def test_one_episode(env, act, device):
-    """to get evaluate information, here record the unbalance of after taking action"""
-    record_state = []
-    record_action = []
-    record_reward = []
-    record_unbalance = []
-    record_system_info = []  # [time,price,netload,action,real action,soc,output*4,unbalance(exchange+penalty),cost]
-    record_init_info = []  # include month,day,time,intial soc
+    """collect evaluation information and record the unbalance after each action"""
+    record_system_info = []  # same as observation
+    record_init_info = []  # include month,day,time
     env.TRAIN = False
     state = env.reset()
-    record_init_info.append([env.month, env.day, env.current_time, env.battery.current_capacity])
-    print(f'current testing month is {env.month}, day is {env.day},initial_soc is {env.battery.current_capacity}')
+    record_init_info.append([env.month, env.day, env.current_time])
+    print(f'current testing month is {env.month}, day is {env.day}')
     for i in range(24):
         s_tensor = torch.as_tensor((state,), device=device)
         a_tensor = act(s_tensor)
-        action = a_tensor.detach().cpu().numpy()[0]  # not need detach(), because with torch.no_grad() outside
-        real_action = action
+        action = a_tensor.detach().cpu().numpy()[0]
         state, next_state, reward, done = env.step(action)
-        record_system_info.append([state[0], state[1], state[3] + env.wind.current_power, action, real_action,
-                                   env.battery.SOC(), env.battery.energy_change, next_state[4], next_state[5],
-                                   next_state[6], env.solar.current_power, env.wind.current_power, env.unbalance,
-                                   env.operation_cost, reward])
-        record_state.append(state)
-        record_action.append(real_action)
-        record_reward.append(reward)
-        record_unbalance.append(env.unbalance)
+        record_system_info.append([state[1], state[2], env.HST.current_soc, env.HST.get_power(),
+                                   env.EC.current_power, action, reward])
         state = next_state
-    # add information of last step dg1, dh2, dg3, soc, tem, irr
-    record_system_info[-1][7:12] = [env.final_step_outputs[0], env.final_step_outputs[1], env.final_step_outputs[2],
-                                    env.final_step_outputs[4], env.final_step_outputs[5]]
-    record_system_info[-1][5] = env.final_step_outputs[3]
-    record = {'init_info': record_init_info, 'system_info': record_system_info, 'state': record_state,
-              'action': record_action, 'reward': record_reward, 'unbalance': record_unbalance}
+    # add information of last step EC, HST.current_soc, HST.power, grid
+    record_system_info[-1][2:5] = [env.final_step_outputs[0], env.final_step_outputs[1], env.final_step_outputs[2]]
+    record = {'init_info': record_init_info, 'system_info': record_system_info}
     return record
 
 
@@ -49,7 +34,7 @@ def get_episode_return(env, act, device):
         state, next_state, reward, done, = env.step(action)
         state = next_state
         episode_reward += reward
-        episode_unbalance += env.real_unbalance
+        episode_unbalance += env.unbalance
         if done:
             break
     return episode_reward, episode_unbalance
diff --git a/train.py b/train.py
index c1403df..d70cdae 100644
--- a/train.py
+++ b/train.py
@@ -3,11 +3,12 @@
 import pickle
 os.environ['OMP_WAIT_POLICY'] = 'PASSIVE'  # make sure this is set before torch is imported
 from copy import deepcopy
-import pandas as pd
+import numpy as np
+import torch
 import torch.nn.functional as F
 from models.env import WgzGym
 from models.net import ActorPPO, CriticAdv
-from models.tools import get_episode_return, test_one_episode
+from models.tools import get_episode_return
 
 
 def smooth_rewards(rewards, window=10):
@@ -170,8 +171,6 @@ class Arguments:
     def __init__(self, agent=None, env=None):
         self.agent = agent
         self.env = env
-        self.cwd = None  # current work directory. None means set automatically
-        self.if_remove = False  # remove the cwd folder? (True, False, None:ask me)
         self.visible_gpu = '0'  # os.environ['CUDA_VISIBLE_DEVICES'] = '0, 2,'
         self.num_threads = 32  # cpu_num for evaluate model
@@ -193,14 +192,8 @@ class Arguments:
         self.random_seed_list = [1234]
         self.train = True
         self.save_network = True
-        self.test_network = True
-        self.save_test_data = True
 
     def init_before_training(self):
-        if self.cwd is None:
-            agent_name = self.agent.__class__.__name__
-            self.cwd = f'./{agent_name}'
-
         np.random.seed(self.random_seed)
         torch.manual_seed(self.random_seed)
         torch.set_num_threads(self.num_threads)
@@ -216,7 +209,6 @@ if __name__ == '__main__':
     for seed in args.random_seed_list:
         args.random_seed = seed
         args.agent = AgentPPO()
-        agent_name = f'{args.agent.__class__.__name__}'
         args.agent.cri_target = True
         args.env = WgzGym()
         args.init_before_training()
@@ -225,9 +217,9 @@
         agent = args.agent
         env = args.env
         agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate)
         gamma = args.gamma
-        batch_size = args.batch_size  # data used to update net
-        target_step = args.target_step  # steps of one episode should stop
-        repeat_times = args.repeat_times  # times should update for one batch size data
+        batch_size = args.batch_size
+        target_step = args.target_step
+        repeat_times = args.repeat_times
         soft_update_tau = args.soft_update_tau
         num_episode = args.num_episode
         agent.state = env.reset()
@@ -236,8 +228,6 @@
         '''init training params'''
         # args.train = False
         # args.save_network = False
-        # args.test_network = False
-        # args.save_test_data = False
         if args.train:
             for i_episode in range(num_episode):
                 with torch.no_grad():
@@ -255,9 +245,9 @@
                 reward_record['unbalance'].append(episode_unbalance)
                 print(f'episode: {i_episode}, reward: {episode_reward}, unbalance: {episode_unbalance}')
 
-        act_save_path = f'{args.cwd}/actor.pth'
-        loss_record_path = f'{args.cwd}/loss.pkl'
-        reward_record_path = f'{args.cwd}/reward.pkl'
+        act_save_path = './data/actor.pth'
+        loss_record_path = './data/loss.pkl'
+        reward_record_path = './data/reward.pkl'
 
         if args.save_network:
             with open(loss_record_path, 'wb') as tf:
@@ -266,16 +256,3 @@
                 pickle.dump(reward_record, tf)
             torch.save(agent.act.state_dict(), act_save_path)
             print('actor params have been saved')
-
-        if args.test_network:
-            args.cwd = agent_name
-            agent.act.load_state_dict(torch.load(act_save_path))
-            print('params have been reload and test')
-            record = test_one_episode(env, agent.act, agent.device)
-            eval_data = pd.DataFrame(record['system_info'])
-            eval_data.columns = ['time_step', 'price', 'load', 'action', 'real_action', 'soc', 'battery',
-                                 'gen1', 'gen2', 'gen3', 'pv', 'wind', 'unbalance', 'operation_cost', 'reward']
-            if args.save_test_data:
-                test_data_save_path = f'{args.cwd}/test.pkl'
-                with open(test_data_save_path, 'wb') as tf:
-                    pickle.dump(record, tf)
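
Note (not part of the patch): the interactive sender_thread is only one way to drive the new inference service; run_service_test(env, agent, data) can also be called directly with the six values that WgzGym._build_state reads from env.rec_data when TRAIN is False. Below is a minimal sketch under the assumptions that Arguments, AgentPPO and WgzGym are importable from train (inference.py itself relies on "from train import *"), that agent.init / agent.act / agent.device behave as used in inference.main(), and that ./data/actor.pth already exists; the sample numbers are made up.

    import torch
    from train import Arguments, AgentPPO, WgzGym
    from inference import run_service_test

    args = Arguments()
    agent, env = AgentPPO(), WgzGym()
    env.TRAIN = False  # make _build_state read from env.rec_data instead of the DataManager
    agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate)
    agent.act.load_state_dict(torch.load('./data/actor.pth'))

    # order expected by _build_state: [price, temper, solar, load, heat, people] (hypothetical values)
    run_service_test(env, agent, [0.6, 25.0, 120.0, 80.0, 40.0, 15.0])

The chosen action and the averaged reward/unbalance are appended to data/service_actions.csv and data/service_result.csv, the same files the listener_thread writes to when data arrives through the queue.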