diff --git a/models/env.py b/models/env.py
index c33707b..ac3c554 100644
--- a/models/env.py
+++ b/models/env.py
@@ -25,6 +25,8 @@ class WgzGym(gym.Env):
         self.a = 0.5
         self.b = 0.3
         self.c = 0.2
+        self.heat_a = 0.6
+        self.power_a = 0.4
         self.EC_parameters = kwargs.get('EC_parameters', EC_parameters)  # 电解水制氢器
         self.HST_parameters = kwargs.get('dg_parameters', dg_parameters)  # 储氢罐
 
@@ -53,6 +55,7 @@ class WgzGym(gym.Env):
     def _build_state(self):
         hst_soc = self.HST.current_soc
         ec_out = self.EC.get_hydrogen()
+        grid_ex = self.grid  # NOTE(review): self.grid is the component object (step/get_cost/current_power are used on it); np.float32() below needs a scalar -- was self.grid.current_power intended?
         time_step = self.current_time
 
         price = self.data_manager.get_price_data(self.month, self.day, self.current_time)
@@ -64,13 +67,13 @@ class WgzGym(gym.Env):
 
         obs = np.concatenate((np.float32(time_step), np.float32(price), np.float32(temper),
                               np.float32(solar), np.float32(load), np.float32(heat),
-                              np.float32(people), np.float32(ec_out), np.float32(hst_soc), np.float32(wind)), axis=None)
+                              np.float32(people), np.float32(ec_out), np.float32(hst_soc), np.float32(grid_ex)), axis=None)
         return obs
 
     def step(self, action):
-        # 在每个组件中添加动作
+        # Apply this step's action to every component (EC, HST, grid)
         current_obs = self._build_state()
-        self.EC.step(action[0])  # 执行状态转换，电池当前容量也改变
+        self.EC.step(action[0])
         self.HST.step(action[1])
         self.grid.step(action[2])
         price = current_obs[1]
@@ -78,26 +81,27 @@ class WgzGym(gym.Env):
         solar = current_obs[3]
         load = current_obs[4]
         heat = current_obs[5]
+
         gym_to_grid = solar + self.HST.get_power() - self.EC.current_power - load
+        heat_penalty = 0
 
         # reward = 0.0
         sell_benefit, buy_cost = 0, 0
-        excess_penalty, deficient_penalty = 0, 0
+        # power_penalty = 0
         if gym_to_grid >= 0:  # 过剩
             sell_benefit = self.grid.get_cost(price, gym_to_grid) * self.sell_coefficient
-            excess_penalty = gym_to_grid * self.penalty_coefficient
         else:  # 缺少
             buy_cost = self.grid.get_cost(price, abs(gym_to_grid))
-            deficient_penalty = abs(gym_to_grid) * self.penalty_coefficient
+        power_penalty = abs(gym_to_grid) * self.penalty_coefficient
 
         hst_cost = self.HST.get_cost()
         ec_cost = self.EC.get_cost(price)
         solar_cost = solar  # 待补充
 
         economic_cost = hst_cost + ec_cost + solar_cost - sell_benefit + buy_cost
-        demand_cost = excess_penalty + deficient_penalty
+        demand_cost = self.heat_a * heat_penalty + self.power_a * power_penalty
         eco_benifit = 0
-        reward = - self.a * economic_cost - self.b * demand_cost + self.c * eco_benifit
+        reward = - self.a * demand_cost - self.b * economic_cost + self.c * eco_benifit
 
         self.unbalance = gym_to_grid
         final_step_outputs = [self.HST.current_soc, self.EC.current_power, self.grid.current_power]
diff --git a/models/module.py b/models/module.py
index fdf284d..e3293f2 100644
--- a/models/module.py
+++ b/models/module.py
@@ -22,6 +22,9 @@ class EC:
     def get_hydrogen(self):
         return self.current_power * self.electricity_efficiency * self.hydrogen_produce
 
+    def get_heat(self):  # heat figure for the EC unit at its current power
+        return self.current_power * (1 - self.electricity_efficiency)  # remainder after electricity_efficiency; get_hydrogen() uses the efficiency share
+
     def get_carbon(self):
         return self.current_power * self.carbon_reduce
 
@@ -51,6 +54,7 @@ class HST:
     
     储氢罐的放气速率 = 供电 （电价低时多电解，电价高时释放）
     '''
+
     def step(self, action_hst):
         energy = action_hst * self.ramp
         updated_soc = max(self.min_soc, min(self.max_soc, (self.current_soc * self.capacity + energy) / self.capacity))
@@ -59,7 +63,13 @@ class HST:
 
     def get_power(self):
         if self.hydrogen_charge > 0:
-            return self.hydrogen_charge * self.lower_heating_value * self.charge_efficiency * self.generate_efficiency
+            return self.hydrogen_charge * self.charge_efficiency * self.lower_heating_value * self.generate_efficiency  # same four factors as before, reordered
+        else:
+            return 0  # no power output unless hydrogen_charge is positive
+
+    def get_heat(self):  # heat counterpart of get_power(), built on (1 - generate_efficiency)
+        if self.hydrogen_charge < 0:  # NOTE(review): get_power() tests > 0; with < 0 the result is negative for efficiencies in (0, 1) -- confirm the sign
+            return self.hydrogen_charge * self.charge_efficiency * (1 - self.generate_efficiency)  # NOTE(review): unlike get_power(), lower_heating_value is not applied -- confirm
         else:
             return 0
 
diff --git a/models/tools.py b/models/tools.py
index 7440af6..16253b3 100644
--- a/models/tools.py
+++ b/models/tools.py
@@ -2,39 +2,30 @@ import torch
 
 
 def test_one_episode(env, act, device):
-    """to get evaluate information, here record the unbalance of after taking action"""
-    record_state = []
-    record_action = []
-    record_reward = []
-    record_unbalance = []
-    record_system_info = []  # [time,price,netload,action,real action,soc,output*4,unbalance(exchange+penalty),cost]
-    record_init_info = []  # include month,day,time,intial soc
+    """Run one 24-step evaluation episode with policy `act` and return the recorded init/system info."""
+    record_system_info = []  # one row per step; the column layout is described inside the loop
+    record_init_info = []  # [month, day, time] read right after reset
     env.TRAIN = False
     state = env.reset()
-    record_init_info.append([env.month, env.day, env.current_time, env.battery.current_soc])
-    print(f'current testing month is {env.month}, day is {env.day},initial_soc is {env.battery.current_soc}')
+    record_init_info.append([env.month, env.day, env.current_time])
+    print(f'current testing month is {env.month}, day is {env.day}')
     for i in range(24):
         s_tensor = torch.as_tensor((state,), device=device)
         a_tensor = act(s_tensor)
-        action = a_tensor.detach().cpu().numpy()[0]  # not need detach(), because with torch.no_grad() outside
-        real_action = action
+        action = a_tensor.detach().cpu().numpy()[0]
         state, next_state, reward, done = env.step(action)
-
-        record_system_info.append([state[0], state[1], state[3] + env.wind.current_power, action, real_action,
-                                   env.battery.SOC(), env.battery.energy_change, next_state[4], next_state[5],
-                                   next_state[6], env.solar.current_power, env.wind.current_power, env.unbalance,
-                                   env.operation_cost, reward])
-        record_state.append(state)
-        record_action.append(real_action)
-        record_reward.append(reward)
-        record_unbalance.append(env.unbalance)
+        # Row: state[0:3], action, EC power, HST soc, HST power, next_state[4:7], then the
+        # env-level solar power, power demand, heat demand and finally the step reward.
+        # NOTE(review): env.solar / power_demand / heat_demand are not shown in this patch -- confirm.
+        record_system_info.append([state[0], state[1], state[2], action, env.EC.current_power,
+                                   env.HST.current_soc, env.HST.get_power(), next_state[4], next_state[5],
+                                   next_state[6], env.solar.current_power, env.power_demand, env.heat_demand, reward])
         state = next_state
-    # add information of last step dg1, dh2, dg3, soc, tem, irr
-    record_system_info[-1][7:12] = [env.final_step_outputs[0], env.final_step_outputs[1], env.final_step_outputs[2],
-                                    env.final_step_outputs[4], env.final_step_outputs[5]]
-    record_system_info[-1][5] = env.final_step_outputs[3]
+    # Refresh the last row's EC power (col 4) and HST soc (col 5) from env.final_step_outputs,
+    # which WgzGym.step builds as [HST soc, EC power, grid power] (three entries only).
+    record_system_info[-1][4] = env.final_step_outputs[1]
+    record_system_info[-1][5] = env.final_step_outputs[0]
-    record = {'init_info': record_init_info, 'system_info': record_system_info, 'state': record_state,
-              'action': record_action, 'reward': record_reward, 'unbalance': record_unbalance}
+    record = {'init_info': record_init_info, 'system_info': record_system_info}
     return record
 
 
diff --git a/train.py b/train.py
index c1403df..23fa770 100644
--- a/train.py
+++ b/train.py
@@ -270,12 +270,10 @@ if __name__ == '__main__':
     if args.test_network:
         args.cwd = agent_name
         agent.act.load_state_dict(torch.load(act_save_path))
-        print('params have been reload and test')
         record = test_one_episode(env, agent.act, agent.device)
-        eval_data = pd.DataFrame(record['system_info'])
-        eval_data.columns = ['time_step', 'price', 'load', 'action', 'real_action', 'soc', 'battery',
-                             'gen1', 'gen2', 'gen3', 'pv', 'wind', 'unbalance', 'operation_cost', 'reward']
+
     if args.save_test_data:
         test_data_save_path = f'{args.cwd}/test.pkl'
         with open(test_data_save_path, 'wb') as tf:
             pickle.dump(record, tf)
+        print('test data have been saved')