diff --git a/DDPG.py b/DDPG.py
index a79258a..0a9b6dd 100644
--- a/DDPG.py
+++ b/DDPG.py
@@ -48,16 +48,14 @@ if __name__ == '__main__':
         target_step = args.target_step  # how manysteps of one episode should stop
         repeat_times = args.repeat_times  # how many times should update for one batch size data
         soft_update_tau = args.soft_update_tau
-        # get the first experience from
-        agent.state = env.reset()
-        '''collect data and train and update network'''
         num_episode = args.num_episode
+        agent.state = env.reset()
+        '''collect data, train and update network'''
         # args.train=False
         # args.save_network=False
         # args.test_network=False
         # args.save_test_data=False
         # args.compare_with_gurobi=False
-
         if args.train:
             collect_data = True
             while collect_data:
diff --git a/PPO.py b/PPO.py
index 0e02e36..5fa3e2d 100644
--- a/PPO.py
+++ b/PPO.py
@@ -249,7 +249,7 @@ class Arguments:
         self.repeat_times = 2 ** 3  # collect target_step, then update network
         self.target_step = 4096  # repeatedly update network to keep critic's loss small
         self.max_memo = self.target_step  # capacity of replay buffer
-        self.if_per_or_gae = False  # GAE for on-policy sparse reward: Generalized Advantage Estimation.
+        self.if_gae_or_raw = True  # GAE for on-policy sparse reward: Generalized Advantage Estimation.

         '''Arguments for evaluate'''
         self.random_seed = 0  # initialize random seed in self.init_before_training()
@@ -270,7 +270,7 @@ class Arguments:
         if if_main:
             import shutil  # remove history according to bool(if_remove)
             if self.if_remove is None:
-                self.if_remove = bool(input(f"| PRESS 'y' to REMOVE: {self.cwd}? ") == 'y')
+                self.if_remove = (input(f"| PRESS 'y' to REMOVE: {self.cwd}? ") == 'y')
             elif self.if_remove:
                 shutil.rmtree(self.cwd, ignore_errors=True)
                 print(f"| Remove cwd: {self.cwd}")
@@ -280,7 +280,6 @@ class Arguments:
         torch.manual_seed(self.random_seed)
         torch.set_num_threads(self.num_threads)
         torch.set_default_dtype(torch.float32)
-        os.environ['CUDA_VISIBLE_DEVICES'] = str(self.visible_gpu)

@@ -308,7 +307,6 @@ if __name__ == '__main__':
    for seed in args.random_seed_list:
        args.random_seed = seed
        args.agent = AgentPPO()
-        agent_name = f'{args.agent.__class__.__name__}'
        args.agent.cri_target = True
        args.env = ESSEnv()
@@ -317,19 +315,18 @@ if __name__ == '__main__':
        agent = args.agent
        env = args.env
        agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
-                   args.if_per_or_gae, layer_norm=True)
-
+                   args.if_gae_or_raw, layer_norm=True)
        cwd = args.cwd
        gamma = args.gamma
        batch_size = args.batch_size  # how much data should be used to update net
        target_step = args.target_step  # how manysteps of one episode should stop
        repeat_times = args.repeat_times  # how many times should update for one batch size data
        soft_update_tau = args.soft_update_tau
+        num_episode = args.num_episode
        agent.state = env.reset()
        '''init buffer'''
        buffer = list()
        '''init training parameters'''
-        num_episode = args.num_episode
        # args.train = False
        # args.save_network = False
        # args.test_network = False
@@ -388,7 +385,7 @@ if __name__ == '__main__':
            from plotDRL import PlotArgs, make_dir, plot_evaluation_information, plot_optimization_result

            plot_args = PlotArgs()
-            plot_args.feature_change = ''
+            plot_args.feature_change = 'gae'
            args.cwd = agent_name
            plot_dir = make_dir(args.cwd, plot_args.feature_change)
            plot_optimization_result(base_result, plot_dir)
diff --git a/PPO_llm.py b/PPO_llm.py
index 24d4dbf..a1d4da7 100644
--- a/PPO_llm.py
+++ b/PPO_llm.py
@@ -249,7 +249,7 @@ class Arguments:
         self.repeat_times = 2 ** 3  # collect target_step, then update network
         self.target_step = 4096  # repeatedly update network to keep critic's loss small
         self.max_memo = self.target_step  # capacity of replay buffer
-        self.if_per_or_gae = False  # GAE for on-policy sparse reward: Generalized Advantage Estimation.
+        self.if_gae_or_raw = True  # GAE for on-policy sparse reward: Generalized Advantage Estimation.

         '''Arguments for evaluate'''
         self.random_seed = 0  # initialize random seed in self.init_before_training()
@@ -317,7 +317,7 @@ if __name__ == '__main__':
        agent = args.agent
        env = args.env
        agent.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0], args.learning_rate,
-                   args.if_per_or_gae, layer_norm=True)
+                   args.if_gae_or_raw, layer_norm=True)

        cwd = args.cwd
        gamma = args.gamma
@@ -389,7 +389,7 @@ if __name__ == '__main__':
            from plotDRL import PlotArgs, make_dir, plot_evaluation_information, plot_optimization_result

            plot_args = PlotArgs()
-            plot_args.feature_change = 'llm_5_no_mean'
+            plot_args.feature_change = 'llm'
            args.cwd = agent_name
            plot_dir = make_dir(args.cwd, plot_args.feature_change)
            plot_optimization_result(base_result, plot_dir)
diff --git a/SAC.py b/SAC.py
index 950953e..bfe8f67 100644
--- a/SAC.py
+++ b/SAC.py
@@ -47,20 +47,15 @@ if __name__ == '__main__':
         target_step = args.target_step  # how manysteps of one episode should stop
         repeat_times = args.repeat_times  # how many times should update for one batch size data
         soft_update_tau = args.soft_update_tau
-
         agent.state = env.reset()
-
         '''collect data and train and update network'''
         num_episode = args.num_episode
         '''here record real unbalance'''
-
-        ##
-        args.train = False
-        args.save_network = False
+        # args.train = False
+        # args.save_network = False
         # args.test_network = False
         # args.save_test_data = False
         # args.compare_with_gurobi = False
-        #
         if args.train:
             collect_data = True
             while collect_data:
diff --git a/TD3.py b/TD3.py
index aa2f390..5decf00 100644
--- a/TD3.py
+++ b/TD3.py
@@ -47,11 +47,9 @@ if __name__ == '__main__':
         target_step = args.target_step  # how manysteps of one episode should stop
         repeat_times = args.repeat_times  # how many times should update for one batch size data
         soft_update_tau = args.soft_update_tau
-
         agent.state = env.reset()
         '''collect data and train and update network'''
         num_episode = args.num_episode
-
         # args.train=False
         # args.save_network=False
         # args.test_network=False
diff --git a/environment.py b/environment.py
index ec80116..9f90c72 100644
--- a/environment.py
+++ b/environment.py
@@ -90,7 +90,7 @@ class ESSEnv(gym.Env):
         self.dg1.step(action[1])
         self.dg2.step(action[2])
         self.dg3.step(action[3])
-        self.solar.step(action[4], temperature, irradiance)
+        self.solar.step(temperature, irradiance, action[4])
         self.wind.step(wind_speed)
         self.current_output = np.array((self.dg1.current_output, self.dg2.current_output, self.dg3.current_output,
                                         -self.battery.energy_change))
diff --git a/environment_llm.py b/environment_llm.py
index 1e1d5a8..f8ad984 100644
--- a/environment_llm.py
+++ b/environment_llm.py
@@ -93,7 +93,7 @@ class ESSEnv(gym.Env):
         self.dg1.step(action[1])
         self.dg2.step(action[2])
         self.dg3.step(action[3])
-        self.solar.step(action[4], temperature, irradiance)
+        self.solar.step(temperature, irradiance, action[4])
         self.wind.step(wind_speed)
         self.current_output = np.array((self.dg1.current_output, self.dg2.current_output, self.dg3.current_output,
                                         -self.battery.energy_change))
diff --git a/environment_primal_dual.py b/environment_primal_dual.py
index 4ca18ba..5d9ee05 100644
--- a/environment_primal_dual.py
+++ b/environment_primal_dual.py
@@ -85,7 +85,7 @@ class ESSEnv(gym.Env):
         self.dg1.step(action[1])
         self.dg2.step(action[2])
         self.dg3.step(action[3])
-        self.solar.step(action[4], temperature, irradiance)
+        self.solar.step(temperature, irradiance, action[4])
         self.wind.step(wind_speed)
         self.current_output = np.array((self.dg1.current_output, self.dg2.current_output, self.dg3.current_output,
                                         -self.battery.energy_change, self.solar.current_power, self.wind.current_power))
diff --git a/module.py b/module.py
index f47be9f..17372b9 100644
--- a/module.py
+++ b/module.py
@@ -81,13 +81,14 @@ class Solar:
         self.opex_cofficient = parameters['O_c']
         self.refer_irradiance = parameters['I_ref']
         self.refer_temperature = parameters['T_ref']
+        self.change_percent = parameters['C_per']

     def step(self, temperature, irradiance, action_voltage=0):
         I_sc = self.sc_current * (irradiance / self.refer_irradiance)
         V_oc = self.oc_voltage + self.temper_coefficient * (temperature - self.refer_temperature)
         current = I_sc - (V_oc / self.sh_resistance)
-        self.current_power = max((1 + action_voltage) * self.base_voltage * current, 0)
+        self.current_power = max((1 + self.change_percent * action_voltage) * self.base_voltage * current, 0)
         return self.current_power

     def get_cost(self, current_power):
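
Reviewer note on the environment_*.py and module.py hunks: the old call self.solar.step(action[4], temperature, irradiance) passed the agent's action where Solar.step expects a temperature, and the PV action previously swung the operating voltage by up to +/-100%. The sketch below is not repository code; it only illustrates, with hypothetical parameter values, how the corrected argument order and the new change_percent scaling behave together.

# Illustrative sketch only -- parameter names and values are placeholders, not repo values.
def solar_power(temperature, irradiance, action_voltage=0.0,
                sc_current=8.0, oc_voltage=36.0, base_voltage=30.0,
                sh_resistance=500.0, temper_coefficient=-0.12,
                refer_irradiance=1000.0, refer_temperature=25.0,
                change_percent=0.1):
    """Mirrors the patched Solar.step: the action shifts the operating voltage
    by at most +/- change_percent instead of +/- 100%."""
    I_sc = sc_current * (irradiance / refer_irradiance)
    V_oc = oc_voltage + temper_coefficient * (temperature - refer_temperature)
    current = I_sc - (V_oc / sh_resistance)
    return max((1 + change_percent * action_voltage) * base_voltage * current, 0)

# Patched env call order matches the signature step(temperature, irradiance, action_voltage);
# the old order step(action[4], temperature, irradiance) silently treated the action as a temperature.
print(solar_power(30.0, 850.0, action_voltage=1.0))   # full positive action: +change_percent on voltage
print(solar_power(30.0, 850.0, action_voltage=-1.0))  # full negative action: -change_percent on voltage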
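
On the if_per_or_gae -> if_gae_or_raw rename in PPO.py and PPO_llm.py: the Arguments comment says the flag toggles Generalized Advantage Estimation. The snippet below is a generic sketch of the two estimators such a flag usually selects between, not the repository's AgentPPO implementation; the function names, the mask convention (mask = 1 - done) and the lambda value are assumptions.

# Generic sketch only -- NOT the repo's AgentPPO code.
import torch

def reward_sum_raw(rewards, masks, values, gamma=0.99):
    """Advantage from raw discounted returns: A_t = G_t - V(s_t)."""
    returns = torch.empty_like(rewards)
    running = 0.0
    for t in range(rewards.shape[0] - 1, -1, -1):
        running = rewards[t] + gamma * masks[t] * running
        returns[t] = running
    return returns, returns - values

def reward_sum_gae(rewards, masks, values, gamma=0.99, lam=0.95):
    """GAE(lambda): delta_t = r_t + gamma*V(s_{t+1}) - V(s_t); A_t = delta_t + gamma*lambda*A_{t+1}."""
    advantages = torch.empty_like(rewards)
    next_value, running_adv = 0.0, 0.0
    for t in range(rewards.shape[0] - 1, -1, -1):
        delta = rewards[t] + gamma * masks[t] * next_value - values[t]
        running_adv = delta + gamma * lam * masks[t] * running_adv
        advantages[t] = running_adv
        next_value = values[t]
    return advantages + values, advantages  # critic targets, actor advantages

# Example: a 4-step trajectory, terminal at the last step (mask 0).
r = torch.tensor([1.0, 0.0, 0.0, 2.0])
m = torch.tensor([1.0, 1.0, 1.0, 0.0])
v = torch.tensor([0.5, 0.4, 0.6, 0.3])
print(reward_sum_raw(r, m, v)[1])
print(reward_sum_gae(r, m, v)[1])

Presumably if_gae_or_raw=True selects the GAE path and False falls back to raw returns, matching the flag's new name; the actual switch lives inside agent.init / the agent's buffer processing.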