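"""Evaluation script for the ESSEnv energy-scheduling experiments.

Compares four policies on randomly sampled test dates: a pre-trained PPO agent,
a pre-computed LLM action sequence, an LLM-guided PPO agent, and a MILP
optimization baseline. Per-episode rewards and power unbalance are pickled and
then plotted with plot_melt. The environment, agents, MILP solver
(optimization_base_result) and plotting helpers are assumed to be provided by
the wildcard imports from PPO_llm and plotDRL.
"""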
import pickle
import random

import pandas as pd
import torch

from PPO_llm import *
from plotDRL import *


def generate_test_dates(num):
    """Sample `num` unique (month, day) pairs, restricted to days 20 and later of each month."""
    dates = []
    months_days = {1: 31, 2: 28, 3: 31, 4: 30, 5: 31, 6: 30, 7: 31, 8: 31, 9: 30, 10: 31, 11: 30, 12: 31}
    while len(dates) < num:
        month = random.randint(1, 12)
        day = random.randint(20, months_days[month])
        if (month, day) not in dates:
            dates.append((month, day))
    return dates


def test_llm(env, llm_actions, index):
    """Roll out one 24-hour episode using the pre-computed LLM actions starting at `index`."""
    sum_reward = 0
    sum_unbalance = 0
    # note: the environment is not reset here; actions are taken from the
    # pre-computed list by absolute hour index rather than from the env state
    for i in range(24):
        action = llm_actions[index + i]
        state, next_state, reward, done = env.step(action)
        sum_reward += reward
        sum_unbalance += env.real_unbalance
    record = {'reward': [sum_reward], 'unbalance': [sum_unbalance]}
    return record


def test_one_episode(env, act, device):
    """Roll out one 24-hour episode with the given actor network and record the totals."""
    state = env.reset()
    sum_reward = 0
    sum_unbalance = 0
    for i in range(24):
        s_tensor = torch.as_tensor((state,), device=device)
        a_tensor = act(s_tensor)
        action = a_tensor.detach().cpu().numpy()[0]
        # alternative (disabled): blend the RL action with a pre-computed LLM action
        # rl_action = a_tensor.detach().cpu().numpy()[0]
        # llm_action = llm_actions[index + i]
        # action = 0.95 * np.array(rl_action) + 0.05 * np.array(llm_action)
        state, next_state, reward, done = env.step(action)
        state = next_state
        sum_reward += reward
        sum_unbalance += env.real_unbalance
    record = {'reward': [sum_reward], 'unbalance': [sum_unbalance]}
    return record


def run_multiple_tests(env, args, agent1, agent2, num_tests=10):
    """Evaluate PPO, LLM, LLM-guided PPO, and the MILP baseline on random test dates."""
    all_ppo = pd.DataFrame()
    all_llm = pd.DataFrame()
    all_lmppo = pd.DataFrame()
    all_milp = pd.DataFrame()

    test_dates = generate_test_dates(num_tests)
    llm_actions = load_llm_actions('data/results.json')

    for month, day in test_dates:
        print(f'current testing month is {month}, day is {day}')
        # absolute hour index of the first hour of this (month, day) in the yearly data
        index = (sum(Constant.MONTHS_LEN[:month - 1]) + day - 1) * 24

        ppo_res = test_one_episode(env, agent1.act, agent1.device)
        base_res = optimization_base_result(env, month, day)
        llm_res = test_llm(env, llm_actions, index)
        lmppo_res = test_one_episode(env, agent2.act, agent2.device)

        ppo_data = pd.DataFrame(ppo_res)
        # MILP baseline: reward is the negative total cost, scaled by 1e-3
        # (assumed to match the env's reward units)
        m_reward = -base_res['step_cost'].sum() / 1e3
        milp_data = pd.DataFrame({'reward': [m_reward]})
        llm_data = pd.DataFrame(llm_res)
        lmppo_data = pd.DataFrame(lmppo_res)

        all_ppo = pd.concat([all_ppo, ppo_data], ignore_index=True)
        all_milp = pd.concat([all_milp, milp_data], ignore_index=True)
        all_llm = pd.concat([all_llm, llm_data], ignore_index=True)
        all_lmppo = pd.concat([all_lmppo, lmppo_data], ignore_index=True)

    test_ppo_path = f'{args.cwd}/melt_ppo.pkl'
    test_llm_path = f'{args.cwd}/melt_llm.pkl'
    test_lmppo_path = f'{args.cwd}/melt_lmppo.pkl'
    test_milp_path = f'{args.cwd}/melt_milp.pkl'

    with open(test_ppo_path, 'wb') as tf:
        pickle.dump(all_ppo, tf)
    with open(test_milp_path, 'wb') as tf:
        pickle.dump(all_milp, tf)
    with open(test_llm_path, 'wb') as tf:
        pickle.dump(all_llm, tf)
    with open(test_lmppo_path, 'wb') as tf:
        pickle.dump(all_lmppo, tf)

    plot_args = PlotArgs()
    plot_args.feature_change = 'llm_1015'
    args.cwd = agent_name  # agent_name is defined at module level in the __main__ block below
    plot_dir = make_dir(args.cwd, plot_args.feature_change)
    plot_melt(test_ppo_path, test_llm_path, test_lmppo_path, test_milp_path, plot_dir)


if __name__ == '__main__':
    args = Arguments()
    args.visible_gpu = '0'
    for seed in args.random_seed_list:
        args.random_seed = seed
        # use two separate PPO instances so each can load its own actor checkpoint
        args.agent1 = AgentPPO()
        args.agent2 = AgentPPO()
        args.agent1.cri_target = args.agent2.cri_target = True
        agent_name = f'{args.agent1.__class__.__name__}'

        args.env = ESSEnv()
        args.init_before_training()

        agent1 = args.agent1
        agent2 = args.agent2
        env = args.env
        agent1.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0],
                    args.learning_rate, args.if_gae_or_raw)
        agent2.init(args.net_dim, env.state_space.shape[0], env.action_space.shape[0],
                    args.learning_rate, args.if_gae_or_raw)
        cwd = args.cwd
        gamma = args.gamma
        batch_size = args.batch_size
        target_step = args.target_step
        repeat_times = args.repeat_times
        soft_update_tau = args.soft_update_tau
        agent1.state = agent2.state = env.reset()
        buffer = list()
        num_episode = args.num_episode
        # args.test_network = False

        if args.test_network:
            args.cwd = agent_name
            ppo_act_save_path = f'{args.cwd}/actor_10.pth'
            lmppo_act_save_path = f'{args.cwd}/actor_llm_1015.pth'
            agent1.act.load_state_dict(torch.load(ppo_act_save_path))
            agent2.act.load_state_dict(torch.load(lmppo_act_save_path))
            print('actor parameters have been reloaded for testing')
            env.TRAIN = False
            run_multiple_tests(env, args, agent1, agent2, num_tests=12)