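"""Crossformer experiment for the guangfufadian (photovoltaic power generation) project.

Wraps model construction, data loading, training with early stopping,
testing, and evaluation of a saved checkpoint.
"""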
from guangfufadian.data.data_loader import Dataset_MTS
from guangfufadian.cross_exp.exp_basic import Exp_Basic
from guangfufadian.cross_models.cross_former import Crossformer

from guangfufadian.utils.tools import EarlyStopping, adjust_learning_rate
from guangfufadian.utils.metrics import metric

import numpy as np

import torch
import torch.nn as nn
from torch import optim
from torch.utils.data import DataLoader
from torch.nn import DataParallel

import os
import time
import json
import pickle

import warnings
warnings.filterwarnings('ignore')

# Exp_crossformer: experiment class inheriting from Exp_Basic.
# The constructor simply delegates to the parent class constructor.
class Exp_crossformer(Exp_Basic):
    def __init__(self, args):
        super(Exp_crossformer, self).__init__(args)

    # Build the Crossformer model. In a multi-GPU environment the model is
    # wrapped in nn.DataParallel, which splits each batch across the available
    # GPUs to speed up training.
    def _build_model(self):
        model = Crossformer(
            self.args.data_dim,   # e.g. 7
            self.args.in_len,     # e.g. 96
            self.args.out_len,    # e.g. 24
            self.args.seg_len,    # e.g. 6
            self.args.win_size,   # e.g. 2
            self.args.factor,     # e.g. 10
            self.args.d_model,    # e.g. 256
            self.args.d_ff,       # e.g. 512
            self.args.n_heads,    # e.g. 4
            self.args.e_layers,   # e.g. 3
            self.args.dropout,    # e.g. 0.2
            self.args.baseline,   # e.g. True
            self.device
        ).float()

        if self.args.use_multi_gpu and self.args.use_gpu:
            model = nn.DataParallel(model, device_ids=self.args.device_ids)
        return model

    def _get_data(self, flag):
        args = self.args

        # drop_last is False, so a final batch smaller than batch_size is kept
        # rather than dropped.
        if flag == 'test':
            shuffle_flag = False
            drop_last = False
            batch_size = args.batch_size
        else:
            shuffle_flag = True
            drop_last = False
            batch_size = args.batch_size
        data_set = Dataset_MTS(
            root_path=args.root_path,
            data_path=args.data_path,
            flag=flag,
            size=[args.in_len, args.out_len],
            data_split=args.data_split,
        )

        print(flag, len(data_set))
        # The DataLoader serves the dataset in batches:
        #   data_set:    the Dataset_MTS instance created above
        #   batch_size:  number of samples per batch
        #   shuffle:     whether to shuffle the data (shuffle_flag)
        #   num_workers: number of worker processes for loading, typically set
        #                close to the number of CPU cores to speed up loading
        #   drop_last:   whether to drop the last incomplete batch (drop_last)
        data_loader = DataLoader(
            data_set,
            batch_size=batch_size,
            shuffle=shuffle_flag,
            num_workers=args.num_workers,
            drop_last=drop_last)

        return data_set, data_loader

    # _select_optimizer: use the Adam optimizer with the configured learning rate.
    def _select_optimizer(self):
        model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate)
        return model_optim

    # _select_criterion: use mean squared error (MSE) as the loss function.
    def _select_criterion(self):
        criterion = nn.MSELoss()
        return criterion

    # Compute the validation loss over an entire loader.
    def vali(self, vali_data, vali_loader, criterion):
        self.model.eval()
        total_loss = []
        with torch.no_grad():
            for i, (batch_x, batch_y) in enumerate(vali_loader):
                pred, true = self._process_one_batch(
                    vali_data, batch_x, batch_y)
                loss = criterion(pred.detach().cpu(), true.detach().cpu())
                total_loss.append(loss.detach().item())
        total_loss = np.average(total_loss)
        self.model.train()  # switch back to training mode
        return total_loss

    # Model training.
    def train(self, setting):
        train_data, train_loader = self._get_data(flag='train')
        vali_data, vali_loader = self._get_data(flag='val')
        test_data, test_loader = self._get_data(flag='test')

        path = os.path.join(self.args.checkpoints, setting)
        if not os.path.exists(path):
            os.makedirs(path)
        with open(os.path.join(path, "args.json"), 'w') as f:
            json.dump(vars(self.args), f, indent=True)
        # Save the training data's scaling statistics (mean and std) as a pickle
        # file, so a saved model can later be evaluated with the same
        # normalization (see eval(), which passes scale_statistic to Dataset_MTS).
        scale_statistic = {'mean': train_data.scaler.mean, 'std': train_data.scaler.std}
        with open(os.path.join(path, "scale_statistic.pkl"), 'wb') as f:
            pickle.dump(scale_statistic, f)

        train_steps = len(train_loader)  # number of iterations per epoch
        early_stopping = EarlyStopping(patience=self.args.patience, verbose=True)

        model_optim = self._select_optimizer()
        criterion = self._select_criterion()

        for epoch in range(self.args.train_epochs):
            time_now = time.time()
            iter_count = 0
            train_loss = []

            self.model.train()
            epoch_time = time.time()
            for i, (batch_x, batch_y) in enumerate(train_loader):
                iter_count += 1

                model_optim.zero_grad()
                pred, true = self._process_one_batch(
                    train_data, batch_x, batch_y)
                loss = criterion(pred, true)
                train_loss.append(loss.item())

                if (i + 1) % 100 == 0:
                    print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
                    # Estimate remaining time: average seconds per iteration times
                    # the number of iterations left across all remaining epochs.
                    speed = (time.time() - time_now) / iter_count
                    left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i)
                    print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
                    iter_count = 0
                    time_now = time.time()

                loss.backward()     # backpropagation
                model_optim.step()  # parameter update

            print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
            train_loss = np.average(train_loss)
            vali_loss = self.vali(vali_data, vali_loader, criterion)
            test_loss = self.vali(test_data, test_loader, criterion)

            print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format(
                epoch + 1, train_steps, train_loss, vali_loss, test_loss))
            early_stopping(vali_loss, self.model, path)
            if early_stopping.early_stop:
                print("Early stopping")
                break

            adjust_learning_rate(model_optim, epoch + 1, self.args)

        # Reload the best checkpoint saved by EarlyStopping, then re-save its
        # state dict unwrapped (without the DataParallel 'module.' prefix) so it
        # can be loaded on a single device later.
        best_model_path = path + '/' + 'checkpoint.pth'
        self.model.load_state_dict(torch.load(best_model_path))
        state_dict = self.model.module.state_dict() if isinstance(self.model, DataParallel) else self.model.state_dict()
        torch.save(state_dict, path + '/' + 'checkpoint.pth')

        return self.model

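    # test(): run the trained model over the test split, accumulate
    # instance-weighted metrics, and save the results under ./results/<setting>/.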
    def test(self, setting, save_pred=False, inverse=False):
        test_data, test_loader = self._get_data(flag='test')

        self.model.eval()

        preds = []
        trues = []
        metrics_all = []
        instance_num = 0

        with torch.no_grad():
            for i, (batch_x, batch_y) in enumerate(test_loader):
                pred, true = self._process_one_batch(
                    test_data, batch_x, batch_y, inverse)
                batch_size = pred.shape[0]  # e.g. 32
                instance_num += batch_size
                # Weight each batch's metrics by its batch size so the final
                # division by instance_num yields a per-instance average.
                batch_metric = np.array(metric(pred.detach().cpu().numpy(), true.detach().cpu().numpy())) * batch_size
                metrics_all.append(batch_metric)
                if save_pred:
                    preds.append(pred.detach().cpu().numpy())
                    trues.append(true.detach().cpu().numpy())

        metrics_all = np.stack(metrics_all, axis=0)
        metrics_mean = metrics_all.sum(axis=0) / instance_num  # per-instance average

        # Save results.
        folder_path = './results/' + setting + '/'
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)

        mae, mse, rmse, mape, mspe = metrics_mean
        print('mse:{}, mae:{}'.format(mse, mae))

        np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe]))
        if save_pred:
            preds = np.concatenate(preds, axis=0)
            trues = np.concatenate(trues, axis=0)
            np.save(folder_path + 'pred.npy', preds)
            np.save(folder_path + 'true.npy', trues)

        return

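    # _process_one_batch(): move one batch to the device, run the model on the
    # input window only (no decoder/start-token input is built here), and
    # optionally map predictions and targets back to the original scale.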
    def _process_one_batch(self, dataset_object, batch_x, batch_y, inverse=False):
        batch_x = batch_x.float().to(self.device)
        batch_y = batch_y.float().to(self.device)

        outputs = self.model(batch_x)

        if inverse:
            outputs = dataset_object.inverse_transform(outputs)
            batch_y = dataset_object.inverse_transform(batch_y)

        return outputs, batch_y

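    # _predict_batch(): inference helper for a single input batch; it is not
    # called elsewhere in this file.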
    def _predict_batch(self, batch_x):
        batch_x = batch_x.float().to(self.device)
        outputs = self.model(batch_x)
        return outputs

    def eval(self, setting, save_pred=False, inverse=False):
        # Evaluate a saved model: rebuild the test set, passing the precomputed
        # scale statistics (args.scale_statistic) saved at training time, then
        # compute the same metrics as test().
        args = self.args
        data_set = Dataset_MTS(
            root_path=args.root_path,
            data_path=args.data_path,
            flag='test',
            size=[args.in_len, args.out_len],
            data_split=args.data_split,
            scale=True,
            scale_statistic=args.scale_statistic,
        )

        data_loader = DataLoader(
            data_set,
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=args.num_workers,
            drop_last=False)

        self.model.eval()

        preds = []
        trues = []
        metrics_all = []
        instance_num = 0

        with torch.no_grad():
            for i, (batch_x, batch_y) in enumerate(data_loader):
                pred, true = self._process_one_batch(
                    data_set, batch_x, batch_y, inverse)
                batch_size = pred.shape[0]
                instance_num += batch_size
                batch_metric = np.array(metric(pred.detach().cpu().numpy(), true.detach().cpu().numpy())) * batch_size
                metrics_all.append(batch_metric)
                if save_pred:
                    preds.append(pred.detach().cpu().numpy())
                    trues.append(true.detach().cpu().numpy())

        metrics_all = np.stack(metrics_all, axis=0)
        metrics_mean = metrics_all.sum(axis=0) / instance_num

        # Save results.
        folder_path = './results/' + setting + '/'
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)

        mae, mse, rmse, mape, mspe = metrics_mean
        print('mse:{}, mae:{}'.format(mse, mae))

        np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe]))
        if save_pred:
            preds = np.concatenate(preds, axis=0)
            trues = np.concatenate(trues, axis=0)
            np.save(folder_path + 'pred.npy', preds)
            np.save(folder_path + 'true.npy', trues)

        return mae, mse, rmse, mape, mspe
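

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original experiment code). It shows how
# this class is typically driven; the argument names below are the ones this
# file reads from self.args, and the numeric values are examples taken from the
# inline comments in _build_model. Fields such as `gpu` and `lradj`, which are
# consumed by Exp_Basic / adjust_learning_rate rather than by this file, are
# assumptions and may need adjusting to match those implementations.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    from argparse import Namespace

    args = Namespace(
        # data (example paths and split ratios)
        root_path='./data/', data_path='data.csv', data_split=[0.7, 0.1, 0.2],
        checkpoints='./checkpoints/',
        # model (example values from _build_model's comments)
        data_dim=7, in_len=96, out_len=24, seg_len=6, win_size=2, factor=10,
        d_model=256, d_ff=512, n_heads=4, e_layers=3, dropout=0.2, baseline=True,
        # training (example values)
        batch_size=32, num_workers=0, learning_rate=1e-4, train_epochs=20,
        patience=3, lradj='type1',  # lradj: assumed name used by adjust_learning_rate
        # device
        use_gpu=torch.cuda.is_available(), gpu=0,  # gpu: assumed name used by Exp_Basic
        use_multi_gpu=False, device_ids=[0],
    )

    setting = 'Crossformer_example'
    exp = Exp_crossformer(args)
    exp.train(setting)
    exp.test(setting, save_pred=True)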