ai-station-code/guangfufadian/cross_exp/exp_crossformer.py

from guangfufadian.data.data_loader import Dataset_MTS
from guangfufadian.cross_exp.exp_basic import Exp_Basic
from guangfufadian.cross_models.cross_former import Crossformer
from guangfufadian.utils.tools import EarlyStopping, adjust_learning_rate
from guangfufadian.utils.metrics import metric
import numpy as np
import torch
import torch.nn as nn
from torch import optim
from torch.utils.data import DataLoader
from torch.nn import DataParallel
import os
import time
import json
import pickle
import warnings
warnings.filterwarnings('ignore')
# Exp_crossformer inherits from Exp_Basic; its constructor simply calls the parent constructor.
class Exp_crossformer(Exp_Basic):
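    """Experiment wrapper around the Crossformer model: builds the model, prepares the
    data loaders, and runs the training / validation / test / evaluation loops."""
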
    def __init__(self, args):
        super(Exp_crossformer, self).__init__(args)

    # When multiple GPUs are available, the model is wrapped in nn.DataParallel so that
    # training can exploit several GPUs in parallel and speed up the training process.
    def _build_model(self):
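        """Instantiate the Crossformer model from the experiment arguments and, if
        configured, wrap it in DataParallel for multi-GPU training."""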
        model = Crossformer(
            self.args.data_dim,  # 7
            self.args.in_len,    # 96
            self.args.out_len,   # 24
            self.args.seg_len,   # 6
            self.args.win_size,  # 2
            self.args.factor,    # 10
            self.args.d_model,   # 256
            self.args.d_ff,      # 512
            self.args.n_heads,   # 4
            self.args.e_layers,  # 3
            self.args.dropout,   # 0.2
            self.args.baseline,  # True
            self.device
        ).float()

        if self.args.use_multi_gpu and self.args.use_gpu:
            model = nn.DataParallel(model, device_ids=self.args.device_ids)
        return model

    def _get_data(self, flag):
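        """Build the Dataset_MTS instance and the corresponding DataLoader for the
        requested split ('train', 'val' or 'test')."""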
        args = self.args

        # drop_last is False: if the last batch contains fewer than batch_size samples, it is kept.
        if flag == 'test':
            shuffle_flag = False; drop_last = False; batch_size = args.batch_size
        else:
            shuffle_flag = True; drop_last = False; batch_size = args.batch_size

        data_set = Dataset_MTS(
            root_path=args.root_path,
            data_path=args.data_path,
            flag=flag,
            size=[args.in_len, args.out_len],
            data_split=args.data_split,
        )
        print(flag, len(data_set))
        """
        Wrap the dataset in a PyTorch DataLoader, which loads the data in batches:
        data_set: the dataset instance created above
        batch_size: number of samples per batch
        shuffle: whether to shuffle the sample order (controlled by shuffle_flag)
        num_workers: number of worker subprocesses for data loading (often set to the CPU core count to speed up loading)
        drop_last: whether to drop the last incomplete batch (controlled by drop_last)
        """
        data_loader = DataLoader(
            data_set,
            batch_size=batch_size,
            shuffle=shuffle_flag,
            num_workers=args.num_workers,
            drop_last=drop_last)

        return data_set, data_loader

    # _select_optimizer returns an Adam optimizer configured with the learning rate from args.
    def _select_optimizer(self):
        model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate)
        return model_optim

    # _select_criterion returns the mean squared error (MSE) loss.
    def _select_criterion(self):
        criterion = nn.MSELoss()
        return criterion

    # Compute the validation loss.
    def vali(self, vali_data, vali_loader, criterion):
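        """Run the model over a validation (or test) loader without gradient tracking
        and return the average criterion loss."""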
        self.model.eval()
        total_loss = []
        with torch.no_grad():
            for i, (batch_x, batch_y) in enumerate(vali_loader):
                pred, true = self._process_one_batch(
                    vali_data, batch_x, batch_y)
                loss = criterion(pred.detach().cpu(), true.detach().cpu())
                total_loss.append(loss.detach().item())
        total_loss = np.average(total_loss)
        self.model.train()  # switch back to training mode
        return total_loss

    # Model training.
    def train(self, setting):
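        """Train the model with early stopping, save the experiment arguments and the
        training-set scaling statistics next to the checkpoints, and reload the best
        checkpoint at the end."""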
        train_data, train_loader = self._get_data(flag='train')
        vali_data, vali_loader = self._get_data(flag='val')
        test_data, test_loader = self._get_data(flag='test')

        path = os.path.join(self.args.checkpoints, setting)
        if not os.path.exists(path):
            os.makedirs(path)
        with open(os.path.join(path, "args.json"), 'w') as f:
            json.dump(vars(self.args), f, indent=True)
        # Save the scaling statistics (mean and std) of the training data as a pickle file.
        scale_statistic = {'mean': train_data.scaler.mean, 'std': train_data.scaler.std}
        with open(os.path.join(path, "scale_statistic.pkl"), 'wb') as f:
            pickle.dump(scale_statistic, f)

        train_steps = len(train_loader)  # number of training steps (batches) per epoch
        early_stopping = EarlyStopping(patience=self.args.patience, verbose=True)

        model_optim = self._select_optimizer()
        criterion = self._select_criterion()

        for epoch in range(self.args.train_epochs):
            time_now = time.time()
            iter_count = 0
            train_loss = []

            self.model.train()
            epoch_time = time.time()
            for i, (batch_x, batch_y) in enumerate(train_loader):
                iter_count += 1

                model_optim.zero_grad()
                pred, true = self._process_one_batch(
                    train_data, batch_x, batch_y)
                loss = criterion(pred, true)
                train_loss.append(loss.item())

                if (i + 1) % 100 == 0:
                    print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
                    speed = (time.time() - time_now) / iter_count
                    left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i)
                    print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
                    iter_count = 0
                    time_now = time.time()

                loss.backward()     # backpropagation
                model_optim.step()  # parameter update

            print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
            train_loss = np.average(train_loss)
            vali_loss = self.vali(vali_data, vali_loader, criterion)
            test_loss = self.vali(test_data, test_loader, criterion)

            print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format(
                epoch + 1, train_steps, train_loss, vali_loss, test_loss))
            early_stopping(vali_loss, self.model, path)
            if early_stopping.early_stop:
                print("Early stopping")
                break

            adjust_learning_rate(model_optim, epoch + 1, self.args)

        # Reload the best checkpoint and re-save it as a plain state_dict (unwrapping DataParallel if needed).
        best_model_path = path + '/' + 'checkpoint.pth'
        self.model.load_state_dict(torch.load(best_model_path))
        state_dict = self.model.module.state_dict() if isinstance(self.model, DataParallel) else self.model.state_dict()
        torch.save(state_dict, path + '/' + 'checkpoint.pth')

        return self.model

    def test(self, setting, save_pred=False, inverse=False):
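        """Evaluate the current model on the test split, print MSE/MAE, and save the
        metrics (and optionally the predictions) under ./results/<setting>/."""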
        test_data, test_loader = self._get_data(flag='test')

        self.model.eval()

        preds = []
        trues = []
        metrics_all = []
        instance_num = 0

        with torch.no_grad():
            for i, (batch_x, batch_y) in enumerate(test_loader):
                pred, true = self._process_one_batch(
                    test_data, batch_x, batch_y, inverse)
                batch_size = pred.shape[0]  # 32
                instance_num += batch_size
                # Weight the per-batch metrics by batch_size so a per-instance mean can be computed later.
                batch_metric = np.array(metric(pred.detach().cpu().numpy(), true.detach().cpu().numpy())) * batch_size
                metrics_all.append(batch_metric)
                if save_pred:
                    preds.append(pred.detach().cpu().numpy())
                    trues.append(true.detach().cpu().numpy())

        metrics_all = np.stack(metrics_all, axis=0)
        metrics_mean = metrics_all.sum(axis=0) / instance_num  # divide by the total instance count to get the mean

        # result save
        folder_path = './results/' + setting + '/'
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)

        mae, mse, rmse, mape, mspe = metrics_mean
        print('mse:{}, mae:{}'.format(mse, mae))

        np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe]))
        if save_pred:
            preds = np.concatenate(preds, axis=0)
            trues = np.concatenate(trues, axis=0)
            np.save(folder_path + 'pred.npy', preds)
            np.save(folder_path + 'true.npy', trues)

        return

    def _process_one_batch(self, dataset_object, batch_x, batch_y, inverse=False):
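        """Move one (input, target) batch to the device, run the forward pass, and
        optionally map outputs and targets back to the original scale."""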
        batch_x = batch_x.float().to(self.device)
        batch_y = batch_y.float().to(self.device)

        outputs = self.model(batch_x)

        if inverse:
            outputs = dataset_object.inverse_transform(outputs)
            batch_y = dataset_object.inverse_transform(batch_y)

        return outputs, batch_y

    def _predict_batch(self, batch_x):
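        """Forward a single input batch through the model and return the raw outputs."""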
        batch_x = batch_x.float().to(self.device)
        outputs = self.model(batch_x)
        return outputs

    # Evaluate a previously saved model.
    def eval(self, setting, save_pred=False, inverse=False):
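        """Evaluate a saved model on the test split using pre-computed scaling statistics
        (args.scale_statistic) rather than statistics re-fit on the training split, save
        the metrics (and optionally predictions), and return (mae, mse, rmse, mape, mspe)."""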
        args = self.args

        data_set = Dataset_MTS(
            root_path=args.root_path,
            data_path=args.data_path,
            flag='test',
            size=[args.in_len, args.out_len],
            data_split=args.data_split,
            scale=True,
            scale_statistic=args.scale_statistic,
        )
        data_loader = DataLoader(
            data_set,
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=args.num_workers,
            drop_last=False)

        self.model.eval()

        preds = []
        trues = []
        metrics_all = []
        instance_num = 0

        with torch.no_grad():
            for i, (batch_x, batch_y) in enumerate(data_loader):
                pred, true = self._process_one_batch(
                    data_set, batch_x, batch_y, inverse)
                batch_size = pred.shape[0]
                instance_num += batch_size
                batch_metric = np.array(metric(pred.detach().cpu().numpy(), true.detach().cpu().numpy())) * batch_size
                metrics_all.append(batch_metric)
                if save_pred:
                    preds.append(pred.detach().cpu().numpy())
                    trues.append(true.detach().cpu().numpy())

        metrics_all = np.stack(metrics_all, axis=0)
        metrics_mean = metrics_all.sum(axis=0) / instance_num

        # result save
        folder_path = './results/' + setting + '/'
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)

        mae, mse, rmse, mape, mspe = metrics_mean
        print('mse:{}, mae:{}'.format(mse, mae))

        np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe]))
        if save_pred:
            preds = np.concatenate(preds, axis=0)
            trues = np.concatenate(trues, axis=0)
            np.save(folder_path + 'pred.npy', preds)
            np.save(folder_path + 'true.npy', trues)

        return mae, mse, rmse, mape, mspe
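
# A minimal usage sketch (hypothetical driver script, not part of this module): the
# experiment class is normally constructed from a parsed argument namespace carrying the
# fields read above (data_dim, in_len, out_len, checkpoints, ...), then driven with a
# settings string that names the checkpoint/result folders.
#
#   exp = Exp_crossformer(args)           # args must provide the attributes used above
#   exp.train(setting)                    # trains with early stopping, saves checkpoint.pth
#   exp.test(setting, save_pred=True)     # writes metrics.npy / pred.npy / true.npy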