from fenglifadian.data.data_loader import Dataset_MTS
from fenglifadian.cross_exp.exp_basic import Exp_Basic
from fenglifadian.cross_models.cross_former import Crossformer
from fenglifadian.utils.tools import EarlyStopping, adjust_learning_rate
from fenglifadian.utils.metrics import metric

import numpy as np

import torch
import torch.nn as nn
from torch import optim
from torch.utils.data import DataLoader
from torch.nn import DataParallel

import os
import time
import json
import pickle

import warnings
warnings.filterwarnings('ignore')


# Exp_crossformer inherits from Exp_Basic; __init__ simply delegates to the
# parent constructor, which stores args and builds the model.
class Exp_crossformer(Exp_Basic):
    def __init__(self, args):
        super(Exp_crossformer, self).__init__(args)

    # Build the Crossformer model. In a multi-GPU setup, nn.DataParallel wraps
    # the model for data parallelism: each batch is split across the available
    # GPUs to speed up training.
    def _build_model(self):
        model = Crossformer(
            self.args.data_dim,  # 7
            self.args.in_len,    # 96
            self.args.out_len,   # 24
            self.args.seg_len,   # 6
            self.args.win_size,  # 2
            self.args.factor,    # 10
            self.args.d_model,   # 256
            self.args.d_ff,      # 512
            self.args.n_heads,   # 4
            self.args.e_layers,  # 3
            self.args.dropout,   # 0.2
            self.args.baseline,  # True
            self.device
        ).float()

        if self.args.use_multi_gpu and self.args.use_gpu:
            model = nn.DataParallel(model, device_ids=self.args.device_ids)
        return model

    def _get_data(self, flag):
        args = self.args

        # drop_last is False: a final batch with fewer than batch_size samples
        # is kept rather than discarded. Only shuffling differs between splits.
        if flag == 'test':
            shuffle_flag = False
            drop_last = False
            batch_size = args.batch_size
        else:
            shuffle_flag = True
            drop_last = False
            batch_size = args.batch_size

        data_set = Dataset_MTS(
            root_path=args.root_path,
            data_path=args.data_path,
            flag=flag,
            size=[args.in_len, args.out_len],
            data_split=args.data_split,
        )
        print(flag, len(data_set))

        # Wrap the dataset in a PyTorch DataLoader, which serves it in batches:
        #   data_set:    the Dataset_MTS instance created above
        #   batch_size:  number of samples per batch
        #   shuffle:     whether to shuffle the sample order (per shuffle_flag)
        #   num_workers: number of loader subprocesses, usually set near the
        #                CPU core count to speed up loading
        #   drop_last:   whether to drop a final incomplete batch (per drop_last)
        data_loader = DataLoader(
            data_set,
            batch_size=batch_size,
            shuffle=shuffle_flag,
            num_workers=args.num_workers,
            drop_last=drop_last)

        return data_set, data_loader

    # Use the Adam optimizer with the configured learning rate.
    def _select_optimizer(self):
        model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate)
        return model_optim

    # Use mean squared error (MSE) as the loss function.
    def _select_criterion(self):
        criterion = nn.MSELoss()
        return criterion

    # Compute the average loss over a validation (or test) loader.
    def vali(self, vali_data, vali_loader, criterion):
        self.model.eval()
        total_loss = []
        with torch.no_grad():
            for i, (batch_x, batch_y) in enumerate(vali_loader):
                pred, true = self._process_one_batch(vali_data, batch_x, batch_y)
                loss = criterion(pred.detach().cpu(), true.detach().cpu())
                total_loss.append(loss.detach().item())
        total_loss = np.average(total_loss)
        self.model.train()  # switch back to training mode
        return total_loss
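    # Shape sketch (an illustration based on the example hyperparameter values
    # noted in _build_model, not part of the original pipeline): with
    # data_dim=7, in_len=96 and out_len=24, _process_one_batch maps
    #     batch_x: [batch_size, 96, 7]  ->  outputs: [batch_size, 24, 7]
    # and the MSE criterion compares outputs against batch_y of the same
    # [batch_size, 24, 7] shape.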
    # Model training loop with validation-based early stopping.
    def train(self, setting):
        train_data, train_loader = self._get_data(flag='train')
        vali_data, vali_loader = self._get_data(flag='val')
        test_data, test_loader = self._get_data(flag='test')

        path = os.path.join(self.args.checkpoints, setting)
        if not os.path.exists(path):
            os.makedirs(path)
        with open(os.path.join(path, "args.json"), 'w') as f:
            json.dump(vars(self.args), f, indent=True)
        # Save the training data's scaling statistics (mean and std) as a
        # pickle file, so a saved model can later be evaluated consistently.
        scale_statistic = {'mean': train_data.scaler.mean, 'std': train_data.scaler.std}
        with open(os.path.join(path, "scale_statistic.pkl"), 'wb') as f:
            pickle.dump(scale_statistic, f)

        train_steps = len(train_loader)  # number of batches per epoch
        early_stopping = EarlyStopping(patience=self.args.patience, verbose=True)

        model_optim = self._select_optimizer()
        criterion = self._select_criterion()

        for epoch in range(self.args.train_epochs):
            time_now = time.time()
            iter_count = 0
            train_loss = []

            self.model.train()
            epoch_time = time.time()
            for i, (batch_x, batch_y) in enumerate(train_loader):
                iter_count += 1

                model_optim.zero_grad()
                pred, true = self._process_one_batch(train_data, batch_x, batch_y)
                loss = criterion(pred, true)
                train_loss.append(loss.item())

                if (i + 1) % 100 == 0:
                    print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
                    speed = (time.time() - time_now) / iter_count
                    left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i)
                    print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
                    iter_count = 0
                    time_now = time.time()

                loss.backward()     # backpropagation
                model_optim.step()  # parameter update

            print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
            train_loss = np.average(train_loss)
            vali_loss = self.vali(vali_data, vali_loader, criterion)
            test_loss = self.vali(test_data, test_loader, criterion)

            print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format(
                epoch + 1, train_steps, train_loss, vali_loss, test_loss))
            early_stopping(vali_loss, self.model, path)
            if early_stopping.early_stop:
                print("Early stopping")
                break

            adjust_learning_rate(model_optim, epoch + 1, self.args)

        # Reload the best checkpoint and re-save it unwrapped: if the model is
        # a DataParallel wrapper, save the inner module's state_dict so the
        # checkpoint keys carry no 'module.' prefix.
        best_model_path = path + '/' + 'checkpoint.pth'
        self.model.load_state_dict(torch.load(best_model_path))
        state_dict = self.model.module.state_dict() if isinstance(self.model, DataParallel) else self.model.state_dict()
        torch.save(state_dict, path + '/' + 'checkpoint.pth')

        return self.model

    def test(self, setting, save_pred=False, inverse=False):
        test_data, test_loader = self._get_data(flag='test')

        self.model.eval()

        preds = []
        trues = []
        metrics_all = []
        instance_num = 0

        with torch.no_grad():
            for i, (batch_x, batch_y) in enumerate(test_loader):
                pred, true = self._process_one_batch(test_data, batch_x, batch_y, inverse)
                batch_size = pred.shape[0]  # e.g. 32
                instance_num += batch_size
                # Weight each batch's metrics by its size, so that dividing by
                # instance_num below yields an instance-level mean.
                batch_metric = np.array(metric(pred.detach().cpu().numpy(), true.detach().cpu().numpy())) * batch_size
                metrics_all.append(batch_metric)
                if save_pred:
                    preds.append(pred.detach().cpu().numpy())
                    trues.append(true.detach().cpu().numpy())

        metrics_all = np.stack(metrics_all, axis=0)
        metrics_mean = metrics_all.sum(axis=0) / instance_num  # instance-level mean

        # Save results.
        folder_path = './results/' + setting + '/'
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)

        mae, mse, rmse, mape, mspe = metrics_mean
        print('mse:{}, mae:{}'.format(mse, mae))

        np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe]))
        if save_pred:
            preds = np.concatenate(preds, axis=0)
            trues = np.concatenate(trues, axis=0)
            np.save(folder_path + 'pred.npy', preds)
            np.save(folder_path + 'true.npy', trues)

        return

    def _process_one_batch(self, dataset_object, batch_x, batch_y, inverse=False):
        batch_x = batch_x.float().to(self.device)
        batch_y = batch_y.float().to(self.device)

        outputs = self.model(batch_x)

        if inverse:
            outputs = dataset_object.inverse_transform(outputs)
            batch_y = dataset_object.inverse_transform(batch_y)

        return outputs, batch_y

    def _predict_batch(self, batch_x):
        batch_x = batch_x.float().to(self.device)
        outputs = self.model(batch_x)
        return outputs
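    # Worked example of the batch-weighted averaging used in test() and eval()
    # (made-up numbers, for illustration only): if three batches have sizes
    # [32, 32, 16] and per-batch MSEs [0.10, 0.12, 0.20], then
    #     metrics_mean = (0.10*32 + 0.12*32 + 0.20*16) / (32 + 32 + 16) = 0.128
    # i.e. metrics_all.sum(axis=0) / instance_num weights every instance
    # equally, whereas a plain mean over batches (0.14 here) would over-weight
    # the smaller final batch.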
    # Evaluate a saved model on the test split, reusing the scaling statistics
    # recorded at training time (args.scale_statistic).
    def eval(self, setting, save_pred=False, inverse=False):
        args = self.args
        data_set = Dataset_MTS(
            root_path=args.root_path,
            data_path=args.data_path,
            flag='test',
            size=[args.in_len, args.out_len],
            data_split=args.data_split,
            scale=True,
            scale_statistic=args.scale_statistic,
        )
        data_loader = DataLoader(
            data_set,
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=args.num_workers,
            drop_last=False)

        self.model.eval()

        preds = []
        trues = []
        metrics_all = []
        instance_num = 0

        with torch.no_grad():
            for i, (batch_x, batch_y) in enumerate(data_loader):
                pred, true = self._process_one_batch(data_set, batch_x, batch_y, inverse)
                batch_size = pred.shape[0]
                instance_num += batch_size
                batch_metric = np.array(metric(pred.detach().cpu().numpy(), true.detach().cpu().numpy())) * batch_size
                metrics_all.append(batch_metric)
                if save_pred:
                    preds.append(pred.detach().cpu().numpy())
                    trues.append(true.detach().cpu().numpy())

        metrics_all = np.stack(metrics_all, axis=0)
        metrics_mean = metrics_all.sum(axis=0) / instance_num

        # Save results.
        folder_path = './results/' + setting + '/'
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)

        mae, mse, rmse, mape, mspe = metrics_mean
        print('mse:{}, mae:{}'.format(mse, mae))

        np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe]))
        if save_pred:
            preds = np.concatenate(preds, axis=0)
            trues = np.concatenate(trues, axis=0)
            np.save(folder_path + 'pred.npy', preds)
            np.save(folder_path + 'true.npy', trues)

        return mae, mse, rmse, mape, mspe
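
# Minimal usage sketch (an assumption, not part of the original file). The
# field names below follow the attributes this class reads from `args`; the
# paths, values and experiment tag are hypothetical, and the local Exp_Basic /
# adjust_learning_rate implementations may require additional fields (e.g. a
# learning-rate schedule flag or explicit GPU device ids).
if __name__ == '__main__':
    from argparse import Namespace

    args = Namespace(
        # data (hypothetical paths and split)
        root_path='./datasets/', data_path='wind_power.csv',
        data_split=[0.7, 0.1, 0.2], checkpoints='./checkpoints/',
        # model (the example values noted in _build_model)
        data_dim=7, in_len=96, out_len=24, seg_len=6, win_size=2, factor=10,
        d_model=256, d_ff=512, n_heads=4, e_layers=3, dropout=0.2, baseline=True,
        # training
        batch_size=32, num_workers=0, learning_rate=1e-4,
        train_epochs=20, patience=3,
        # hardware
        use_gpu=torch.cuda.is_available(), use_multi_gpu=False, gpu=0,
    )

    exp = Exp_crossformer(args)
    setting = 'Crossformer_wind_il96_ol24'  # hypothetical experiment tag
    exp.train(setting)
    exp.test(setting, save_pred=True)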