import os
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from fenglifadian.utils.tools import StandardScaler

import warnings
warnings.filterwarnings('ignore')


class Dataset_MTS(Dataset):
    def __init__(self, root_path, data_path='ETTh1.csv', flag='train', size=None,
                 data_split=[0.7, 0.1, 0.2], scale=True, scale_statistic=None):
        # size: [in_len, out_len] -- length of the input window and of the forecast window
        self.in_len = size[0]
        self.out_len = size[1]

        # init
        assert flag in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[flag]

        self.scale = scale
        self.root_path = root_path
        self.data_path = data_path
        self.data_split = data_split
        self.scale_statistic = scale_statistic
        self.__read_data__()

    def __read_data__(self):
        df_raw = pd.read_csv(os.path.join(self.root_path, self.data_path))

        if self.data_split[0] > 1:
            # data_split given as absolute numbers of time steps
            train_num = self.data_split[0]
            val_num = self.data_split[1]
            test_num = self.data_split[2]
        else:
            # data_split given as fractions of the full series
            train_num = int(len(df_raw) * self.data_split[0])
            test_num = int(len(df_raw) * self.data_split[2])
            val_num = len(df_raw) - train_num - test_num

        # left borders: val/test start in_len steps early so their first window has full history
        border1s = [0, train_num - self.in_len, train_num + val_num - self.in_len]
        # right borders
        border2s = [train_num, train_num + val_num, train_num + val_num + test_num]
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]

        # drop the timestamp column, keep all feature columns
        cols_data = df_raw.columns[1:]
        df_data = df_raw[cols_data]

        # normalization: fit the scaler on the training split only,
        # or reuse precomputed statistics if they are provided
        if self.scale:
            if self.scale_statistic is None:
                self.scaler = StandardScaler()
                train_data = df_data[border1s[0]:border2s[0]]
                self.scaler.fit(train_data.values)
            else:
                self.scaler = StandardScaler(mean=self.scale_statistic['mean'],
                                             std=self.scale_statistic['std'])
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        self.data_x = data[border1:border2]
        self.data_y = data[border1:border2]

    def __getitem__(self, index):
        # slice the in_len-step input window starting at `index`
        # and the out_len-step target window that immediately follows it
        s_begin = index
        s_end = s_begin + self.in_len
        r_begin = s_end
        r_end = r_begin + self.out_len

        seq_x = self.data_x[s_begin:s_end]
        seq_y = self.data_y[r_begin:r_end]

        return seq_x, seq_y

    def __len__(self):
        # number of (input, target) window pairs that fit in this split
        return len(self.data_x) - self.in_len - self.out_len + 1

    def inverse_transform(self, data):
        # map standardized values back to the original scale
        return self.scaler.inverse_transform(data)
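

# Minimal usage sketch (illustrative only): the './data' directory, the CSV file name,
# and the window lengths / batch size below are assumed placeholders, not values
# prescribed by this module. DataLoader's default collate stacks the numpy windows
# returned by __getitem__ into batched tensors.
if __name__ == '__main__':
    dataset = Dataset_MTS(
        root_path='./data',            # hypothetical data directory
        data_path='ETTh1.csv',
        flag='train',
        size=[96, 24],                 # in_len=96 history steps, out_len=24 forecast steps
        data_split=[0.7, 0.1, 0.2],    # 70% train / 10% val / 20% test
    )
    loader = DataLoader(dataset, batch_size=32, shuffle=True)
    seq_x, seq_y = next(iter(loader))
    # expected shapes: (32, 96, n_features) and (32, 24, n_features)
    print(seq_x.shape, seq_y.shape)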