Compare commits
No commits in common. "main" and "zero" have entirely different histories.
|
@ -1,4 +0,0 @@
|
|||
1. 新建一个以自己名字命名的分支,并向main分支提交
|
||||
2. 新建一个以自己名字命名的文件夹,并在里面写代码,然后提交
|
||||
### 请注意:
|
||||
请不要直接修改main分支的代码,即使你有main分支的权限
|
|
@ -0,0 +1,227 @@
|
|||
import torch
|
||||
import torch.nn as nn
|
||||
from torch.nn.utils import weight_norm
|
||||
import pandas as pd
|
||||
from sklearn import preprocessing
|
||||
import numpy as np
|
||||
from sklearn.model_selection import train_test_split
|
||||
from torch.utils.data import TensorDataset, DataLoader
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
from sklearn.metrics import confusion_matrix
|
||||
from sklearn.feature_selection import SelectKBest, chi2
|
||||
|
||||
# Pick the compute device: CUDA when PyTorch reports it as available, else CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# 1. Load the dataset; the first CSV column becomes the row index.
df = pd.read_csv('sensor.csv', index_col=0)

# 2. Preprocessing
# These three sensor columns are excluded from the feature set.
df.drop(columns=['sensor_50', 'sensor_51', 'sensor_15'], inplace=True)

# Features are the columns at positions 1..49
# (presumably position 0 is a timestamp column -- TODO confirm against the CSV).
feature_frame = df.iloc[:, 1:50]
# `fillna(method='ffill')` is deprecated in recent pandas; `ffill()` is the
# direct replacement. A forward fill cannot reach NaNs at the very start of a
# column, so a backward fill covers those; chi2 below does not accept NaN.
x = feature_frame.ffill().bfill()

# Scale every feature to [0, 1]; chi2 requires non-negative inputs.
scaler = preprocessing.MinMaxScaler()
x = scaler.fit_transform(x)
x = pd.DataFrame(x, columns=feature_frame.columns)

# Encode the status string as an integer class id:
# BROKEN -> 0, NORMAL -> 1, RECOVERING -> 2. Any other value falls back to 0.
conditions = [
    (df['machine_status'] == 'NORMAL'),
    (df['machine_status'] == 'BROKEN'),
    (df['machine_status'] == 'RECOVERING'),
]
choices = [1, 0, 2]
df['Operation'] = np.select(conditions, choices, default=0)
df.drop(['machine_status'], axis=1, inplace=True)

# 4. Feature selection: keep the 20 features with the highest chi-squared score.
# NOTE(review): the scaler and the selector are fitted on the full dataset,
# i.e. before the train/test split performed later in this script.
y = df['Operation']

selector = SelectKBest(score_func=chi2, k=20)
x_new = selector.fit_transform(x, y)
|
||||
|
||||
# 3. 构建输入数据
|
||||
def create_sequences(data, target, time_steps=24):
    """Build sliding-window samples for next-step prediction.

    Window ``i`` holds rows ``i .. i + time_steps - 1`` of ``data`` and is
    labelled with the entry of ``target`` at position ``i + time_steps``.

    Args:
        data: Array-like of shape (n_rows, n_features).
        target: 1-D array-like with at least ``n_rows`` entries. It is always
            indexed by position, so a pandas Series with an arbitrary index
            is handled correctly.
        time_steps: Number of consecutive rows per window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. ``X`` has shape
        (n_windows, time_steps, n_features) and ``y`` has shape (n_windows,),
        where ``n_windows = max(n_rows - time_steps, 0)``.

    Raises:
        ValueError: If windows can be built but ``target`` is shorter than
            ``data``.
    """
    # Convert once so that indexing below is positional, never label-based.
    data = np.asarray(data)
    target = np.asarray(target)

    n_windows = len(data) - time_steps
    if n_windows <= 0:
        # Keep the window/feature dimensions so downstream shape logic works.
        empty_X = np.empty((0, time_steps) + data.shape[1:], dtype=data.dtype)
        return empty_X, np.empty((0,), dtype=target.dtype)

    if len(target) < len(data):
        raise ValueError(
            f"target has {len(target)} entries but data has {len(data)} rows"
        )

    X = np.stack([data[i:i + time_steps] for i in range(n_windows)])
    # Label of window i is target[i + time_steps]; copy to avoid aliasing.
    y = target[time_steps:time_steps + n_windows].copy()
    return X, y
|
||||
|
||||
# Build the windowed samples. The labels are handed over as a NumPy array so
# they are looked up by position rather than by the DataFrame's index labels.
X, y = create_sequences(x_new, np.asarray(y))

# 4. Train/test split (80% / 20%).
# Stratifying keeps the class proportions equal in both parts, which matters
# because the classes are imbalanced (class weights are computed further down).
# NOTE(review): consecutive windows overlap, so a shuffled split places nearly
# identical windows in both parts -- consider a chronological split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Convert to tensors and move them to the selected device.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(device)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
y_test_tensor = torch.tensor(y_test, dtype=torch.long).to(device)

# Wrap the tensors in DataLoaders; only the training batches are shuffled.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
|
||||
|
||||
# 扩张因果卷积模块
|
||||
class DilatedCausalConv1d(nn.Module):
    """Weight-normalised dilated 1-D convolution that never looks ahead in time.

    The wrapped ``nn.Conv1d`` pads both ends of the sequence by
    ``(kernel_size - 1) * dilation``; ``forward`` then drops that many trailing
    steps, so the output has the same length as the input and step ``t`` only
    depends on input steps ``<= t``.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        kernel_size: Convolution kernel width.
        dilation: Spacing between kernel taps.
    """

    def __init__(self, in_channels, out_channels, kernel_size, dilation):
        super().__init__()
        # Amount of padding needed so that no future step leaks into the output.
        self.padding = (kernel_size - 1) * dilation
        self.conv = weight_norm(
            nn.Conv1d(in_channels,
                      out_channels,
                      kernel_size,
                      padding=self.padding,
                      dilation=dilation)
        )

    def forward(self, x):
        """Apply the convolution to ``x`` of shape (batch, channels, length)."""
        x = self.conv(x)
        if self.padding == 0:
            # kernel_size == 1: nothing was padded. Slicing with ``:-0`` would
            # return an empty tensor, so the output is passed through as is.
            return x
        # Remove the right-hand padding to restore the input length.
        return x[:, :, :-self.padding]
|
||||
|
||||
# 残差块模块
|
||||
class ResidualBlock(nn.Module):
    """Two dilated causal convolutions wrapped by a skip connection.

    Each convolution is followed by ReLU and dropout. The block input is added
    to the result; when the channel counts differ, the input first goes
    through a 1x1 convolution so both operands have ``out_channels`` channels.

    Args:
        in_channels: Channels of the block input.
        out_channels: Channels produced by both convolutions.
        kernel_size: Kernel width of both convolutions.
        dilation: Dilation of both convolutions.
        dropout: Dropout probability applied after each ReLU.
    """

    def __init__(self, in_channels, out_channels, kernel_size, dilation, dropout=0.2):
        super().__init__()
        self.conv1 = DilatedCausalConv1d(in_channels, out_channels, kernel_size, dilation)
        self.conv2 = DilatedCausalConv1d(out_channels, out_channels, kernel_size, dilation)
        self.dropout = nn.Dropout(dropout)
        self.relu = nn.ReLU()
        # A projection on the skip path is only needed to match channel counts.
        if in_channels != out_channels:
            self.downsample = nn.Conv1d(in_channels, out_channels, 1)
        else:
            self.downsample = None

    def forward(self, x):
        """Return ``skip(x) + branch(x)`` for ``x`` of shape (batch, channels, length)."""
        branch = self.dropout(self.relu(self.conv1(x)))
        branch = self.dropout(self.relu(self.conv2(branch)))
        shortcut = x if self.downsample is None else self.downsample(x)
        return shortcut + branch
|
||||
|
||||
# 完整TCN模型
|
||||
class TCN(nn.Module):
    """Stack of residual blocks whose dilation doubles at every level.

    Level ``i`` uses dilation ``2 ** i`` and outputs ``num_channels[i]``
    channels; its input width is ``input_size`` for the first level and the
    previous level's output width afterwards.

    Args:
        input_size: Number of channels of the network input.
        num_channels: Output channel count of each level, in order.
        kernel_size: Kernel width shared by all levels.
        dropout: Dropout probability shared by all levels.
    """

    def __init__(self, input_size, num_channels, kernel_size=3, dropout=0.2):
        super().__init__()
        # Input width per level: the raw input first, then each preceding output.
        in_widths = [input_size, *num_channels[:-1]]
        blocks = [
            ResidualBlock(width_in, width_out, kernel_size, 2 ** level, dropout)
            for level, (width_in, width_out) in enumerate(zip(in_widths, num_channels))
        ]
        self.network = nn.Sequential(*blocks)

    def forward(self, x):
        """Run ``x`` through every level in order and return the result."""
        return self.network(x)
|
||||
|
||||
|
||||
# 定义分类模型(整合TCN和分类器)
|
||||
class TCNClassifier(nn.Module):
    """Sequence classifier: a TCN feature extractor followed by a linear head.

    Args:
        input_size: Number of features per time step.
        num_channels: Output channel count of each TCN level.
        num_classes: Number of output logits.
        kernel_size: Kernel width passed to the TCN.
        dropout: Dropout probability passed to the TCN.
    """

    def __init__(self, input_size, num_channels, num_classes, kernel_size=3, dropout=0.2):
        super().__init__()
        self.tcn = TCN(input_size, num_channels, kernel_size, dropout)
        self.linear = nn.Linear(num_channels[-1], num_classes)

    def forward(self, x):
        """Map ``x`` of shape (batch, seq_len, features) to (batch, num_classes) logits."""
        # The convolutions expect channels first: (batch, features, seq_len).
        channels_first = x.permute(0, 2, 1)
        features = self.tcn(channels_first)
        # Only the final time step is passed to the classification head.
        final_step = features[:, :, -1]
        return self.linear(final_step)
|
||||
|
||||
|
||||
# Model configuration
input_size = x_new.shape[1]  # number of features kept by the selector
num_channels = [64, 64, 64]  # output channels of each TCN level
num_classes = 3  # number of output classes

model = TCNClassifier(input_size, num_channels, num_classes).to(device)

# Class weights to counter class imbalance: inverse class frequency,
# normalised so the weights sum to 1.
y_train_np = y_train.cpu().numpy() if isinstance(y_train, torch.Tensor) else y_train
# `minlength` guarantees one count per class even when the highest class id
# is missing from the training labels; the loss needs `num_classes` weights.
class_counts = np.bincount(y_train_np, minlength=num_classes)
# A count of zero would give an infinite weight and non-finite values after
# normalisation, so absent classes are counted as 1.
class_weights = 1. / torch.tensor(class_counts, dtype=torch.float32).clamp(min=1)
class_weights = class_weights / class_weights.sum()
class_weights = class_weights.to(device)

# Loss function, optimizer and learning-rate scheduler
criterion = nn.CrossEntropyLoss(weight=class_weights)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3)

# Training parameters and per-epoch history
num_epochs = 5
best_accuracy = 0
train_losses = []
val_accuracies = []
|
||||
|
||||
# Training loop: one pass over the training data, then one over the test data.
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0

    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()

        # Forward pass
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)

        # Backward pass; gradients are clipped to a total norm of 1.0.
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

        # Weight the batch loss by batch size so the epoch value is per sample.
        epoch_loss += loss.item() * batch_X.size(0)

    # Average training loss of this epoch
    avg_loss = epoch_loss / len(train_loader.dataset)
    train_losses.append(avg_loss)

    # Validation phase (accuracy on the test loader)
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_X, batch_y in test_loader:
            outputs = model(batch_X)
            # argmax over the class dimension; replaces the legacy
            # `torch.max(outputs.data, 1)` idiom.
            predicted = outputs.argmax(dim=1)
            total += batch_y.size(0)
            correct += (predicted == batch_y).sum().item()

    accuracy = correct / total
    val_accuracies.append(accuracy)
    # Remember the best validation accuracy seen so far.
    best_accuracy = max(best_accuracy, accuracy)
    # The scheduler is driven by the training loss, not a validation loss.
    scheduler.step(avg_loss)

    print(f"Epoch [{epoch + 1}/{num_epochs}] | "
          f"Loss: {avg_loss:.4f} | "
          f"Val Acc: {accuracy * 100:.2f}% | "
          f"LR: {optimizer.param_groups[0]['lr']:.6f}")
|
||||
|
||||
|
||||
# Final evaluation: collect predictions and true labels over the test loader.
model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for batch_X, batch_y in test_loader:
        outputs = model(batch_X)
        _, predicted = torch.max(outputs, 1)
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(batch_y.cpu().numpy())

# Confusion matrix. Class ids follow the label encoding used in this script:
# 0 = BROKEN, 1 = NORMAL, 2 = RECOVERING.
class_names = ['BROKEN', 'NORMAL', 'RECOVERING']
# Passing `labels` fixes the matrix at 3x3 in this order even if a class never
# occurs in the test data, so the tick labels always line up with the rows
# and columns.
cm = confusion_matrix(all_labels, all_preds, labels=list(range(len(class_names))))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names,
            yticklabels=class_names)
plt.title("Confusion Matrix")
# confusion_matrix puts true labels on rows and predictions on columns.
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.show()
|
|
@ -0,0 +1,136 @@
|
|||
import xgboost as xgb # 导入XGBoost
|
||||
import pandas as pd
|
||||
from sklearn import preprocessing
|
||||
import numpy as np
|
||||
from sklearn.model_selection import train_test_split
|
||||
# 移除了 torch 和 torch.nn 相关导入
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report # 增加评估指标
|
||||
from sklearn.feature_selection import SelectKBest, chi2
|
||||
from sklearn.utils.class_weight import compute_sample_weight # 用于计算样本权重
|
||||
|
||||
# This script runs XGBoost with its default (CPU) configuration.
print("Using CPU for XGBoost (GPU can be configured if needed and available)")

# 1. Load the dataset; the first CSV column becomes the row index.
df = pd.read_csv('sensor.csv', index_col=0)
print("Dataset loaded.")

# 2. Preprocessing
# These three sensor columns are excluded from the feature set.
df.drop(columns=['sensor_50', 'sensor_51', 'sensor_15'], inplace=True)

# Features are the columns at positions 1..49
# (presumably position 0 is a timestamp column -- TODO confirm against the CSV).
feature_frame = df.iloc[:, 1:50]
# `fillna(method='ffill')` is deprecated in recent pandas; `ffill()` is the
# direct replacement. A forward fill cannot reach NaNs at the very start of a
# column, so a backward fill covers those; chi2 below does not accept NaN.
x = feature_frame.ffill().bfill()

# Scale every feature to [0, 1].
scaler = preprocessing.MinMaxScaler()
x_scaled = scaler.fit_transform(x)
x_scaled = pd.DataFrame(x_scaled, columns=feature_frame.columns)
print("Data scaled and NaNs filled.")

# Encode the target: BROKEN -> 0, NORMAL -> 1, RECOVERING -> 2.
# Any other status value falls back to 0. The original 'machine_status'
# column is kept in `df` so it can still be inspected.
conditions = [
    (df['machine_status'] == 'NORMAL'),
    (df['machine_status'] == 'BROKEN'),
    (df['machine_status'] == 'RECOVERING'),
]
choices = [1, 0, 2]
df['Operation'] = np.select(conditions, choices, default=0)
y = df['Operation'].values  # plain NumPy array, indexed by position
print("Target variable encoded.")
print("Class distribution in y:", np.bincount(y))

# 4. Feature selection on the scaled data: keep the 20 features with the
# highest chi-squared score. chi2 needs non-negative features, which the
# MinMaxScaler output satisfies.
selector = SelectKBest(score_func=chi2, k=20)
x_new = selector.fit_transform(x_scaled, y)
selected_features_indices = selector.get_support(indices=True)
selected_features_names = x_scaled.columns[selected_features_indices]
print(f"Selected {len(selected_features_names)} features:", selected_features_names.tolist())
|
||||
|
||||
# 3. 构建时序输入数据 (仍需要创建窗口)
|
||||
def create_sequences(data, target, time_steps=24):
    """Build sliding-window samples for next-step prediction, logging progress.

    Window ``i`` holds rows ``i .. i + time_steps - 1`` of ``data`` and is
    labelled with the entry of ``target`` at position ``i + time_steps``.

    Args:
        data: Array-like of shape (n_rows, n_features).
        target: 1-D array-like with at least ``n_rows`` entries; always
            indexed by position.
        time_steps: Number of consecutive rows per window.

    Returns:
        Tuple ``(X, y_seq)`` of NumPy arrays. ``X`` has shape
        (n_windows, time_steps, n_features) and ``y_seq`` has shape
        (n_windows,), where ``n_windows = max(n_rows - time_steps, 0)``.

    Raises:
        ValueError: If windows can be built but ``target`` is shorter than
            ``data``.
    """
    # Convert once so that indexing below is positional, never label-based.
    data = np.asarray(data)
    target = np.asarray(target)

    print(f"Creating sequences with time_steps={time_steps}...")
    n_windows = len(data) - time_steps
    if n_windows <= 0:
        # Keep the window/feature dimensions so downstream shape logic works.
        X = np.empty((0, time_steps) + data.shape[1:], dtype=data.dtype)
        y_seq = np.empty((0,), dtype=target.dtype)
    else:
        if len(target) < len(data):
            raise ValueError(
                f"target has {len(target)} entries but data has {len(data)} rows"
            )
        X = np.stack([data[i:i + time_steps] for i in range(n_windows)])
        # The label is the status of the point right after the window.
        y_seq = target[time_steps:time_steps + n_windows].copy()

    # The arrays are built exactly once and reused for the log line.
    print(f"Finished creating sequences. X shape: {X.shape}, y shape: {y_seq.shape}")
    return X, y_seq
|
||||
|
||||
# Window length, shared by the sequence builder and the flattening step below.
time_steps = 24
X_seq, y_seq = create_sequences(x_new, y, time_steps=time_steps)

# XGBoost takes a 2-D feature matrix, so every window of shape
# (time_steps, n_features) is flattened into one row of
# time_steps * n_features values.
n_samples, _, n_features = X_seq.shape
flat_width = time_steps * n_features
X_reshaped = X_seq.reshape(n_samples, flat_width)
print(f"Reshaped X for XGBoost. New shape: {X_reshaped.shape}")

# 4. Train/test split (80% / 20%) on the flattened windows; stratified so
# both parts keep the class proportions of `y_seq`.
X_train, X_test, y_train, y_test = train_test_split(
    X_reshaped,
    y_seq,
    test_size=0.2,
    random_state=42,
    stratify=y_seq,
)
print(f"Dataset split. Train shape: {X_train.shape}, Test shape: {X_test.shape}")
print("Class distribution in y_train:", np.bincount(y_train))
print("Class distribution in y_test:", np.bincount(y_test))

# Per-sample weights ('balanced' mode) to compensate for class imbalance
# during training.
sample_weights = compute_sample_weight(class_weight='balanced', y=y_train)
print("Sample weights computed for training.")

# XGBoost classifier configuration.
print("Initializing XGBoost Classifier...")
# NOTE(review): `use_label_encoder` is reported as deprecated by newer XGBoost
# releases -- confirm against the installed version.
xgb_params = {
    'objective': 'multi:softprob',         # per-class probabilities
    'num_class': len(np.unique(y_seq)),    # number of distinct labels
    'eval_metric': 'mlogloss',             # multi-class log loss
    'use_label_encoder': False,
    'random_state': 42,
    'n_estimators': 100,                   # number of trees (tunable)
    'learning_rate': 0.1,                  # tunable
    'max_depth': 5,                        # maximum tree depth (tunable)
}
model = xgb.XGBClassifier(**xgb_params)
|
||||
|
||||
# --- 移除了 PyTorch 损失函数和优化器 ---
|
||||
|
||||
# --- 移除了 PyTorch 训练循环 ---
|
||||
|
||||
# Train the XGBoost model, weighting each sample by `sample_weights`.
# No evaluation set / early stopping is used here to keep the run simple.
print("Training XGBoost model...")
model.fit(X_train, y_train, sample_weight=sample_weights, verbose=True)
print("XGBoost training finished.")

# Evaluate on the held-out split; `predict` returns class labels directly.
print("Evaluating XGBoost model...")
y_pred = model.predict(X_test)

# Accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

# Class ids follow the label encoding used in this script:
# 0 = BROKEN, 1 = NORMAL, 2 = RECOVERING.
class_names = ['BROKEN', 'NORMAL', 'RECOVERING']
class_ids = list(range(len(class_names)))

# Detailed classification report. `labels` pins the class order so that the
# three names always match, even if a class is absent from y_test and y_pred.
print("\nClassification Report:")
print(classification_report(y_test, y_pred, labels=class_ids, target_names=class_names))

# Confusion matrix, fixed at 3x3 in the same class order.
print("\nConfusion Matrix:")
cm = confusion_matrix(y_test, y_pred, labels=class_ids)
print(cm)

# Plot the confusion matrix as a heatmap.
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names,
            yticklabels=class_names)
plt.title("XGBoost Confusion Matrix")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.show()
|
Loading…
Reference in New Issue